<span style="color:blue">This notebook describes an alternative, easier, and faster way to create a surrogate model from the simulation data; ultimately, it amounts to a "standard" machine learning modeling task.</span>

## Libraries and Data Import

In [1]:
import numpy as np  #numpy==1.21.1
import pandas as pd  #pandas==1.3.1

import plotly.graph_objects as go  #plotly==5.3.1

from sklearn.ensemble import RandomForestRegressor  #scikit-learn==0.24.2


In [2]:
# Read the simulation results from the CSV file located next to the notebook.
df = pd.read_csv("./sim_data.csv")

# Preview the first 20 rows as a quick check of the columns and values.
df.head(n=20)


Unnamed: 0,Sample index,Time,Init_Cond_1,Init_Cond_2,Dyn_Input_1,Dyn_Input_2,Dyn_Output_1
0,0,0.0001,0.498024,0.501976,2.56426e-23,0.225052,0.454159
1,0,0.1001,0.498024,0.501976,0.004264079,0.116445,0.570981
2,0,0.2001,0.498024,0.501976,0.009476366,0.060386,0.570923
3,0,0.3001,0.498024,0.501976,0.01468923,0.031298,0.57086
4,0,0.4001,0.498024,0.501976,0.01990267,0.016217,0.570794
5,0,0.5001,0.498024,0.501976,0.02511671,0.008402,0.570726
6,0,0.6001,0.498024,0.501976,0.03033133,0.004354,0.570657
7,0,0.7001,0.498024,0.501976,0.03554654,0.002256,0.570588
8,0,0.8001,0.498024,0.501976,0.04076235,0.001169,0.570518
9,0,0.9001,0.498024,0.501976,0.04597875,0.000607,0.570449


In [3]:
# Choose the column to predict:
# Dyn_Input_1, Dyn_Input_2, or Dyn_Output_1
target = "Dyn_Input_2"

# Rows whose "Sample index" is below this limit are used for training and the
# remaining ones for testing, so all rows sharing a sample index stay on the
# same side of the split.
train_test_limit = 80

# Predictor columns, defined once so the train and test matrices cannot
# silently drift apart.
feature_cols = ["Time", "Init_Cond_1", "Init_Cond_2"]

df_train = df[df["Sample index"] < train_test_limit]
X_train = df_train[feature_cols]
y_train = df_train[target]
print("Train shapes:", X_train.shape, y_train.shape)

# A fixed seed makes the forest (bootstrap sampling, split selection)
# reproducible, so the scores and importances below are identical after a
# "Restart & Run All".
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train, y_train)
# For regressors, score() returns the coefficient of determination (R^2).
print(f"Train score: {rf_reg.score(X_train, y_train):.3f}")

df_test = df[df["Sample index"] >= train_test_limit]
X_test = df_test[feature_cols]
y_test = df_test[target]
print("Test shapes:", X_test.shape, y_test.shape)
print(f"Test score: {rf_reg.score(X_test, y_test):.3f}")

# Impurity-based feature importances, labelled with the feature names.
feat_imp = pd.DataFrame(data=rf_reg.feature_importances_,
                        columns=["Imp"],
                        index=X_train.columns)

# Last expression of the cell: display the importances, most important first.
feat_imp.sort_values(by="Imp", ascending=False)


Train shapes: (8080, 3) (8080,)
Train score: 0.997
Test shapes: (12120, 3) (12120,)
Test score: 0.990


Unnamed: 0,Imp
Time,0.68733
Init_Cond_1,0.215155
Init_Cond_2,0.097515


In [None]:
# Pick one held-out sample at random and compare the model's prediction with
# the recorded trajectory for that sample.

# Draw from the sample ids that are actually present in the test range. This
# includes the last sample (np.random.randint's upper bound is exclusive) and
# does not assume the ids are contiguous.
test_sample_ids = df.loc[df["Sample index"] >= train_test_limit,
                         "Sample index"].unique()
sim_number = np.random.choice(test_sample_ids)

print("Sim Number:", sim_number)

# Select the rows by sample id instead of a computed row offset, so the lookup
# does not depend on the number of rows per sample or on the row order.
sim_rows = df[df["Sample index"] == sim_number]
ic1 = sim_rows["Init_Cond_1"].iloc[0]
ic2 = sim_rows["Init_Cond_2"].iloc[0]
print("Init_Cond_1:", ic1)
print("Init_Cond_2:", ic2)

# Ground truth for exactly this sample (no float-equality matching on the
# initial conditions, which could also match other samples).
y_truth = sim_rows[target]

# Predict on the sample's own time stamps: a regenerated np.linspace grid is
# not guaranteed to coincide with the recorded times, which would misalign
# the predicted and true curves.
X_valid = sim_rows[["Time", "Init_Cond_1", "Init_Cond_2"]].reset_index(drop=True)

y_predicted = rf_reg.predict(X_valid)

fig = go.Figure()

# Both traces share the recorded time axis so they are compared point by point.
fig.add_trace(go.Scatter(x=X_valid["Time"], y=y_predicted,
                         mode='markers', name='predicted'))
fig.add_trace(go.Scatter(x=X_valid["Time"], y=y_truth,
                         mode='lines', name='truth (y)'))

fig.update_layout(autosize=False, width=800, height=600,
                  title=target, title_x=0.5,
                  xaxis_title="Time", yaxis_title=target)

fig.show()
