In [1]:
import pandas as pd
import numpy as np

def calculate_smoothed_displacement(row, window_size=10, start_col=19, end_col=139):
    """Return the peak-to-peak range of a row's samples after rolling-mean smoothing.

    Parameters
    ----------
    row : pandas.Series
        One DataFrame row, as supplied by ``DataFrame.apply(..., axis=1)``.
    window_size : int, default 10
        Width of the centred rolling-mean window.
    start_col : int, default 19
        Position (inclusive) of the first sample column within ``row``.
    end_col : int, default 139
        Position (exclusive) one past the last sample column within ``row``.

    Returns
    -------
    number
        ``max - min`` of the smoothed samples. An extreme that is not finite
        (for example when every smoothed value is NaN) is treated as 0.
    """
    # .iloc makes the positional intent explicit regardless of the index labels.
    samples = pd.to_numeric(row.iloc[start_col:end_col], errors='coerce')
    smoothed = samples.rolling(window=window_size, center=True).mean()

    # Compute each extreme once, then fall back to 0 when it is NaN/inf.
    peak = smoothed.max()
    trough = smoothed.min()
    if not np.isfinite(peak):
        peak = 0
    if not np.isfinite(trough):
        trough = 0
    return peak - trough

# Dataset keys; the part after the underscore selects the CSV file to read.
datasets = ['df_160', 'df_210', 'df_270', 'df_320', 'df_360', 'df_380']

# Read every velocity file into its own frame, keyed by dataset name.
dfs = {
    dataset: pd.read_csv(f'../dataset/velocity_dfs/data_velocity_{dataset.split("_")[1]}.csv')
    for dataset in datasets
}

# Add the row-wise smoothed peak-to-peak value to each frame.
for frame in dfs.values():
    frame['smoothed_displacement'] = frame.apply(calculate_smoothed_displacement, axis=1)

# Stack all frames into one, renumbering the rows from 0.
combined_df = pd.concat(dfs.values(), ignore_index=True)

combined_df

Unnamed: 0,Date,Period,2,Cycle_Number,4,5,6,7,velocity,9,...,134,135,136,137,138,139,experiment_number,position,displacement,smoothed_displacement
0,26/01/2005 17:49:45,300.641,0,1542,1,15,1,1,160.0,4.116579,...,0.003542,0.003542,0.003572,0.003522,0.003562,,2,Right,0.000197,0.000167
1,26/01/2005 17:54:45,600.656,0,2777,1,15,1,1,160.0,4.116579,...,0.003542,0.003592,0.003522,0.003562,0.003552,,2,Right,0.000197,0.000169
2,26/01/2005 17:59:46,901.156,0,4014,1,15,1,1,160.0,4.116579,...,0.003582,0.003542,0.003552,0.003622,0.003512,,2,Right,0.000187,0.000166
3,26/01/2005 18:04:46,1201.391,0,5249,1,15,1,1,160.0,4.116579,...,0.003542,0.003592,0.003532,0.003552,0.003592,,2,Right,0.000205,0.000183
4,26/01/2005 18:09:46,1502.000,0,6489,1,15,1,1,160.0,4.116579,...,0.003572,0.003592,0.003532,0.003582,0.003572,,2,Right,0.000190,0.000166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4507,28/06/2005 18:14:50,19228.875,0,188595,1,14,1,1,380.0,9.776876,...,-0.001156,-0.001136,-0.001186,-0.001117,-0.001117,,7,Left,0.000269,0.000242
4508,28/06/2005 18:19:50,19529.266,0,191533,1,14,1,1,380.0,9.776876,...,-0.001186,-0.001136,-0.001127,-0.001166,-0.001117,,7,Left,0.000289,0.000249
4509,28/06/2005 18:24:51,19830.219,0,194473,1,14,1,1,380.0,9.776876,...,-0.001206,-0.001127,-0.001136,-0.001176,-0.001107,,7,Left,0.000279,0.000247
4510,28/06/2005 18:29:52,20130.969,0,197411,1,14,1,1,380.0,9.776876,...,-0.001146,-0.001127,-0.001166,-0.001107,-0.001107,,7,Left,0.000282,0.000243


In [17]:
# Encode the 'position' column numerically: "Right" -> 0, "Left" -> 1.
position_codes = {'Right': 0, 'Left': 1}
combined_df['position'] = combined_df['position'].replace(position_codes)

# Display the whole frame to check the encoded column.
combined_df.iloc[:, :]

Unnamed: 0,Date,Period,2,Cycle_Number,4,5,6,7,velocity,9,...,134,135,136,137,138,139,experiment_number,position,displacement,smoothed_displacement
0,26/01/2005 17:49:45,300.641,0,1542,1,15,1,1,160.0,4.116579,...,0.003542,0.003542,0.003572,0.003522,0.003562,,2,0,0.000197,0.000167
1,26/01/2005 17:54:45,600.656,0,2777,1,15,1,1,160.0,4.116579,...,0.003542,0.003592,0.003522,0.003562,0.003552,,2,0,0.000197,0.000169
2,26/01/2005 17:59:46,901.156,0,4014,1,15,1,1,160.0,4.116579,...,0.003582,0.003542,0.003552,0.003622,0.003512,,2,0,0.000187,0.000166
3,26/01/2005 18:04:46,1201.391,0,5249,1,15,1,1,160.0,4.116579,...,0.003542,0.003592,0.003532,0.003552,0.003592,,2,0,0.000205,0.000183
4,26/01/2005 18:09:46,1502.000,0,6489,1,15,1,1,160.0,4.116579,...,0.003572,0.003592,0.003532,0.003582,0.003572,,2,0,0.000190,0.000166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4507,28/06/2005 18:14:50,19228.875,0,188595,1,14,1,1,380.0,9.776876,...,-0.001156,-0.001136,-0.001186,-0.001117,-0.001117,,7,1,0.000269,0.000242
4508,28/06/2005 18:19:50,19529.266,0,191533,1,14,1,1,380.0,9.776876,...,-0.001186,-0.001136,-0.001127,-0.001166,-0.001117,,7,1,0.000289,0.000249
4509,28/06/2005 18:24:51,19830.219,0,194473,1,14,1,1,380.0,9.776876,...,-0.001206,-0.001127,-0.001136,-0.001176,-0.001107,,7,1,0.000279,0.000247
4510,28/06/2005 18:29:52,20130.969,0,197411,1,14,1,1,380.0,9.776876,...,-0.001146,-0.001127,-0.001166,-0.001107,-0.001107,,7,1,0.000282,0.000243


In [18]:
# Remove the columns at positions 18 through 139 (end of the slice is exclusive).
# NOTE(review): positional and not idempotent; re-running this cell drops further columns.
combined_df = combined_df.drop(columns=combined_df.columns[18:140])

combined_df.head()

Unnamed: 0,Date,Period,2,Cycle_Number,4,5,6,7,velocity,9,...,12,13,14,15,16,17,experiment_number,position,displacement,smoothed_displacement
0,26/01/2005 17:49:45,300.641,0,1542,1,15,1,1,160.0,4.116579,...,1.47648e-07,9.48267e-08,,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000167
1,26/01/2005 17:54:45,600.656,0,2777,1,15,1,1,160.0,4.116579,...,1.47648e-07,9.48267e-08,,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000169
2,26/01/2005 17:59:46,901.156,0,4014,1,15,1,1,160.0,4.116579,...,1.47648e-07,9.48267e-08,,-0.010842,-0.009141,-0.005062,2,0,0.000187,0.000166
3,26/01/2005 18:04:46,1201.391,0,5249,1,15,1,1,160.0,4.116579,...,1.44511e-07,9.48267e-08,,-0.010842,-0.00913,-0.005062,2,0,0.000205,0.000183
4,26/01/2005 18:09:46,1502.0,0,6489,1,15,1,1,160.0,4.116579,...,1.44511e-07,9.48267e-08,,-0.010842,-0.00913,-0.005062,2,0,0.00019,0.000166


In [19]:
# Remove the column at position 14 (it shows no values in the preview; presumably all-NaN, confirm).
combined_df = combined_df.drop(columns=combined_df.columns[14])


In [20]:
# Display the frame after the column at position 14 has been removed.
combined_df

Unnamed: 0,Date,Period,2,Cycle_Number,4,5,6,7,velocity,9,...,11,12,13,15,16,17,experiment_number,position,displacement,smoothed_displacement
0,26/01/2005 17:49:45,300.641,0,1542,1,15,1,1,160.0,4.116579,...,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000167
1,26/01/2005 17:54:45,600.656,0,2777,1,15,1,1,160.0,4.116579,...,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000169
2,26/01/2005 17:59:46,901.156,0,4014,1,15,1,1,160.0,4.116579,...,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000187,0.000166
3,26/01/2005 18:04:46,1201.391,0,5249,1,15,1,1,160.0,4.116579,...,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000205,0.000183
4,26/01/2005 18:09:46,1502.000,0,6489,1,15,1,1,160.0,4.116579,...,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000190,0.000166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4507,28/06/2005 18:14:50,19228.875,0,188595,1,14,1,1,380.0,9.776876,...,7.619870e-08,1.150440e-07,1.009010e-07,-0.001520,-0.004245,0.000139,7,1,0.000269,0.000242
4508,28/06/2005 18:19:50,19529.266,0,191533,1,14,1,1,380.0,9.776876,...,7.619870e-08,1.158750e-07,1.009010e-07,-0.001520,-0.004238,0.000139,7,1,0.000289,0.000249
4509,28/06/2005 18:24:51,19830.219,0,194473,1,14,1,1,380.0,9.776876,...,8.653180e-08,1.161450e-07,1.009010e-07,-0.001433,-0.004235,0.000139,7,1,0.000279,0.000247
4510,28/06/2005 18:29:52,20130.969,0,197411,1,14,1,1,380.0,9.776876,...,8.734500e-08,1.074240e-07,1.009010e-07,-0.001433,-0.004205,0.000139,7,1,0.000282,0.000243


In [28]:
# List the distinct values held by the column at position 4.
column_4 = combined_df.iloc[:, 4]
unique_values_col4 = column_4.unique()
print(unique_values_col4)


[1]


In [24]:
# List the distinct values held by the column at position 5.
# The result is named after the column actually inspected (was a copy-pasted "col4").
unique_values_col5 = combined_df.iloc[:, 5].unique()

print(unique_values_col5)


[15 14]


In [26]:
# List the distinct values held by the column at position 6.
# The result is named after the column actually inspected (was a copy-pasted "col4").
unique_values_col6 = combined_df.iloc[:, 6].unique()

print(unique_values_col6)


[1]


In [27]:
# List the distinct values held by the column at position 7.
# The result is named after the column actually inspected (was a copy-pasted "col4").
unique_values_col7 = combined_df.iloc[:, 7].unique()

print(unique_values_col7)


[1]


In [29]:
# Remove the columns at positions 4 through 7 (end of the slice is exclusive).
# NOTE(review): the checks above printed a single value for positions 4, 6 and 7,
# but two values ([15 14]) for position 5; confirm that dropping it is intended.
combined_df = combined_df.drop(columns=combined_df.columns[4:8])


In [30]:
# Display the frame after the columns at positions 4-7 have been removed.
combined_df

Unnamed: 0,Date,Period,2,Cycle_Number,velocity,9,10,11,12,13,15,16,17,experiment_number,position,displacement,smoothed_displacement
0,26/01/2005 17:49:45,300.641,0,1542,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000167
1,26/01/2005 17:54:45,600.656,0,2777,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000169
2,26/01/2005 17:59:46,901.156,0,4014,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000187,0.000166
3,26/01/2005 18:04:46,1201.391,0,5249,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000205,0.000183
4,26/01/2005 18:09:46,1502.000,0,6489,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000190,0.000166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4507,28/06/2005 18:14:50,19228.875,0,188595,380.0,9.776876,1.0,7.619870e-08,1.150440e-07,1.009010e-07,-0.001520,-0.004245,0.000139,7,1,0.000269,0.000242
4508,28/06/2005 18:19:50,19529.266,0,191533,380.0,9.776876,1.0,7.619870e-08,1.158750e-07,1.009010e-07,-0.001520,-0.004238,0.000139,7,1,0.000289,0.000249
4509,28/06/2005 18:24:51,19830.219,0,194473,380.0,9.776876,1.0,8.653180e-08,1.161450e-07,1.009010e-07,-0.001433,-0.004235,0.000139,7,1,0.000279,0.000247
4510,28/06/2005 18:29:52,20130.969,0,197411,380.0,9.776876,1.0,8.734500e-08,1.074240e-07,1.009010e-07,-0.001433,-0.004205,0.000139,7,1,0.000282,0.000243


In [31]:
# List the distinct values held by the column at position 2.
# The result is named after the column actually inspected (was a copy-pasted "col4").
unique_values_col2 = combined_df.iloc[:, 2].unique()

print(unique_values_col2)


[0]


In [38]:
# Remove the column at position 2; the preceding check printed a single value ([0]) for it.
combined_df = combined_df.drop(columns=combined_df.columns[2])
combined_df


Unnamed: 0,Date,Period,Cycle_Number,velocity,9,10,11,12,13,15,16,17,experiment_number,position,displacement,smoothed_displacement
0,26/01/2005 17:49:45,300.641,1542,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000167
1,26/01/2005 17:54:45,600.656,2777,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000169
2,26/01/2005 17:59:46,901.156,4014,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000187,0.000166
3,26/01/2005 18:04:46,1201.391,5249,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000205,0.000183
4,26/01/2005 18:09:46,1502.000,6489,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000190,0.000166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4507,28/06/2005 18:14:50,19228.875,188595,380.0,9.776876,1.0,7.619870e-08,1.150440e-07,1.009010e-07,-0.001520,-0.004245,0.000139,7,1,0.000269,0.000242
4508,28/06/2005 18:19:50,19529.266,191533,380.0,9.776876,1.0,7.619870e-08,1.158750e-07,1.009010e-07,-0.001520,-0.004238,0.000139,7,1,0.000289,0.000249
4509,28/06/2005 18:24:51,19830.219,194473,380.0,9.776876,1.0,8.653180e-08,1.161450e-07,1.009010e-07,-0.001433,-0.004235,0.000139,7,1,0.000279,0.000247
4510,28/06/2005 18:29:52,20130.969,197411,380.0,9.776876,1.0,8.734500e-08,1.074240e-07,1.009010e-07,-0.001433,-0.004205,0.000139,7,1,0.000282,0.000243


In [39]:
# Remove the first column (position 0); in the displayed output this is the Date column.
combined_df = combined_df.drop(columns=combined_df.columns[0])
combined_df


Unnamed: 0,Period,Cycle_Number,velocity,9,10,11,12,13,15,16,17,experiment_number,position,displacement,smoothed_displacement
0,300.641,1542,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000167
1,600.656,2777,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000169
2,901.156,4014,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000187,0.000166
3,1201.391,5249,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000205,0.000183
4,1502.000,6489,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000190,0.000166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4507,19228.875,188595,380.0,9.776876,1.0,7.619870e-08,1.150440e-07,1.009010e-07,-0.001520,-0.004245,0.000139,7,1,0.000269,0.000242
4508,19529.266,191533,380.0,9.776876,1.0,7.619870e-08,1.158750e-07,1.009010e-07,-0.001520,-0.004238,0.000139,7,1,0.000289,0.000249
4509,19830.219,194473,380.0,9.776876,1.0,8.653180e-08,1.161450e-07,1.009010e-07,-0.001433,-0.004235,0.000139,7,1,0.000279,0.000247
4510,20130.969,197411,380.0,9.776876,1.0,8.734500e-08,1.074240e-07,1.009010e-07,-0.001433,-0.004205,0.000139,7,1,0.000282,0.000243


Two Options for Modeling:
1. Train separate models for each curve (i.e., each combination of experiment, position, and velocity):
- Pros:
    - Each model will focus on a specific setup, so it will capture the unique behavior of that particular experiment/position/velocity combination.
    - You avoid potential interference between the dynamics of different curves.
- Cons:
    - This will require training a separate model for each combination, which could be computationally expensive if there are many combinations.
    - It may be harder to generalize to new conditions or combinations.


2. Train a single model for all curves together and provide experiment/position/velocity as inputs:
- Pros:
    - You only need one model, and the model may be able to generalize across different experiments, positions, and velocities.
    - It's more scalable if you have many combinations.
- Cons:
    - It could be harder for the model to learn the nuances of each curve, especially if the curves have very different behaviors.
    - There may be some interference between different curves, reducing the accuracy of predictions for specific conditions.

# Train separate models for each curve

In [43]:
# Row masks that isolate one curve: experiment 2, position code 0, velocity 160.
is_experiment_2 = combined_df['experiment_number'] == 2
is_position_0 = combined_df['position'] == 0
is_velocity_160 = combined_df['velocity'] == 160

df_filtered_160_2_0 = combined_df[is_experiment_2 & is_position_0 & is_velocity_160]

In [44]:
# Display the rows selected for experiment 2, position 0, velocity 160.
df_filtered_160_2_0

Unnamed: 0,Period,Cycle_Number,velocity,9,10,11,12,13,15,16,17,experiment_number,position,displacement,smoothed_displacement
0,300.641,1542,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000167
1,600.656,2777,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000197,0.000169
2,901.156,4014,160.0,4.116579,1.0,6.349900e-08,1.476480e-07,9.482670e-08,-0.010842,-0.009141,-0.005062,2,0,0.000187,0.000166
3,1201.391,5249,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000205,0.000183
4,1502.000,6489,160.0,4.116579,1.0,6.349900e-08,1.445110e-07,9.482670e-08,-0.010842,-0.009130,-0.005062,2,0,0.000190,0.000166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,47179.187,194525,160.0,4.116579,1.0,6.992620e-08,1.176920e-07,7.857060e-08,-0.010790,-0.008892,-0.004856,2,0,0.000185,0.000161
157,47479.891,195763,160.0,4.116579,1.0,6.992620e-08,1.176920e-07,7.857060e-08,-0.010790,-0.008892,-0.004856,2,0,0.000190,0.000169
158,47780.484,197001,160.0,4.116579,1.0,6.992620e-08,1.176920e-07,7.857060e-08,-0.010790,-0.008892,-0.004856,2,0,0.000195,0.000166
159,48080.625,198236,160.0,4.116579,1.0,6.992620e-08,1.176920e-07,7.857060e-08,-0.010790,-0.008892,-0.004856,2,0,0.000185,0.000157


In [45]:
import xgboost as xgb
from sklearn.model_selection import train_test_split


# Inputs and target for the single-curve model.
# NOTE(review): 'velocity' is fixed at 160 in this subset (see the filter that built
# df_filtered_160_2_0), so only 'Cycle_Number' varies between rows.
feature_columns = ['velocity', 'Cycle_Number']
X = df_filtered_160_2_0[feature_columns]
y = df_filtered_160_2_0['smoothed_displacement']

# Hold out 20% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# XGBoost regressor with the squared-error objective, fitted on the training rows.
model = xgb.XGBRegressor(objective='reg:squarederror')
model.fit(X_train, y_train)


In [46]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)

# Mean squared error and R² of the test-set predictions.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")


Mean Squared Error: 1.464549081850681e-11
R-squared Score: -0.02205485728868295


In [58]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Features and target taken from the full combined frame.
feature_columns = ['velocity', 'Cycle_Number', 'experiment_number', 'position']
X = combined_df[feature_columns]
y = combined_df['smoothed_displacement']

# Hold out 20% of the rows for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate regressors, keyed by the label used in the printed report.
# NOTE(review): SVR and MLPRegressor are fitted on unscaled inputs and target here;
# their very poor scores below may stem from that. Consider standardising.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Support Vector Regressor": SVR(),
    "Neural Network": MLPRegressor(hidden_layer_sizes=(100,), max_iter=10000, random_state=42)
}

# Fit each candidate and report its test-set error metrics.
for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"{name} - Mean Squared Error: {mse}")
    print(f"{name} - R-squared Score: {r2}\n")

Linear Regression - Mean Squared Error: 6.681713374964392e-10
Linear Regression - R-squared Score: 0.5574866278249495

Random Forest - Mean Squared Error: 2.818071659693187e-11
Random Forest - R-squared Score: 0.9813366074960015

Support Vector Regressor - Mean Squared Error: 3.095516651517141e-09
Support Vector Regressor - R-squared Score: -1.0500842152544

Neural Network - Mean Squared Error: 0.3863255679261871
Neural Network - R-squared Score: -255853879.92372283



# Train a single model for all curves together and provide experiment/position/velocity as inputs

In [None]:
# Use the entire dataset
# NOTE(review): the section heading lists experiment/position as inputs, but only
# 'velocity' and 'Cycle_Number' are selected here; confirm the intended feature set.
X = combined_df[['velocity', 'Cycle_Number']]
y = combined_df['smoothed_displacement']

# Split the X/y defined in this cell. Without this, the loop below would silently
# reuse whatever X_train/X_test/y_train/y_test an earlier cell left in the kernel,
# so the models would not be trained on the features selected above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate regressors, keyed by the label used in the printed report.
models = {
    "xgboost": xgb.XGBRegressor(objective='reg:squarederror'),
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Support Vector Regressor": SVR(),
    "Neural Network": MLPRegressor(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
}

# Fit each candidate on the training rows and report its test-set metrics.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"{name} - Mean Squared Error: {mse}")
    print(f"{name} - R-squared Score: {r2}\n")