In [1]:
# import relevant libraries
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, HalvingGridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

## Raw PD - Unweighted

In [77]:
# import data from csv to dataframe
filename = "raw_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads', 'reward']].values
y = df['steering_angle'].values

Xtrain, Xval, ytrain, yval = train_test_split(X, y, test_size=0.2, random_state=42)

display(df.head())

Unnamed: 0,steering_angle,distance_to_road_center,angle_from_straight_in_rads,reward
0,2.588463,1.026888,2.956561,-0.0123
1,2.588463,1.026888,2.956561,-0.0123
2,2.588463,1.026888,2.956561,-0.0123
3,2.588463,1.026888,2.956561,-0.0123
4,2.588463,1.026888,2.956561,-0.012614


In [78]:
# grid search for promising hyperparameters
svr = SVR()

param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3703
max_resources_: 100000
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3703
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV]

In [82]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3703,0.043534,0.021055,0.000499,0.000086,0.1,0.001,scale,linear,...,-5.364838e-08,-6.802847e-08,-5.525686e-08,-4.940374e-08,-4.914422e-08,-5.018484e-08,-8.564492e-08,-1.533505e-07,-6.602716e-08,3.125422e-08
1,0,3703,0.268333,0.021381,0.025417,0.005450,0.1,0.001,scale,rbf,...,-1.065677e-01,-1.184177e-01,-1.501596e-01,-1.243474e-01,-1.316804e-01,-1.539648e-01,-1.406657e-01,-1.095484e-01,-1.328500e-01,1.693960e-02
2,0,3703,0.039303,0.020249,0.000382,0.000061,0.1,0.001,auto,linear,...,-5.364838e-08,-6.802847e-08,-5.525686e-08,-4.940374e-08,-4.914422e-08,-5.018484e-08,-8.564492e-08,-1.533505e-07,-6.602716e-08,3.125422e-08
3,0,3703,0.247307,0.027311,0.020846,0.003145,0.1,0.001,auto,rbf,...,-1.067834e-01,-1.167868e-01,-1.546662e-01,-1.229659e-01,-1.272864e-01,-1.544053e-01,-1.392287e-01,-1.079589e-01,-1.327461e-01,1.835795e-02
4,0,3703,0.035903,0.002844,0.000345,0.000026,0.1,0.01,scale,linear,...,-1.997468e-05,-1.887150e-05,-2.329631e-05,-2.329797e-05,-1.170270e-05,-3.055220e-05,-1.885717e-05,-2.023128e-05,-2.131086e-05,4.582111e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,33327,0.099553,0.153064,0.001095,0.000486,0.1,0.001,auto,linear,...,-2.422865e-07,-8.394877e-08,-1.301098e-07,-5.801388e-08,-3.575473e-07,-3.079214e-08,-1.456802e-07,-4.947778e-08,-1.373882e-07,9.557099e-08
93,2,33327,0.098487,0.154009,0.000945,0.000198,0.1,0.001,scale,linear,...,-2.422865e-07,-8.394877e-08,-1.301098e-07,-5.801388e-08,-3.575473e-07,-3.079214e-08,-1.456802e-07,-4.947778e-08,-1.373882e-07,9.557099e-08
94,3,99981,0.076890,0.020355,0.001876,0.000397,100,0.001,auto,linear,...,-1.772553e-07,-1.147210e-07,-1.907519e-07,-1.884190e-07,-1.158572e-07,-2.979060e-07,-1.864523e-07,-1.868100e-07,-1.687497e-07,5.461865e-08
95,3,99981,0.051301,0.014612,0.002375,0.000526,0.1,0.001,auto,linear,...,-1.992692e-07,-1.054041e-07,-2.228839e-07,-2.027180e-07,-1.237591e-07,-2.547523e-07,-1.416302e-07,-1.054424e-07,-1.566703e-07,5.454735e-08


In [84]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 0.1, 'epsilon': 0.001, 'gamma': 'auto', 'kernel': 'linear'}
Validation MSE: 1.0540021607895788e-07
Validation MAE: 0.00029586180661647063
Validation R2 Score: 0.9999998945313001


In [85]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/unweighted_svr_model_raw_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)


## Raw PD - Weighted

In [86]:
# import data from csv to dataframe
filename = "raw_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads']].values
y = df['steering_angle'].values
y = y.astype(float)
r = df['reward'].values

# normalize rewards to [0, 1] range
norm_r = (r - np.min(r)) / (np.max(r) - np.min(r))

# calculate weights based on normalized rewards
weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))

Xtrain, Xval, ytrain, yval, wtrain, wval = train_test_split(X, y, weights, test_size=0.2, random_state=42)

display(df.head())

[2.58846306 2.58846306 2.58846306 ... 0.14081114 0.1391202  0.13739354]


  weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))


Unnamed: 0,steering_angle,distance_to_road_center,angle_from_straight_in_rads,reward
0,2.588463,1.026888,2.956561,-0.0123
1,2.588463,1.026888,2.956561,-0.0123
2,2.588463,1.026888,2.956561,-0.0123
3,2.588463,1.026888,2.956561,-0.0123
4,2.588463,1.026888,2.956561,-0.012614


In [89]:
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain, sample_weight=wtrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3703
max_resources_: 100000
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3703
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV]

In [90]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3703,0.016856,0.023961,0.000897,0.000530,0.1,0.001,scale,linear,...,-7.533455e-08,-2.758568e-07,-2.604053e-07,-2.331553e-07,-1.326259e-07,-1.289424e-07,-1.350049e-07,-2.011746e-07,-1.530469e-07,8.169558e-08
1,0,3703,0.316312,0.035524,0.017686,0.005466,0.1,0.001,scale,rbf,...,-2.617948e-02,-3.813765e-02,-3.391033e-02,-4.635550e-02,-3.403823e-02,-4.031235e-02,-1.911285e-02,-3.929818e-02,-3.164845e-02,1.101845e-02
2,0,3703,0.015653,0.027186,0.000275,0.000064,0.1,0.001,auto,linear,...,-7.533455e-08,-2.758568e-07,-2.604053e-07,-2.331553e-07,-1.326259e-07,-1.289424e-07,-1.350049e-07,-2.011746e-07,-1.530469e-07,8.169558e-08
3,0,3703,0.336511,0.039031,0.019033,0.006068,0.1,0.001,auto,rbf,...,-2.804047e-02,-3.694264e-02,-3.334601e-02,-4.430016e-02,-3.347460e-02,-4.079506e-02,-1.796937e-02,-4.007505e-02,-3.127417e-02,1.089057e-02
4,0,3703,0.035587,0.019617,0.000407,0.000192,0.1,0.01,scale,linear,...,-2.600040e-05,-1.787967e-05,-2.821909e-05,-2.520017e-05,-2.660738e-05,-2.786530e-05,-1.925643e-05,-2.240789e-05,-2.346077e-05,3.737918e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,33327,0.012251,0.010601,0.000674,0.000094,0.1,0.001,auto,linear,...,-5.708498e-08,-1.339092e-07,-9.256985e-08,-1.662342e-07,-2.571048e-07,-2.770463e-07,-1.469856e-07,-1.467244e-07,-1.453224e-07,7.401163e-08
93,2,33327,0.011718,0.010322,0.000676,0.000124,0.1,0.001,scale,linear,...,-5.708498e-08,-1.339092e-07,-9.256985e-08,-1.662342e-07,-2.571048e-07,-2.770463e-07,-1.469856e-07,-1.467244e-07,-1.453224e-07,7.401163e-08
94,3,99981,0.104765,0.056547,0.002395,0.000826,1,0.001,scale,linear,...,-3.950935e-08,-9.745935e-08,-2.463539e-08,-2.178246e-07,-3.942541e-08,-2.310758e-08,-3.510027e-08,-3.488931e-08,-5.911691e-08,5.642787e-08
95,3,99981,0.110273,0.053352,0.001865,0.000277,0.1,0.001,auto,linear,...,-3.950935e-08,-9.745935e-08,-2.463539e-08,-2.178246e-07,-3.942541e-08,-2.310758e-08,-3.510027e-08,-3.488931e-08,-5.911691e-08,5.642787e-08


In [93]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 1, 'epsilon': 0.001, 'gamma': 'scale', 'kernel': 'linear'}
Validation MSE: 3.955278405755954e-08
Validation MAE: 0.00015181337345533186
Validation R2 Score: 0.999999960419506


In [94]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/weighted_svr_model_raw_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)

## Processed PD - Unweighted

In [2]:
# import data from csv to dataframe
filename = "processed_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads', 'reward']].values
y = df['steering_angle'].values

Xtrain, Xval, ytrain, yval = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
# grid search for promising hyperparameters
svr = SVR()

param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3161
max_resources_: 85357
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3161
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] 

In [4]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3161,0.006443,0.004476,0.000854,0.000752,0.1,0.001,scale,linear,...,-1.466377e-06,-5.472342e-07,-1.042943e-06,-4.265111e-07,-6.597548e-07,-6.092923e-07,-7.702466e-07,-2.493912e-07,-7.720937e-07,3.375322e-07
1,0,3161,0.323501,0.026613,0.032404,0.014752,0.1,0.001,scale,rbf,...,-2.260040e-02,-2.632816e-02,-2.300201e-02,-1.550382e-02,-1.139847e-02,-2.304079e-02,-2.205009e-02,-2.022066e-02,-2.065005e-02,4.176254e-03
2,0,3161,0.004009,0.002593,0.000476,0.000193,0.1,0.001,auto,linear,...,-1.466377e-06,-5.472342e-07,-1.042943e-06,-4.265111e-07,-6.597548e-07,-6.092923e-07,-7.702466e-07,-2.493912e-07,-7.720937e-07,3.375322e-07
3,0,3161,0.332662,0.031991,0.049914,0.025865,0.1,0.001,auto,rbf,...,-2.327633e-02,-2.678475e-02,-2.403984e-02,-1.523244e-02,-1.098328e-02,-2.352416e-02,-2.247903e-02,-1.989848e-02,-2.087625e-02,4.483791e-03
4,0,3161,0.042719,0.018514,0.000570,0.000296,0.1,0.01,scale,linear,...,-1.500686e-05,-5.234374e-05,-6.591920e-05,-3.890943e-05,-2.647879e-05,-4.155475e-05,-2.292234e-05,-4.275324e-05,-3.936659e-05,1.404256e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,28449,0.859028,0.503274,0.001169,0.000206,1,0.001,scale,linear,...,-4.725660e-07,-7.105296e-07,-2.971472e-07,-4.106582e-07,-1.212041e-06,-4.761411e-07,-7.991844e-07,-2.449043e-07,-5.127402e-07,2.976814e-07
93,2,28449,0.792547,0.444339,0.001213,0.000275,10,0.001,auto,linear,...,-4.725660e-07,-7.105296e-07,-2.971472e-07,-4.106582e-07,-1.212041e-06,-4.761411e-07,-7.991844e-07,-2.449043e-07,-5.127402e-07,2.976814e-07
94,3,85347,74.589244,66.341662,0.009786,0.013053,1,0.001,auto,linear,...,-4.497641e-07,-4.344961e-07,-2.143046e-07,-1.830515e-07,-1.738878e-07,-1.820219e-07,-2.501467e-07,-1.847476e-07,-2.424140e-07,1.026808e-07
95,3,85347,74.652520,66.349143,0.007215,0.004625,1,0.001,scale,linear,...,-4.497641e-07,-4.344961e-07,-2.143046e-07,-1.830515e-07,-1.738878e-07,-1.820219e-07,-2.501467e-07,-1.847476e-07,-2.424140e-07,1.026808e-07


In [5]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 1, 'epsilon': 0.001, 'gamma': 'auto', 'kernel': 'linear'}
Validation MSE: 1.737632646630969e-07
Validation MAE: 0.0003403588583477811
Validation R2 Score: 0.9999998284528123


In [7]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/unweighted_svr_model_processed_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)

## Processed PD - Weighted

In [8]:
# import data from csv to dataframe
filename = "processed_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads']].values
y = df['steering_angle'].values
y = y.astype(float)
r = df['reward'].values

# normalize rewards to [0, 1] range
norm_r = (r - np.min(r)) / (np.max(r) - np.min(r))

# calculate weights based on normalized rewards
weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))

Xtrain, Xval, ytrain, yval, wtrain, wval = train_test_split(X, y, weights, test_size=0.2, random_state=42)

display(df.head())

  weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))


Unnamed: 0,steering_angle,distance_to_road_center,angle_from_straight_in_rads,reward
0,-3.411134,-3.017721,-1.561374,-2.491427
1,-3.411134,-3.017721,-1.561374,-2.491427
2,-3.411134,-3.017721,-1.561374,-2.491427
3,-3.411134,-3.017721,-1.561374,-2.491427
4,-3.411134,-3.017721,-1.561374,-2.493814


In [9]:
svr = SVR()

param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain, sample_weight=wtrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3161
max_resources_: 85357
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3161
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] 

In [10]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3161,0.008674,0.017782,0.000468,0.000165,0.1,0.001,scale,linear,...,-2.898190e-07,-1.128313e-07,-2.383224e-07,-2.973432e-07,-1.116483e-07,-3.209422e-07,-1.146272e-07,-2.897848e-07,-2.113137e-07,8.537798e-08
1,0,3161,0.274429,0.036567,0.014161,0.002145,0.1,0.001,scale,rbf,...,-1.779563e-02,-1.856602e-02,-1.923067e-02,-1.355260e-02,-1.490672e-02,-2.005178e-02,-1.808008e-02,-1.457346e-02,-1.764746e-02,2.430612e-03
2,0,3161,0.007341,0.013239,0.000402,0.000238,0.1,0.001,auto,linear,...,-2.898190e-07,-1.128313e-07,-2.383224e-07,-2.973432e-07,-1.116483e-07,-3.209422e-07,-1.146272e-07,-2.897848e-07,-2.113137e-07,8.537798e-08
3,0,3161,0.252534,0.050046,0.026410,0.023033,0.1,0.001,auto,rbf,...,-1.836752e-02,-1.840615e-02,-1.951190e-02,-1.389065e-02,-1.536996e-02,-1.875483e-02,-1.853827e-02,-1.402712e-02,-1.768708e-02,2.325832e-03
4,0,3161,0.034612,0.013345,0.000406,0.000053,0.1,0.01,scale,linear,...,-1.301152e-05,-1.152910e-05,-1.337642e-05,-1.441204e-05,-1.424758e-05,-1.095896e-05,-1.201689e-05,-1.142147e-05,-1.249713e-05,1.144747e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,28449,0.369989,0.216593,0.000911,0.000099,0.1,0.001,scale,linear,...,-7.846012e-08,-1.111879e-07,-8.030581e-08,-2.714407e-07,-1.443569e-07,-1.319943e-07,-1.390085e-07,-1.611606e-07,-1.490311e-07,6.067345e-08
93,2,28449,0.370408,0.204419,0.000930,0.000110,0.1,0.001,auto,linear,...,-7.846012e-08,-1.111879e-07,-8.030581e-08,-2.714407e-07,-1.443569e-07,-1.319943e-07,-1.390085e-07,-1.611606e-07,-1.490311e-07,6.067345e-08
94,3,85347,4.624191,6.227064,0.002365,0.000610,10,0.001,auto,linear,...,-2.225809e-07,-2.863960e-07,-1.110684e-07,-1.113207e-07,-1.104612e-07,-1.096342e-07,-1.684085e-07,-1.114216e-07,-1.462598e-07,5.842281e-08
95,3,85347,28.656149,43.906328,0.003333,0.001503,0.1,0.001,scale,linear,...,-1.709963e-07,-1.249857e-07,-1.108719e-07,-1.397418e-07,-1.392808e-07,-1.161034e-07,-1.121731e-07,-1.113801e-07,-1.288200e-07,1.796845e-08


In [11]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 0.1, 'epsilon': 0.001, 'gamma': 'scale', 'kernel': 'linear'}
Validation MSE: 1.3921678708593077e-07
Validation MAE: 0.0002696904495462891
Validation R2 Score: 0.999999862560256


In [12]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/weighted_svr_model_processed_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)