In [2]:
# import relevant libraries
import os
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, HalvingGridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.utils import resample
from skopt import BayesSearchCV
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

## Raw PD - Unweighted

In [77]:
# import data from csv to dataframe
filename = "raw_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads', 'reward']].values
y = df['steering_angle'].values

Xtrain, Xval, ytrain, yval = train_test_split(X, y, test_size=0.2, random_state=42)

display(df.head())

Unnamed: 0,steering_angle,distance_to_road_center,angle_from_straight_in_rads,reward
0,2.588463,1.026888,2.956561,-0.0123
1,2.588463,1.026888,2.956561,-0.0123
2,2.588463,1.026888,2.956561,-0.0123
3,2.588463,1.026888,2.956561,-0.0123
4,2.588463,1.026888,2.956561,-0.012614


In [78]:
# grid search for promising hyperparameters
svr = SVR()

param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3703
max_resources_: 100000
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3703
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV]

In [82]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3703,0.043534,0.021055,0.000499,0.000086,0.1,0.001,scale,linear,...,-5.364838e-08,-6.802847e-08,-5.525686e-08,-4.940374e-08,-4.914422e-08,-5.018484e-08,-8.564492e-08,-1.533505e-07,-6.602716e-08,3.125422e-08
1,0,3703,0.268333,0.021381,0.025417,0.005450,0.1,0.001,scale,rbf,...,-1.065677e-01,-1.184177e-01,-1.501596e-01,-1.243474e-01,-1.316804e-01,-1.539648e-01,-1.406657e-01,-1.095484e-01,-1.328500e-01,1.693960e-02
2,0,3703,0.039303,0.020249,0.000382,0.000061,0.1,0.001,auto,linear,...,-5.364838e-08,-6.802847e-08,-5.525686e-08,-4.940374e-08,-4.914422e-08,-5.018484e-08,-8.564492e-08,-1.533505e-07,-6.602716e-08,3.125422e-08
3,0,3703,0.247307,0.027311,0.020846,0.003145,0.1,0.001,auto,rbf,...,-1.067834e-01,-1.167868e-01,-1.546662e-01,-1.229659e-01,-1.272864e-01,-1.544053e-01,-1.392287e-01,-1.079589e-01,-1.327461e-01,1.835795e-02
4,0,3703,0.035903,0.002844,0.000345,0.000026,0.1,0.01,scale,linear,...,-1.997468e-05,-1.887150e-05,-2.329631e-05,-2.329797e-05,-1.170270e-05,-3.055220e-05,-1.885717e-05,-2.023128e-05,-2.131086e-05,4.582111e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,33327,0.099553,0.153064,0.001095,0.000486,0.1,0.001,auto,linear,...,-2.422865e-07,-8.394877e-08,-1.301098e-07,-5.801388e-08,-3.575473e-07,-3.079214e-08,-1.456802e-07,-4.947778e-08,-1.373882e-07,9.557099e-08
93,2,33327,0.098487,0.154009,0.000945,0.000198,0.1,0.001,scale,linear,...,-2.422865e-07,-8.394877e-08,-1.301098e-07,-5.801388e-08,-3.575473e-07,-3.079214e-08,-1.456802e-07,-4.947778e-08,-1.373882e-07,9.557099e-08
94,3,99981,0.076890,0.020355,0.001876,0.000397,100,0.001,auto,linear,...,-1.772553e-07,-1.147210e-07,-1.907519e-07,-1.884190e-07,-1.158572e-07,-2.979060e-07,-1.864523e-07,-1.868100e-07,-1.687497e-07,5.461865e-08
95,3,99981,0.051301,0.014612,0.002375,0.000526,0.1,0.001,auto,linear,...,-1.992692e-07,-1.054041e-07,-2.228839e-07,-2.027180e-07,-1.237591e-07,-2.547523e-07,-1.416302e-07,-1.054424e-07,-1.566703e-07,5.454735e-08


In [84]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 0.1, 'epsilon': 0.001, 'gamma': 'auto', 'kernel': 'linear'}
Validation MSE: 1.0540021607895788e-07
Validation MAE: 0.00029586180661647063
Validation R2 Score: 0.9999998945313001


In [85]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/unweighted_svr_model_raw_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)


## Raw PD - Weighted

In [86]:
# import data from csv to dataframe
filename = "raw_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads']].values
y = df['steering_angle'].values
y = y.astype(float)
r = df['reward'].values

# normalize rewards to [0, 1] range
norm_r = (r - np.min(r)) / (np.max(r) - np.min(r))

# calculate weights based on normalized rewards
weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))

Xtrain, Xval, ytrain, yval, wtrain, wval = train_test_split(X, y, weights, test_size=0.2, random_state=42)

display(df.head())

[2.58846306 2.58846306 2.58846306 ... 0.14081114 0.1391202  0.13739354]


  weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))


Unnamed: 0,steering_angle,distance_to_road_center,angle_from_straight_in_rads,reward
0,2.588463,1.026888,2.956561,-0.0123
1,2.588463,1.026888,2.956561,-0.0123
2,2.588463,1.026888,2.956561,-0.0123
3,2.588463,1.026888,2.956561,-0.0123
4,2.588463,1.026888,2.956561,-0.012614


In [89]:
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain, sample_weight=wtrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3703
max_resources_: 100000
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3703
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.1s
[CV]

In [90]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3703,0.016856,0.023961,0.000897,0.000530,0.1,0.001,scale,linear,...,-7.533455e-08,-2.758568e-07,-2.604053e-07,-2.331553e-07,-1.326259e-07,-1.289424e-07,-1.350049e-07,-2.011746e-07,-1.530469e-07,8.169558e-08
1,0,3703,0.316312,0.035524,0.017686,0.005466,0.1,0.001,scale,rbf,...,-2.617948e-02,-3.813765e-02,-3.391033e-02,-4.635550e-02,-3.403823e-02,-4.031235e-02,-1.911285e-02,-3.929818e-02,-3.164845e-02,1.101845e-02
2,0,3703,0.015653,0.027186,0.000275,0.000064,0.1,0.001,auto,linear,...,-7.533455e-08,-2.758568e-07,-2.604053e-07,-2.331553e-07,-1.326259e-07,-1.289424e-07,-1.350049e-07,-2.011746e-07,-1.530469e-07,8.169558e-08
3,0,3703,0.336511,0.039031,0.019033,0.006068,0.1,0.001,auto,rbf,...,-2.804047e-02,-3.694264e-02,-3.334601e-02,-4.430016e-02,-3.347460e-02,-4.079506e-02,-1.796937e-02,-4.007505e-02,-3.127417e-02,1.089057e-02
4,0,3703,0.035587,0.019617,0.000407,0.000192,0.1,0.01,scale,linear,...,-2.600040e-05,-1.787967e-05,-2.821909e-05,-2.520017e-05,-2.660738e-05,-2.786530e-05,-1.925643e-05,-2.240789e-05,-2.346077e-05,3.737918e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,33327,0.012251,0.010601,0.000674,0.000094,0.1,0.001,auto,linear,...,-5.708498e-08,-1.339092e-07,-9.256985e-08,-1.662342e-07,-2.571048e-07,-2.770463e-07,-1.469856e-07,-1.467244e-07,-1.453224e-07,7.401163e-08
93,2,33327,0.011718,0.010322,0.000676,0.000124,0.1,0.001,scale,linear,...,-5.708498e-08,-1.339092e-07,-9.256985e-08,-1.662342e-07,-2.571048e-07,-2.770463e-07,-1.469856e-07,-1.467244e-07,-1.453224e-07,7.401163e-08
94,3,99981,0.104765,0.056547,0.002395,0.000826,1,0.001,scale,linear,...,-3.950935e-08,-9.745935e-08,-2.463539e-08,-2.178246e-07,-3.942541e-08,-2.310758e-08,-3.510027e-08,-3.488931e-08,-5.911691e-08,5.642787e-08
95,3,99981,0.110273,0.053352,0.001865,0.000277,0.1,0.001,auto,linear,...,-3.950935e-08,-9.745935e-08,-2.463539e-08,-2.178246e-07,-3.942541e-08,-2.310758e-08,-3.510027e-08,-3.488931e-08,-5.911691e-08,5.642787e-08


In [93]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 1, 'epsilon': 0.001, 'gamma': 'scale', 'kernel': 'linear'}
Validation MSE: 3.955278405755954e-08
Validation MAE: 0.00015181337345533186
Validation R2 Score: 0.999999960419506


In [94]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/weighted_svr_model_raw_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)

## Processed PD - Unweighted

In [95]:
# import data from csv to dataframe
filename = "processed_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads', 'reward']].values
y = df['steering_angle'].values

Xtrain, Xval, ytrain, yval = train_test_split(X, y, test_size=0.2, random_state=42)

In [96]:
# grid search for promising hyperparameters
svr = SVR()

param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3161
max_resources_: 85357
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3161
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] 



[CV] END .....C=10, epsilon=0.001, gamma=auto, kernel=linear; total time= 1.4min
[CV] END .....C=10, epsilon=0.001, gamma=auto, kernel=linear; total time= 1.4min
[CV] END ....C=10, epsilon=0.001, gamma=scale, kernel=linear; total time= 1.4min
[CV] END ....C=100, epsilon=0.001, gamma=auto, kernel=linear; total time=   1.3s
[CV] END ....C=10, epsilon=0.001, gamma=scale, kernel=linear; total time= 1.4min
[CV] END ....C=100, epsilon=0.001, gamma=auto, kernel=linear; total time=   1.4s
[CV] END ....C=100, epsilon=0.001, gamma=auto, kernel=linear; total time=   1.5s
[CV] END ....C=100, epsilon=0.001, gamma=auto, kernel=linear; total time=   1.6s
[CV] END ....C=100, epsilon=0.001, gamma=auto, kernel=linear; total time= 1.3min
[CV] END ....C=100, epsilon=0.001, gamma=auto, kernel=linear; total time=  33.1s
[CV] END .....C=10, epsilon=0.001, gamma=auto, kernel=linear; total time= 2.1min
[CV] END ....C=100, epsilon=0.001, gamma=auto, kernel=linear; total time= 1.5min
[CV] END ....C=10, epsilon=0

In [98]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3161,0.011494,0.005290,0.000933,0.000708,0.1,0.001,scale,linear,...,-1.209139e-06,-2.709472e-07,-7.710621e-07,-2.784239e-07,-1.127453e-07,-5.591394e-07,-2.393724e-07,-1.184925e-07,-3.975573e-07,3.345931e-07
1,0,3161,0.146859,0.022593,0.012839,0.007313,0.1,0.001,scale,rbf,...,-6.197582e-03,-5.326120e-03,-9.559407e-03,-6.967899e-03,-7.058005e-03,-7.907838e-03,-3.513710e-03,-5.993374e-03,-6.508845e-03,1.587342e-03
2,0,3161,0.008109,0.005444,0.000357,0.000045,0.1,0.001,auto,linear,...,-1.209139e-06,-2.709472e-07,-7.710621e-07,-2.784239e-07,-1.127453e-07,-5.591394e-07,-2.393724e-07,-1.184925e-07,-3.975573e-07,3.345931e-07
3,0,3161,0.205055,0.034345,0.017827,0.003428,0.1,0.001,auto,rbf,...,-3.244657e-03,-2.371846e-03,-5.018762e-03,-3.197721e-03,-3.091031e-03,-3.413856e-03,-1.587675e-03,-2.217987e-03,-2.927871e-03,9.043995e-04
4,0,3161,0.028287,0.009450,0.000320,0.000048,0.1,0.01,scale,linear,...,-1.441297e-05,-1.398398e-05,-1.616398e-05,-1.396619e-05,-1.431463e-05,-1.570315e-05,-1.347602e-05,-1.407775e-05,-1.445683e-05,8.045607e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,28449,0.782115,0.785639,0.000872,0.000227,10,0.001,scale,linear,...,-3.969574e-07,-1.502424e-07,-3.285080e-07,-3.585741e-07,-1.831908e-07,-4.907270e-07,-1.537342e-07,-5.074480e-07,-3.329045e-07,1.238149e-07
93,2,28449,0.720543,0.712749,0.001092,0.000849,100,0.001,auto,linear,...,-3.969574e-07,-1.502424e-07,-3.285080e-07,-3.585741e-07,-1.831908e-07,-4.907270e-07,-1.537342e-07,-5.074480e-07,-3.329045e-07,1.238149e-07
94,3,85347,46.300275,51.250140,0.003333,0.001563,10,0.001,auto,linear,...,-3.143866e-07,-1.686724e-07,-2.230433e-07,-2.070061e-07,-2.023878e-07,-1.836826e-07,-1.840874e-07,-2.632465e-07,-2.055991e-07,4.842767e-08
95,3,85347,45.963127,50.513032,0.003661,0.000868,10,0.001,scale,linear,...,-3.143866e-07,-1.686724e-07,-2.230433e-07,-2.070061e-07,-2.023878e-07,-1.836826e-07,-1.840874e-07,-2.632465e-07,-2.055991e-07,4.842767e-08


In [99]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 10, 'epsilon': 0.001, 'gamma': 'auto', 'kernel': 'linear'}
Validation MSE: 1.836280391553958e-07
Validation MAE: 0.0002937410014126963
Validation R2 Score: 0.9999995675582726


In [101]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/unweighted_svr_model_processed_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)

## Processed PD - Weighted

In [3]:
# import data from csv to dataframe
filename = "processed_data_pd.csv"
df = pd.read_csv(f"train_data/{filename}")

# split into input and target features
X = df[['distance_to_road_center', 'angle_from_straight_in_rads']].values
y = df['steering_angle'].values
y = y.astype(float)
r = df['reward'].values

# normalize rewards to [0, 1] range
norm_r = (r - np.min(r)) / (np.max(r) - np.min(r))

# calculate weights based on normalized rewards
weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))

Xtrain, Xval, ytrain, yval, wtrain, wval = train_test_split(X, y, weights, test_size=0.2, random_state=42)

display(df.head())

  weights = np.where(r < 0, 1 / (1 - norm_r), 1 / (1 + norm_r))


Unnamed: 0,steering_angle,distance_to_road_center,angle_from_straight_in_rads,reward
0,0.557262,1.527546,-1.589951,0.003948
1,0.537905,1.506958,-1.590062,0.006862
2,0.50721,1.486314,-1.610694,0.010916
3,0.488253,1.465624,-1.609903,0.013926
4,0.457963,1.444737,-1.629384,0.018048


In [5]:
svr = SVR()

param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'epsilon': [1e-3, 1e-2, 1e-1, 1]
}

halving_grid_search = HalvingGridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=10, n_jobs=-1, verbose=2)
halving_grid_search.fit(Xtrain, ytrain, sample_weight=wtrain)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 3161
max_resources_: 85357
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 64
n_resources: 3161
Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...C=0.1, epsilon=0.001, gamma=scale, kernel=linear; total time=   0.0s
[CV] 

In [6]:
halving_grid_search_df = pd.DataFrame(halving_grid_search.cv_results_)
display(halving_grid_search_df)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_epsilon,param_gamma,param_kernel,...,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,0,3161,0.011449,0.004920,0.001300,0.001556,0.1,0.001,scale,linear,...,-1.686744e-07,-1.373221e-07,-1.717422e-07,-2.063299e-07,-1.591303e-07,-1.960543e-07,-1.434334e-07,-1.948903e-07,-1.688136e-07,2.243027e-08
1,0,3161,0.153657,0.023387,0.020031,0.010180,0.1,0.001,scale,rbf,...,-8.260485e-03,-7.215830e-03,-7.507688e-03,-6.025745e-03,-6.175753e-03,-7.754988e-03,-1.076574e-02,-1.050465e-02,-7.863223e-03,1.587527e-03
2,0,3161,0.010300,0.005742,0.000399,0.000101,0.1,0.001,auto,linear,...,-1.686744e-07,-1.373221e-07,-1.717422e-07,-2.063299e-07,-1.591303e-07,-1.960543e-07,-1.434334e-07,-1.948903e-07,-1.688136e-07,2.243027e-08
3,0,3161,0.224869,0.013163,0.020024,0.004808,0.1,0.001,auto,rbf,...,-4.756999e-03,-4.362601e-03,-4.028665e-03,-3.391373e-03,-3.272170e-03,-4.462382e-03,-6.397198e-03,-6.315159e-03,-4.454917e-03,1.052064e-03
4,0,3161,0.033855,0.005875,0.000425,0.000183,0.1,0.01,scale,linear,...,-1.545062e-05,-1.667547e-05,-1.492400e-05,-1.641669e-05,-1.325955e-05,-1.474164e-05,-1.436564e-05,-1.453377e-05,-1.505768e-05,9.671962e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,2,28449,0.085594,0.057950,0.000911,0.000157,0.1,0.001,auto,linear,...,-2.241585e-07,-1.437756e-07,-1.631498e-07,-1.327995e-07,-1.197101e-07,-1.886966e-07,-1.659260e-07,-1.576006e-07,-1.563072e-07,3.233256e-08
93,2,28449,0.080122,0.061170,0.001569,0.001941,0.1,0.001,scale,linear,...,-2.241585e-07,-1.437756e-07,-1.631498e-07,-1.327995e-07,-1.197101e-07,-1.886966e-07,-1.659260e-07,-1.576006e-07,-1.563072e-07,3.233256e-08
94,3,85347,14.465966,26.327032,0.002235,0.000908,100,0.001,auto,linear,...,-1.710030e-07,-2.112692e-07,-1.975054e-07,-2.175271e-07,-1.900478e-07,-2.115807e-07,-2.014494e-07,-2.165036e-07,-2.027107e-07,1.372449e-08
95,3,85347,0.419316,0.160018,0.002698,0.000143,0.1,0.001,auto,linear,...,-1.398274e-07,-1.733140e-07,-1.232388e-07,-1.380551e-07,-1.207263e-07,-1.375208e-07,-1.144993e-07,-9.973183e-08,-1.379609e-07,2.446356e-08


In [7]:
best_svr = halving_grid_search.best_estimator_
best_params = halving_grid_search.best_params_

ypred = best_svr.predict(Xval)
val_mse = mean_squared_error(ypred, yval)
val_mae = mean_absolute_error(ypred, yval)
val_r2 = r2_score(ypred, yval)

print(f"Best parameters: {best_params}")
print(f"Validation MSE: {val_mse}")
print(f"Validation MAE: {val_mae}")
print(f"Validation R2 Score: {val_r2}")

Best parameters: {'C': 0.1, 'epsilon': 0.001, 'gamma': 'auto', 'kernel': 'linear'}
Validation MSE: 1.7307733700912563e-07
Validation MAE: 0.00032069194199606563
Validation R2 Score: 0.9999995924502597


In [8]:
# create folder for models
if not os.path.exists("models"):
    os.makedirs("models")

# save model in models
with open('models/weighted_svr_model_processed_pd.pkl', 'wb') as f:
    pickle.dump(best_svr, f)