In [11]:
# import relevant libraries
import pandas as pd
import numpy as np
import matplotlib as plt
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, RepeatedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from project_functions import huber_loss
from sklearn.utils import resample

## Unweighted

In [9]:
# Load the recorded samples from CSV into a DataFrame.
filename = "raw_data_pd.csv"
# Use a forward slash: the original "train_data\{filename}" only resolved on Windows
# and relied on "\{" being an unrecognised (deprecated) escape sequence.
df = pd.read_csv(f"train_data/{filename}")

# Split into input features (X) and the regression target (y).
feature_cols = ['distance_to_road_center', 'angle_from_straight_in_rads', 'reward']
X = df[feature_cols].values
y = df['steering_angle'].values

# Hold out 20% of the rows for validation; the seed keeps the split reproducible.
Xtrain, Xval, ytrain, yval = train_test_split(X, y, test_size=0.2, random_state=42)

display(df.head())

Unnamed: 0,steering_angle,distance_to_road_center,angle_from_straight_in_rads,reward
0,-2.017271,-1.449541,-1.197695,0.022491
1,-2.017271,-1.449541,-1.197695,0.022491
2,-2.017271,-1.449541,-1.197695,0.022491
3,-2.017271,-1.449541,-1.197695,0.022491
4,-2.017271,-1.449541,-1.197695,0.022279


In [10]:
# Downsample the training split, without replacement, to at most 10000 rows.
# The cap matters: resample(replace=False) raises ValueError when n_samples
# exceeds the number of available rows.
n_downsample = min(10000, len(ytrain))
Xtrain, ytrain = resample(Xtrain, ytrain, n_samples=n_downsample, replace=False, random_state=42)

In [14]:
# Base estimator. The kernel given here is only a starting value, because
# 'kernel' is also one of the searched hyperparameters below.
svr = SVR(kernel='poly')

# Candidate values for each SVR hyperparameter explored by the search.
param_grid = dict(
    kernel=['linear', 'poly', 'rbf'],
    C=[0.1, 1, 10, 100],
    gamma=['scale', 'auto'],
    degree=[2, 3, 4],
)

# 5-fold cross-validation repeated twice (10 splits per candidate), with a fixed seed.
rkf = RepeatedKFold(random_state=42, n_repeats=2, n_splits=5)

class DownsampledCV:
    """Cross-validator wrapper that caps the size of every training fold.

    Each (train, test) index pair produced by ``base_cv`` is passed through,
    except that a training fold with more than ``downsample_size`` rows is
    replaced by a random subset of exactly ``downsample_size`` of its indices,
    drawn without replacement. Test folds are never modified.

    Parameters
    ----------
    base_cv : object
        Any splitter exposing ``split(X, y, groups)`` and
        ``get_n_splits(X, y, groups)``.
    downsample_size : int
        Maximum number of training indices to yield per fold.
    random_state : int or None, default=42
        Seed for the subsampling. A fresh generator is created on every call
        to ``split`` so repeated calls yield the same folds.
    """

    def __init__(self, base_cv, downsample_size, random_state=42):
        self.base_cv = base_cv
        self.downsample_size = downsample_size
        self.random_state = random_state

    def split(self, X, y=None, groups=None):
        """Yield ``(train_idx, test_idx)`` pairs with capped training folds."""
        rng = np.random.RandomState(self.random_state)
        for train_idx, test_idx in self.base_cv.split(X, y, groups):
            if self.downsample_size < len(train_idx):
                # Subsample the *indices* themselves. The previous version
                # resampled copies of X/y and then yielded the untouched
                # train_idx, so the downsampling never took effect.
                train_idx = rng.choice(
                    np.asarray(train_idx),
                    size=self.downsample_size,
                    replace=False,
                )
            yield train_idx, test_idx

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the number of splits reported by the wrapped splitter."""
        return self.base_cv.get_n_splits(X, y, groups)
    
# Wrap the repeated k-fold splitter in DownsampledCV with a cap of `downsample_size`.
downsample_size = 10000
downsampled_cv = DownsampledCV(rkf, downsample_size)

# Randomized search: 10 candidates sampled from `param_grid`, each scored on every CV split.
# - random_state fixes which candidates are sampled, so reruns are comparable.
# - With several scoring metrics, `refit` must name one of them for best_estimator_ /
#   best_params_ to be populated; MSE is used as the selection metric here.
random_search = RandomizedSearchCV(
    svr,
    param_grid,
    n_iter=10,
    cv=downsampled_cv,
    scoring=['neg_mean_squared_error', 'neg_mean_absolute_error'],
    refit='neg_mean_squared_error',
    random_state=42,
    n_jobs=-1,
    verbose=2,
)
random_search.fit(Xtrain, ytrain)

Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [None]:
# Tabulate the per-candidate cross-validation results (one row per sampled candidate).
random_search_df = pd.DataFrame.from_dict(random_search.cv_results_)
display(random_search_df)

In [None]:
# Best model. The search object in this notebook is `random_search`; the name
# `grid_search` was never defined and raised NameError.
if hasattr(random_search, 'best_estimator_'):
    best_svr = random_search.best_estimator_
else:
    # The search ran with refit=False, so no fitted estimator is stored. Rebuild the
    # candidate ranked first on MSE and fit it on the training data.
    best_idx = int(np.argmin(random_search.cv_results_['rank_test_neg_mean_squared_error']))
    best_params = random_search.cv_results_['params'][best_idx]
    best_svr = SVR(**best_params).fit(Xtrain, ytrain)

# Evaluate on the held-out validation split.
ypred = best_svr.predict(Xval)
# NOTE(review): huber_loss comes from project_functions; the (prediction, target)
# argument order is kept exactly as in the original call — confirm against its definition.
val_loss = huber_loss(ypred, yval, delta=1.0)
val_loss

## Weighted

# Load the raw training data from CSV into a DataFrame.
filename = "raw_training_data.csv"
df = pd.read_csv(filename)

# Split into input features, target, and the reward used as a sampling weight.
X = df[['distance_to_road_center', 'angle_from_straight_in_rads']].values
y = df['steering_angle'].values
r = df['reward'].values

# Split features, targets and rewards together so the rows stay aligned.
Xtrain, Xval, ytrain, yval, wtrain, wval = train_test_split(X, y, r, test_size=0.2, random_state=42)

# Weighted bootstrap: draw len(ytrain) rows with replacement, each row's probability
# proportional to its reward. sklearn.utils.resample has no `weights` keyword, so the
# row indices are drawn with numpy instead.
assert np.all(wtrain >= 0) and wtrain.sum() > 0, \
    "rewards must be non-negative with a positive sum to be used as sampling weights"
rng = np.random.RandomState(42)
boot_idx = rng.choice(len(ytrain), size=len(ytrain), replace=True, p=wtrain / wtrain.sum())
Xtrain, ytrain = Xtrain[boot_idx], ytrain[boot_idx]