# Support Vector Regression

## Import Packages

In [1]:
import pandas as pd
import numpy as np
import datetime
from sklearn import svm
from sklearn.model_selection import GridSearchCV

# From: https://towardsdatascience.com/time-based-cross-validation-d259b13d42b8
import timesplit as ts

## Set up helper functions

In [2]:
def GridSearchSummary(search, hyperparams, metrics, file):
    """
    Creates a dataframe with summary results from GridSearchCV and saves it as a CSV.

    search - the final GridSearchCV object (only its cv_results_ attribute is read)
    hyperparams - the list of parameters you were tuning for (aka keys in the param_grid dictionary)
    metrics - the list of scoring metrics you used; each one is looked up as
              "mean_test_<metric>" in search.cv_results_
    file - filename of where you want to save your results

    Returns the summary dataframe: one row per candidate, with one column per
    hyperparameter followed by one column per metric.

    Raises KeyError if a hyperparameter is not present in any candidate, or if
    search.cv_results_ has no "mean_test_<metric>" entry for a metric.
    """

    candidates = search.cv_results_["params"]
    columns = {}

    for hyperparam in hyperparams:
        # A name that no candidate defines is almost certainly a typo: fail loudly,
        # with the same exception type a plain dictionary lookup would raise.
        if candidates and not any(hyperparam in dct for dct in candidates):
            raise KeyError(hyperparam)
        # When param_grid is a list of grids, a candidate may legitimately lack
        # some hyperparameters; those cells are left as missing values.
        columns[hyperparam] = [dct.get(hyperparam) for dct in candidates]

    for metric in metrics:
        columns[metric] = search.cv_results_["mean_test_" + metric]

    summary = pd.DataFrame(columns)
    summary.to_csv(file)

    return summary

## Import data

In [3]:
# Feature tables: the "date" column is parsed into datetimes on load.
X_train = pd.read_csv(
    "../Data/Train-Test Set/X_train.csv",
    parse_dates=["date"],
)
X_test = pd.read_csv(
    "../Data/Train-Test Set/X_test.csv",
    parse_dates=["date"],
)

# Target tables, read as-is.
y_train = pd.read_csv("../Data/Train-Test Set/y_train.csv")
y_test = pd.read_csv("../Data/Train-Test Set/y_test.csv")

## Get indices for time-based cross validation

In [4]:
# Time-based CV splitter: 21-day training window followed by a 7-day test window.
tscv = ts.TimeBasedCV(train_period=21, test_period=7, freq='days')

# Materialise every (train, test) index pair once so the same folds can be
# passed as `cv=` to each grid search below.
tscv_indices = [
    (train_index, test_index)
    for train_index, test_index in tscv.split(X_train, date_column='date')
]

## Run GridSearchCV using Support Vector Regression

In [5]:
# Scorer names handed to GridSearchCV; each one becomes a
# "mean_test_<name>" entry that GridSearchSummary reads back.
scoring_metrics = [
    "neg_mean_squared_error",
    "r2",
    "neg_mean_absolute_error",
]

In [10]:
# Full Cartesian grid for SVR: 3 kernels x 2 degrees x 2 gammas x 3 C values
# = 36 candidates.
param_grid = dict(
    kernel=["linear", "poly", "rbf"],
    degree=[2, 3],
    gamma=["scale", "auto"],
    C=[0.5, 1, 5],
)

# Wall-clock timestamps bracket the search so its duration is visible in the output.
print("Start: ", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

# refit=False: with several scorers no single "best" model is refit; only
# cv_results_ is used afterwards.
SVR_search = GridSearchCV(
    estimator=svm.SVR(),
    param_grid=param_grid,
    scoring=scoring_metrics,
    cv=tscv_indices,
    refit=False,
    verbose=1,
)
# The date column is dropped from the features; the target is flattened to 1-D.
SVR_search.fit(X_train.drop(columns=["date"]), np.array(y_train).ravel())

print("End: ", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

Start:  28/05/2021 13:53:01
Fitting 32 folds for each of 36 candidates, totalling 1152 fits
End:  28/05/2021 14:23:30


In [13]:
# Tabulate the mean CV score of every SVR candidate and save the table as CSV.
SVR_summary = GridSearchSummary(
    search=SVR_search,
    hyperparams=["kernel", "gamma", "degree", "C"],
    metrics=scoring_metrics,
    file="../Data/Model Results/SVR_search.csv",
)

# Ascending sort on the negated MAE: the most negative (largest-error)
# candidates come first and the smallest-error candidates come last.
SVR_summary.sort_values("neg_mean_absolute_error")

Unnamed: 0,kernel,gamma,degree,C,neg_mean_squared_error,r2,neg_mean_absolute_error
34,poly,auto,3,5.0,-372130.043208,-4.544312,-25.75354
31,poly,scale,3,5.0,-279705.777552,-3.38391,-25.50049
22,poly,auto,3,1.0,-71377.401155,-0.379803,-22.511241
19,poly,scale,3,1.0,-48720.511491,-0.1393,-22.081667
7,poly,scale,3,0.5,-17681.237018,0.303685,-21.381395
10,poly,auto,3,0.5,-24453.974115,0.298678,-21.26061
5,rbf,auto,2,0.5,-17793.667766,0.186484,-20.584071
11,rbf,auto,3,0.5,-17793.667766,0.186484,-20.584071
2,rbf,scale,2,0.5,-17609.165529,0.186599,-20.346555
8,rbf,scale,3,0.5,-17609.165529,0.186599,-20.346555


## Run GridSearchCV using LinearSVR

In [19]:
# Scorer names for the LinearSVR search (same three metrics as the SVR search).
scoring_metrics = [
    "neg_mean_squared_error",
    "r2",
    "neg_mean_absolute_error",
]

In [21]:
from sklearn.pipeline import make_pipeline
# PolynomialFeatures is defined in sklearn.preprocessing; importing it from
# sklearn.feature_selection raises ImportError.
from sklearn.preprocessing import PolynomialFeatures

# Pipeline of polynomial feature expansion followed by a linear-kernel SVR.
# NOTE(review): final_svm is not used by the grid search in this cell, and
# gamma/degree have no effect with kernel="linear" per the sklearn SVR docs.
final_svm = make_pipeline(PolynomialFeatures(), 
                          svm.SVR(kernel="linear", C=5, gamma='scale', degree=3))

# LinearSVR grid: 2 loss functions x 6 C values = 12 candidates.
param_grid = {
    'loss': ["epsilon_insensitive", "squared_epsilon_insensitive"],
    'C': [0.1, 0.5, 1, 5, 10, 20]
}

# Wall-clock timestamps bracket the search so its duration is visible in the output.
print("Start: ", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

# max_iter is raised to 100000 for the solver; refit=False because only
# cv_results_ is consumed afterwards. verbose=2 logs one line per fit.
linSVR_search = GridSearchCV(svm.LinearSVR(max_iter=100000), param_grid, cv=tscv_indices, 
                             scoring=scoring_metrics, refit = False, verbose=2)
# The date column is dropped from the features; the target is flattened to 1-D.
linSVR_search.fit(X_train.drop(columns=["date"]), np.array(y_train).ravel())

print("End: ", datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

Start:  28/05/2021 20:05:41
Fitting 32 folds for each of 12 candidates, totalling 384 fits
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.1s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END ....................C=0.1, loss=epsilon_insensitive; total time=   0.0s
[CV] END .........

[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=   2.9s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=   3.0s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=   3.6s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=   4.4s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=   4.0s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=   4.7s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=   8.8s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=  12.5s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=  12.3s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=  12.6s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=  14.1s
[CV] END ............C=0.5, loss=squared_epsilon_insensitive; total time=  13.1s
[CV] END ............C=0.5, 

[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.4s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.3s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.4s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.4s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.6s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.2s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.4s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.3s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.3s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.4s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.2s
[CV] END ......................C=5, loss=epsilon_insensitive; total time=   0.5s
[CV] END ...................



[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time= 1.2min




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time= 1.0min




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time= 1.5min




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  48.4s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time= 1.1min




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  47.7s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  44.9s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  50.4s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  41.1s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  41.6s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  41.9s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  47.0s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  42.2s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  42.6s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  32.7s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  32.6s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  32.4s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  33.0s
[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  31.2s
[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  30.9s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  32.8s




[CV] END ..............C=5, loss=squared_epsilon_insensitive; total time=  32.8s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   1.1s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.5s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.7s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.4s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.7s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.3s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.6s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.7s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.2s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.3s
[CV] END .....................C=10, loss=epsilon_insensitive; total time=   0.4s
[CV] END ...................



[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  32.3s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  32.2s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  32.3s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  32.2s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  32.8s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  45.2s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  39.1s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  48.5s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  40.3s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  45.0s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  40.1s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  41.4s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  56.8s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  41.0s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  34.5s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  37.3s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  36.5s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  37.2s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  37.1s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  34.4s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  39.6s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  36.2s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  33.8s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  33.6s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  34.9s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  34.6s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  41.6s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  38.6s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  38.6s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  37.2s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  34.6s




[CV] END .............C=10, loss=squared_epsilon_insensitive; total time=  35.8s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   1.6s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   1.1s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   0.7s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   0.7s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   1.4s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   0.7s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   1.0s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   0.9s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   0.3s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   0.5s
[CV] END .....................C=20, loss=epsilon_insensitive; total time=   0.6s
[CV] END ...................



[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  33.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  35.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  40.5s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  40.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  41.3s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.8s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  36.4s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  36.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  43.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  37.0s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.3s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.0s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  43.4s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.2s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.1s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  39.7s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  41.2s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  39.7s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  36.5s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  37.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.8s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  37.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  34.9s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  41.0s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  35.5s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.4s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  33.4s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  36.8s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  36.6s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  41.0s




[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  37.5s
[CV] END .............C=20, loss=squared_epsilon_insensitive; total time=  38.1s
End:  28/05/2021 21:32:56




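Some of the above specifications do not converge well: in the log above, the `squared_epsilon_insensitive` fits take far longer per fold (seconds at `C=0.5`, roughly 30 s to over a minute at `C` of 5 and above) than the `epsilon_insensitive` fits (about a second or less), so their scores should be read with that in mind.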

In [22]:
# Tabulate the mean CV score of every LinearSVR candidate and save the table as CSV.
linSVR_summary = GridSearchSummary(
    search=linSVR_search,
    hyperparams=["loss", "C"],
    metrics=scoring_metrics,
    file="../Data/Model Results/linSVR_search.csv",
)

# Ascending sort on the negated MAE: the most negative (largest-error)
# candidates come first and the smallest-error candidates come last.
linSVR_summary.sort_values("neg_mean_absolute_error")

Unnamed: 0,loss,C,neg_mean_squared_error,r2,neg_mean_absolute_error
0,epsilon_insensitive,0.1,-1568.694805,0.918079,-9.065565
7,squared_epsilon_insensitive,5.0,-1499.993081,0.936499,-8.829391
5,squared_epsilon_insensitive,1.0,-1494.50156,0.936631,-8.810542
3,squared_epsilon_insensitive,0.5,-1488.801797,0.936708,-8.786762
9,squared_epsilon_insensitive,10.0,-1488.542567,0.936787,-8.77914
1,squared_epsilon_insensitive,0.1,-1461.388707,0.936282,-8.729371
11,squared_epsilon_insensitive,20.0,-1462.405021,0.937897,-8.648529
2,epsilon_insensitive,0.5,-1225.799027,0.936531,-8.056295
4,epsilon_insensitive,1.0,-1285.690041,0.938027,-7.988969
6,epsilon_insensitive,5.0,-1298.109046,0.941365,-7.866373
