In [45]:
import pandas as pd
import numpy as np
import warnings

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score
from sklearn.feature_selection import RFE

from sys import platform

# Pick the repository root for the current machine. Every later read_csv call
# prefixes its relative file name with `path`, so it must always be bound.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    # macOS ("darwin") and every other platform fall back to the home-relative
    # checkout; previously `path` was left undefined outside win32/darwin and
    # the data-loading cell failed with a NameError.
    path = '~/Documents/GitHub/f1-analytics/'
    # path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

# Globally hide every RuntimeWarning for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning) 
# Turn off pandas' chained-assignment (SettingWithCopy) warning; later cells
# assign columns on filtered frames and would otherwise trigger it.
pd.options.mode.chained_assignment = None  # default='warn'

# IPython magic: render matplotlib figures inside the notebook output.
%matplotlib inline

In [46]:
# Read the three CSV inputs relative to the repository root in `path`:
# the model-ready feature table, the processed race table, and the stored
# SVM hyper-parameter rows.
data, processed, parameters = (
    pd.read_csv(path + relative_file)
    for relative_file in (
        'data/ml_input.csv',
        'data/processed.csv',
        'parameters/svm_classifier.csv',
    )
)

In [47]:
# Columns removed from the feature matrix before scaling and fitting.
params_to_drop = [
    # race / entrant identifiers
    'season',
    'round',
    'driver',
    'constructor',
    'circuit_id',
    # the classification target itself
    'podium',
    # NOTE(review): presumably points scored in the race being predicted
    # (outcome-derived) — confirm against the data dictionary.
    'driver_points_from',
]

In [48]:
### Season held out for evaluation.
# Rows with season < N form the training set; rows with season == N are the
# ones scored round by round.

N = 2021

In [49]:
# Work on a copy and binarise the target: 1 where podium == 1, otherwise 0.
df = data.copy()
df['podium'] = (df['podium'] == 1).astype('int64')

# Every season strictly before N is used for training.
train = df.loc[df['season'] < N]

# The scaler is fitted on the training features only and reused later to
# transform each evaluation round.
scaler = StandardScaler()
X_train = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

# Binary labels as a plain NumPy array.
y_train = train['podium'].to_numpy()

In [50]:
def score_classification(model, X_train, y_train, data=None, season=None,
                         fitted_scaler=None, drop_columns=None):
    """Score one season round by round with an already-fitted classifier.

    For every round of the evaluation season the features are scaled, the
    positive-class probability is computed, and the row(s) holding the highest
    probability in that round are flagged as the predicted pick.

    Parameters
    ----------
    model : fitted estimator exposing ``predict_proba`` with two output columns.
    X_train, y_train : accepted for call compatibility; currently unused
        because the per-round retraining step is disabled.
    data : DataFrame with ``season`` and ``round`` columns plus the features.
        Defaults to the notebook-level ``df``.
    season : season value to score. Defaults to the notebook-level ``N``.
    fitted_scaler : object with ``transform``. Defaults to the notebook-level
        ``scaler``.
    drop_columns : columns removed before scaling. Defaults to the
        notebook-level ``params_to_drop``.

    Returns
    -------
    (predictions, prob) : two flat lists with one entry per scored row, ordered
        by round (in order of first appearance) and by row order within a round.
        ``predictions`` holds 0/1 flags, ``prob`` the positive-class probability.
    """
    # Fall back to the notebook globals only when nothing was passed, so the
    # existing three-argument call keeps working while callers can now pin the
    # inputs explicitly (the global `df` is rebound by a later cell).
    data = df if data is None else data
    season = N if season is None else season
    fitted_scaler = scaler if fitted_scaler is None else fitted_scaler
    drop_columns = params_to_drop if drop_columns is None else drop_columns

    season_rows = data[data.season == season]

    predictions = []
    prob = []

    for race_round in season_rows['round'].unique():
        test = season_rows[season_rows['round'] == race_round]
        X_test = test.drop(drop_columns, axis=1)

        # Scaling (transform only: the scaler was fitted on the training data)
        X_test = pd.DataFrame(fitted_scaler.transform(X_test), columns=X_test.columns)

        # Make predictions; naming two columns enforces a binary classifier.
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])

        # Compute the round maximum once instead of once per row; ties all get 1.
        round_best = prediction_df.proba_1.max()
        prediction_df['predicted'] = (prediction_df.proba_1 == round_best).astype('int64')

        predictions += list(prediction_df['predicted'].values)
        prob += list(prediction_df['proba_1'].values)

        # Walk-forward retraining (appending this round to the training data
        # and refitting the model) is intentionally disabled here.

    return predictions, prob

In [51]:
# Preview the first rows of the stored hyper-parameter table.
parameters.head()

Unnamed: 0,model,gamma,C,kernel,score
0,svm_classifier,0.1,10.0,rbf,0.736842
1,svm_classifier,0.1,6.951928,rbf,0.736842
2,svm_classifier,0.069519,10.0,rbf,0.736842
3,svm_classifier,0.000428,2.335721,linear,0.684211
4,svm_classifier,0.000298,2.335721,linear,0.684211


In [52]:
# Use the first row of the hyper-parameter table.
# NOTE(review): presumably the file is sorted best-score-first — confirm.
params = parameters.iloc[0]

# Unpack the SVC settings from that row.
gamma, c, kernel = params['gamma'], params['C'], params['kernel']

params

model     svm_classifier
gamma                0.1
C                   10.0
kernel               rbf
score           0.736842
Name: 0, dtype: object

In [53]:
# Fit the SVM on the scaled training data with the selected hyper-parameters.
# probability=True makes SVC calibrate its probabilities through an internal,
# randomly shuffled cross-validation; pinning random_state keeps predict_proba
# (and therefore the per-round picks) identical across Restart & Run All.
model = SVC(probability=True, gamma=gamma, C=c, kernel=kernel, random_state=42)
model.fit(X_train, y_train)

# Score every round of season N; returns flat per-row flags and probabilities.
predictions, probs = score_classification(model, X_train, y_train)

In [54]:
# Keep only the evaluation season from the processed table (as an independent
# copy) and attach the model output as two new columns.
# NOTE(review): the lists are attached positionally, so this assumes the
# season-N rows of `processed` are in the same order as the scored rows — confirm.
df = processed.loc[processed['season'] == N].copy()
df = df.assign(predicted=predictions, proba_1=probs)

In [58]:
# Show the rows where the round's flagged pick (predicted == 1) also has
# podium == 1, i.e. the picks that were correct.
df.query('predicted == 1 & podium == 1')

Unnamed: 0,season,round,circuit_id,driver,constructor,podium,qualifying_pos,stage,q_delta,starting_grid,...,constructor_points_before,constructor_standings_before,driver_points_from,driver_points_per,constructor_points_per,points_percentage,driver_last_3,constructor_last_3,predicted,proba_1
2764,2021,1,bahrain,lewis_hamilton,mercedes,1,2,q3,0.388,2,...,0.0,1,25.0,0.0,0.0,0.0,0.0,0.0,1,0.042844
2824,2021,4,catalunya,lewis_hamilton,mercedes,1,1,q3,0.0,1,...,101.0,1,25.0,17.25,25.25,68.32,69.0,101.0,1,0.350336
2844,2021,5,monaco,max_verstappen,red_bull,1,2,q3,0.23,2,...,112.0,2,25.0,16.0,22.4,71.43,62.0,84.0,1,0.061238
2883,2021,7,ricard,max_verstappen,red_bull,1,1,q3,0.0,1,...,174.0,1,26.0,15.0,24.86,60.34,44.0,91.0,1,0.392424
2902,2021,8,red_bull_ring,max_verstappen,red_bull,1,1,q3,0.0,1,...,215.0,1,25.0,16.38,26.88,60.93,51.0,103.0,1,0.242312
2922,2021,9,red_bull_ring,max_verstappen,red_bull,1,1,q3,0.0,1,...,252.0,1,26.0,17.33,28.0,61.9,51.0,103.0,1,0.121678
2962,2021,12,spa,max_verstappen,red_bull,1,1,q3,0.0,1,...,291.0,2,12.0,15.58,24.25,64.26,31.0,39.0,1,0.069107
2982,2021,13,zandvoort,max_verstappen,red_bull,1,1,q3,0.0,1,...,303.0,2,25.0,15.31,23.31,65.68,17.0,17.0,1,0.097697
3000,2021,15,sochi,lewis_hamilton,mercedes,1,4,q3,1.584,4,...,362.0,1,25.0,14.73,24.13,61.05,26.0,59.0,1,0.086686
3040,2021,17,americas,max_verstappen,red_bull,1,1,q3,0.0,1,...,397.0,2,25.0,15.41,23.35,65.99,38.0,65.0,1,0.278527
