In [30]:
import pandas as pd
import numpy as np
import warnings

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score
from sklearn.feature_selection import RFE

from sys import platform

# Pick the project root for the current operating system.
# Previously only "win32" and "darwin" were handled, so on any other platform
# (e.g. Linux) `path` was never bound and the first read_csv raised NameError.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    # macOS ("darwin") and every other platform use the home-relative checkout.
    path = '~/Documents/GitHub/f1-analytics/'
    # path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [31]:
# Load the model input, the processed race table and the stored SVM
# hyper-parameter candidates from the project directory.
data = pd.read_csv(f'{path}data/ml_input.csv')
processed = pd.read_csv(f'{path}data/processed.csv')
parameters = pd.read_csv(f'{path}parameters/svm_classifier.csv')

In [32]:
# Columns fed to the classifier, in the order they are passed to the scaler.
features = [
    'qualifying_pos',
    'starting_grid',
    'driver_lewis_hamilton',
    'stage_q3',
    'constructor_mercedes',
    'driver_points_per',
    'constructor_ferrari',
    'driver_max_verstappen',
    'constructor_points_before',
    'constructor_red_bull',
    'driver_sebastian_vettel',
    'driver_points_before',
    'driver_wins_before',
    'driver_valtteri_bottas',
    'driver_sergio_perez',
    'constructor_standings_before',
    'constructor_wins_before',
]

In [33]:
# Bookkeeping columns selected alongside the features (season/round are used
# for splitting, podium is the target) and dropped before scaling.
params_to_drop = [
    'season',
    'round',
    'podium',
    'driver_points_from',
]

In [34]:
### Season held out for testing: earlier seasons train the model, this one is scored

N = 2021

In [35]:
# Work on a copy so the raw `data` frame is left untouched.
df = data.copy()
# Binary target: 1 where `podium` equals 1, 0 for every other value.
df['podium'] = df['podium'].eq(1).astype('int64')

# Every season before N is training data.
train = df.loc[df['season'] < N, features + params_to_drop]

# Fit the scaler on the training features only; the same fitted scaler is
# applied to the test rows inside score_classification.
scaler = StandardScaler()
train_features = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(
    scaler.fit_transform(train_features),
    columns=train_features.columns,
)

y_train = train['podium'].to_numpy()

In [36]:
def score_classification(model):
    """Score every round of season ``N`` with a fitted probabilistic classifier.

    Reads notebook-level state rather than taking it as arguments: ``df``
    (the labelled input frame), ``N`` (test season), ``features``,
    ``params_to_drop`` and the already-fitted ``scaler``.

    Parameters
    ----------
    model : estimator
        Fitted object exposing ``predict_proba`` that returns two columns;
        the second column is used as the positive-class probability.

    Returns
    -------
    predictions : list
        One 0/1 flag per scored row. Within each round the row(s) holding the
        highest positive-class probability are flagged 1 (ties all get 1).
    prob : list
        Positive-class probability for each scored row, in the same order
        (rounds in order of first appearance, rows in frame order).
    """
    predictions = []
    prob = []

    # Filter the test season once instead of on every loop iteration.
    season_rows = df[df.season == N]

    for round_id in season_rows['round'].unique():
        test = season_rows[season_rows['round'] == round_id][features + params_to_drop]
        X_test = test.drop(params_to_drop, axis=1)

        # Scale with the scaler fitted on the training data.
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # Keeping the two-column frame preserves the shape check on predict_proba.
        proba = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        proba_1 = proba['proba_1']

        # Compute the round maximum once; the previous per-row lambda
        # recomputed it for every row (quadratic in the grid size).
        best = proba_1.max()
        predicted = (proba_1 == best).astype('int64')

        predictions += list(predicted.values)
        prob += list(proba_1.values)

    return predictions, prob

In [37]:
# Preview the first rows of the stored SVM hyper-parameter table.
parameters.head()

Unnamed: 0,model,gamma,C,kernel,score
0,svm_classifier,0.000886,1.128838,sigmoid,0.684211
1,svm_classifier,0.000886,0.127427,rbf,0.684211
2,svm_classifier,0.000144,0.78476,linear,0.631579
3,svm_classifier,0.000298,0.78476,linear,0.631579
4,svm_classifier,0.000207,0.78476,linear,0.631579


In [38]:
# Use the first row of the parameter table as the model configuration.
# NOTE(review): this assumes the CSV is ordered with the preferred
# configuration first (e.g. sorted by score) - confirm.
params = parameters.iloc[0]
gamma, c, kernel = params['gamma'], params['C'], params['kernel']

params

model     svm_classifier
gamma           0.000886
C               1.128838
kernel           sigmoid
score           0.684211
Name: 0, dtype: object

In [39]:
# With probability=True, SVC calibrates its probabilities through an internal
# cross-validation that shuffles the data at random. Pinning random_state
# makes predict_proba (and therefore the table below) reproducible across
# Restart & Run All.
model = SVC(probability=True, gamma=gamma, C=c, kernel=kernel, random_state=42)
model.fit(X_train, y_train)

# Must run while `df` is still the labelled input frame built from `data`.
predictions, probs = score_classification(model)

In [40]:
# Rebinds `df` to the season-N slice of the processed table. Note that
# score_classification reads the global `df`, so it must not be re-run after
# this cell without first re-running the training-data cell.
df = processed.loc[processed['season'] == N].copy()

# The lists are attached positionally.
# NOTE(review): assumes the season-N rows of `processed` are in the same order
# as the rows scored by score_classification - confirm.
df = df.assign(predicted=predictions, proba_1=probs)

In [41]:
# Rows flagged as the top-probability entry of their round.
df[df['predicted'] == 1]

Unnamed: 0,season,round,circuit_id,driver,constructor,podium,qualifying_pos,stage,q_delta,starting_grid,...,constructor_points_before,constructor_standings_before,driver_points_from,driver_points_per,constructor_points_per,points_percentage,driver_last_3,constructor_last_3,predicted,proba_1
2742,2021,1,bahrain,lewis_hamilton,mercedes,1,2,q3,0.388,2,...,0.0,1,25.0,0.0,0.0,0.0,0.0,0.0,1,0.27158
2763,2021,2,imola,lewis_hamilton,mercedes,2,1,q3,0.0,1,...,41.0,1,19.0,12.5,20.5,60.98,25.0,41.0,1,0.276027
2781,2021,3,portimao,lewis_hamilton,mercedes,1,2,q3,0.007,2,...,60.0,1,25.0,14.67,20.0,73.33,44.0,60.0,1,0.272046
2801,2021,4,catalunya,lewis_hamilton,mercedes,1,1,q3,0.0,1,...,101.0,1,25.0,17.25,25.25,68.32,69.0,101.0,1,0.275892
2827,2021,5,monaco,lewis_hamilton,mercedes,7,7,q3,0.749,7,...,141.0,1,7.0,18.8,28.2,66.67,69.0,100.0,1,0.250317
2853,2021,6,baku,lewis_hamilton,mercedes,15,2,q3,0.0,2,...,148.0,2,0.0,16.83,24.67,68.24,57.0,88.0,1,0.27069
2859,2021,7,ricard,lewis_hamilton,mercedes,2,2,q3,0.0,2,...,148.0,2,18.0,14.43,21.14,68.24,32.0,47.0,1,0.270568
2877,2021,8,red_bull_ring,lewis_hamilton,mercedes,2,3,q3,0.226,2,...,178.0,2,19.0,14.88,22.25,66.85,25.0,37.0,1,0.269226
2899,2021,9,red_bull_ring,lewis_hamilton,mercedes,4,4,q3,0.294,4,...,212.0,2,12.0,15.33,23.56,65.09,37.0,64.0,1,0.262699
2917,2021,11,hungaroring,lewis_hamilton,mercedes,2,1,q3,0.0,1,...,285.0,2,18.0,16.09,25.91,62.11,58.0,107.0,1,0.275075


In [42]:
# Inspect a single round of the test season, ranked by predicted probability.
# Uses @N instead of a hard-coded 2021 so the cell follows the configured
# test season instead of silently returning an empty frame when N changes.
df.query('season == @N & round == 3').sort_values('proba_1', ascending=False)

Unnamed: 0,season,round,circuit_id,driver,constructor,podium,qualifying_pos,stage,q_delta,starting_grid,...,constructor_points_before,constructor_standings_before,driver_points_from,driver_points_per,constructor_points_per,points_percentage,driver_last_3,constructor_last_3,predicted,proba_1
2781,2021,3,portimao,lewis_hamilton,mercedes,1,2,q3,0.007,2,...,60.0,1,25.0,14.67,20.0,73.33,44.0,60.0,1,0.272046
2783,2021,3,portimao,valtteri_bottas,mercedes,3,1,q3,0.0,1,...,60.0,1,16.0,5.33,20.0,26.67,16.0,60.0,0,0.043987
2782,2021,3,portimao,max_verstappen,red_bull,2,3,q3,0.398,3,...,53.0,2,18.0,14.33,17.67,81.13,43.0,53.0,0,0.043334
2784,2021,3,portimao,sergio_perez,red_bull,4,4,q3,0.542,4,...,53.0,2,12.0,3.33,17.67,18.87,10.0,53.0,0,0.043186
2791,2021,3,portimao,carlos_sainz,ferrari,11,5,q3,0.691,5,...,34.0,4,0.0,4.67,11.33,41.18,14.0,34.0,0,0.039852
2787,2021,3,portimao,esteban_ocon,alpine,7,6,q3,0.694,6,...,3.0,7,6.0,0.67,1.0,66.67,2.0,3.0,0,0.038951
2785,2021,3,portimao,lando_norris,mclaren,5,7,q3,0.768,7,...,41.0,3,10.0,9.0,13.67,65.85,27.0,41.0,0,0.038598
2786,2021,3,portimao,charles_leclerc,ferrari,6,8,q3,0.958,8,...,34.0,4,8.0,6.67,11.33,58.82,20.0,34.0,0,0.037401
2790,2021,3,portimao,pierre_gasly,alphatauri,10,9,q3,1.127,9,...,8.0,5,1.0,2.0,2.67,75.0,6.0,8.0,0,0.036689
2793,2021,3,portimao,sebastian_vettel,aston_martin,13,10,q3,1.311,10,...,5.0,6,0.0,0.0,1.67,0.0,0.0,5.0,0,0.03642
