In [1]:
import pandas as pd
import numpy as np
import warnings

from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score

from sys import platform

# Root folder of the f1-analytics checkout; every CSV below is read relative
# to it. Windows uses an absolute drive path, every other platform (macOS,
# Linux, ...) uses the home-relative location so that `path` is always bound
# instead of raising a NameError in the loading cell.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    path = '~/Documents/GitHub/f1-analytics/'
    # Absolute macOS alternative:
    # path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

# Hide every RuntimeWarning raised for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning) 
# Switch off pandas' chained-assignment warning, which the cells below would
# otherwise trigger when they write columns onto filtered frames.
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [2]:
# Inputs, all located under the repository root `path`:
#   data       - model input table (features plus season / round / podium)
#   processed  - processed table (loaded here, not used in the cells shown)
#   parameters - stored hyper-parameter candidates for the SVM classifier
data = pd.read_csv(f'{path}data/ml_input.csv')
processed = pd.read_csv(f'{path}data/processed.csv')
parameters = pd.read_csv(f'{path}parameters/svm_classifier.csv')

In [3]:
# Columns removed from the feature matrix before scaling / fitting.
params_to_drop = [
    'season',              # only used to split train and test seasons
    'round',               # only used to group the test rows per race
    'podium',              # the target column
    'driver_points_from',  # NOTE(review): looks like points scored in the race itself (target leak) - confirm
]

In [4]:
# Season held out as the test set: rows with season < N are used for training,
# rows with season == N are scored.

N = 2021

In [5]:
# Work on a copy so the raw `data` frame keeps the original podium values.
df = data.copy()

# Binary target: 1 when podium == 1, 0 for every other value.
df['podium'] = (df['podium'] == 1).astype('int64')

# Every season before the test season N is training data.
train = df[df['season'] < N]

# Fit the scaler on the training features only; the same fitted scaler is
# reused later to transform the test rows.
scaler = StandardScaler()
X_train = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

y_train = np.asarray(train['podium'])

In [6]:
def score_classification(model):
    """Pick the most likely winner of every round of season ``N``.

    For each round of the test season the feature rows are scaled with the
    already fitted global ``scaler`` and passed to ``model.predict_proba``.
    The row(s) holding the highest value in the second probability column
    within that round are flagged ``1`` (ties are all flagged), every other
    row ``0``.

    Reads the notebook globals ``df``, ``N``, ``scaler`` and
    ``params_to_drop``; ``df`` must still hold the same feature columns the
    scaler was fitted on when this is called.

    Parameters
    ----------
    model : fitted classifier exposing ``predict_proba`` with (at least) two
        output columns.

    Returns
    -------
    predictions : list of int
        0/1 flag per test row, concatenated round by round in the order the
        rounds first appear in ``df``.
    prob : list of float
        Second ``predict_proba`` column for the same rows, in the same order.
    """
    predictions = []
    prob = []

    # Filter the test season once instead of on every loop iteration.
    season_rows = df[df.season == N]

    for race_round in season_rows['round'].unique():
        test = season_rows[season_rows['round'] == race_round]
        X_test = test.drop(params_to_drop, axis=1)

        # Scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # Make predictions: the round maximum is computed once and compared
        # vectorised, rather than being recomputed for every row.
        proba_1 = pd.Series(model.predict_proba(X_test)[:, 1])
        predicted = (proba_1 == proba_1.max()).astype(int)

        predictions += predicted.tolist()
        prob += proba_1.tolist()

    return predictions, prob

In [7]:
# Show the stored hyper-parameter candidates loaded from svm_classifier.csv.
parameters

Unnamed: 0,model,gamma,C,kernel,score
0,svm_classifier,0.023357,10.0,poly,0.526316
1,svm_classifier,0.033598,3.359818,poly,0.526316
2,svm_classifier,0.048329,1.128838,poly,0.526316
3,svm_classifier,0.069519,0.379269,poly,0.526316
4,svm_classifier,0.1,0.127427,poly,0.526316


In [8]:
# Take the first stored candidate as the SVM configuration.
params = parameters.iloc[0]
gamma, c, kernel = params['gamma'], params['C'], params['kernel']

params

model     svm_classifier
gamma           0.023357
C                   10.0
kernel              poly
score           0.526316
Name: 0, dtype: object

In [9]:
# probability=True makes SVC calibrate its probabilities with an internal,
# randomly shuffled cross-validation; pinning random_state keeps proba_1
# (and therefore the predicted winners) identical between runs.
model = svm.SVC(probability=True, gamma=gamma, C=c, kernel=kernel, random_state=42)
model.fit(X_train, y_train)

# One 0/1 winner flag and one probability per row of the test season.
predictions, probs = score_classification(model)

In [10]:
# Columns kept for inspecting the predictions of the test season.
report_columns = [
    'season', 'round', 'podium', 'q_delta', 'starting_grid',
    'driver_points_before', 'constructor_points_before',
    'driver_points_from', 'driver_points_per', 'points_percentage',
    'driver_last_3', 'constructor_last_3',
]

# Rows of season N taken from the raw input, so `podium` keeps its original
# (non-binarised) values here.
df = data.loc[data.season == N, report_columns].copy()

# Lists are attached positionally.
# NOTE(review): this relies on the season's rows being stored round by round,
# matching the order produced by score_classification - confirm.
df['predicted'] = predictions
df['proba_1'] = probs


In [14]:
# Rows the model flagged as the winner of their round.
df[df['predicted'] == 1]

Unnamed: 0,season,round,podium,q_delta,starting_grid,driver_points_before,constructor_points_before,driver_points_from,driver_points_per,points_percentage,driver_last_3,constructor_last_3,predicted,proba_1
2743,2021,1,2,0.0,1,0.0,0.0,18.0,0.0,0.0,0.0,0.0,1,0.056593
2763,2021,2,2,0.0,1,25.0,41.0,19.0,12.5,60.98,25.0,41.0,1,0.060125
2783,2021,3,3,0.0,1,16.0,60.0,16.0,5.33,26.67,16.0,60.0,1,0.055812
2801,2021,4,1,0.0,1,69.0,101.0,25.0,17.25,68.32,69.0,101.0,1,0.069132
2839,2021,5,20,0.0,1,40.0,60.0,0.0,8.0,66.67,32.0,48.0,1,0.059312
2853,2021,6,15,0.0,2,101.0,148.0,0.0,16.83,68.24,57.0,88.0,1,0.057184
2858,2021,7,1,0.0,1,105.0,174.0,26.0,15.0,60.34,44.0,91.0,1,0.064553
2876,2021,8,1,0.0,1,131.0,215.0,25.0,16.38,60.93,51.0,103.0,1,0.071935
2896,2021,9,1,0.0,1,156.0,252.0,26.0,17.33,61.9,51.0,103.0,1,0.076942
2917,2021,11,2,0.0,1,177.0,285.0,18.0,16.09,62.11,58.0,107.0,1,0.094326


In [11]:
# Probability ranking inside a single round of the test season. The season is
# taken from N (not a literal year) so the cell keeps working when N changes.
df.query('season == @N & round == 4').sort_values('proba_1', ascending=False)

Unnamed: 0,season,round,podium,q_delta,starting_grid,driver_points_before,constructor_points_before,driver_points_from,driver_points_per,points_percentage,driver_last_3,constructor_last_3,predicted,proba_1
2801,2021,4,1,0.0,1,69.0,101.0,25.0,17.25,68.32,69.0,101.0,1,0.069132
2803,2021,4,3,0.132,3,32.0,101.0,15.0,8.0,31.68,32.0,101.0,0,0.053261
2804,2021,4,4,0.769,4,28.0,42.0,12.0,7.0,66.67,28.0,42.0,0,0.052764
2802,2021,4,2,0.036,2,61.0,83.0,19.0,15.25,73.49,61.0,83.0,0,0.052526
2809,2021,4,9,0.839,5,8.0,13.0,2.0,2.0,61.54,8.0,13.0,0,0.052193
2807,2021,4,7,0.879,6,14.0,42.0,6.0,3.5,33.33,14.0,42.0,0,0.050674
2806,2021,4,6,0.881,7,16.0,53.0,8.0,4.0,30.19,16.0,53.0,0,0.05031
2811,2021,4,11,1.233,11,5.0,5.0,0.0,1.25,100.0,5.0,5.0,0,0.04907
2810,2021,4,10,1.241,12,7.0,9.0,1.0,1.75,77.78,7.0,9.0,0,0.04883
2805,2021,4,5,0.96,8,22.0,83.0,10.0,5.5,26.51,22.0,83.0,0,0.047546
