In [180]:
import pandas as pd
import numpy as np
import warnings

from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score

from sys import platform

# Base directory of the project; the 'data/' and 'parameters/' folders are
# resolved relative to it when the CSV files are loaded.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    # path = '~/Documents/GitHub/f1-analytics/'
    path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'
else:
    # Any other platform (e.g. Linux) used to leave `path` undefined, so the
    # first read_csv call failed with a NameError. Fall back to the current
    # working directory -- this assumes the notebook is started from the
    # project's base directory; adjust if that is not the case.
    path = './'

# Silence RuntimeWarning messages for the whole notebook session.
warnings.simplefilter("ignore", RuntimeWarning)
# Turn off pandas' chained-assignment check (default='warn').
pd.set_option("mode.chained_assignment", None)

%matplotlib inline

In [181]:
# Model input table (one row per entry, used for training and scoring).
data = pd.read_csv(path + 'data/ml_input.csv')
# Processed race table; used later to label the predictions with driver info.
processed = pd.read_csv(path + 'data/processed.csv')
# Saved SVM parameter sets; the 'Unnamed: 0' column is dropped
# (presumably the index written by an earlier to_csv call -- confirm).
parameters = (
    pd.read_csv(path + 'parameters/svm_classifier.csv')
    .drop(columns='Unnamed: 0')
)

In [182]:
# Season held out for testing: rows with season < N are used for training,
# rows with season == N are the ones that get scored.

N = 2020

In [183]:
# Work on a copy of the input and turn `podium` into a binary target:
# 1 where the value equals 1, 0 everywhere else.
df = data.assign(podium=data['podium'].eq(1).astype('int64'))

# Every season strictly before N is training data.
train = df.loc[df['season'] < N]

# Fit the scaler on the training features only; the same fitted scaler is
# reused when the test season is scored.
scaler = StandardScaler()
X_train = train.drop(columns=[
    'season', 'round',
    'podium',
    'driver_points_from_race',
    'constructor_points_from_race',
    'driver_points_before_race',
])
# fit_transform returns a bare array, so rebuild a frame with the column names
# (the rebuilt frame gets a fresh 0..n-1 index).
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

y_train = train['podium'].to_numpy()

In [184]:
def score_classification(model, data=None, season=None, feature_scaler=None):
    """Flag, for every round of one season, the row(s) with the highest class-1 probability.

    The rows of ``data`` belonging to ``season`` are processed round by round
    (in order of first appearance of each ``round`` value). For each round the
    feature columns are scaled, ``model.predict_proba`` is evaluated, and every
    row whose class-1 probability equals the round's maximum is marked with 1;
    all other rows get 0. Rows that tie for the maximum are all marked.

    Parameters
    ----------
    model : object
        Fitted binary classifier exposing ``predict_proba`` that returns two
        columns (class 0, class 1).
    data : pandas.DataFrame, optional
        Frame holding the feature columns plus ``season``, ``round``,
        ``podium`` and the three points columns that are dropped before
        prediction. Defaults to the notebook-level ``df``.
    season : int, optional
        Season to score. Defaults to the notebook-level ``N``.
    feature_scaler : object, optional
        Fitted scaler exposing ``transform``. Defaults to the notebook-level
        ``scaler``.

    Returns
    -------
    list of int
        One 0/1 flag per scored row, concatenated round after round.
    """
    # Fall back to the notebook globals so existing calls keep working; passing
    # the arguments explicitly protects against `df` being rebound later on.
    data = df if data is None else data
    season = N if season is None else season
    feature_scaler = scaler if feature_scaler is None else feature_scaler

    non_feature_columns = [
        'season', 'round',
        'podium',
        'driver_points_from_race',
        'constructor_points_from_race',
        'driver_points_before_race',
    ]

    season_rows = data[data.season == season]
    predictions = []

    for race_round in season_rows['round'].unique():
        round_rows = season_rows[season_rows['round'] == race_round]
        X_test = round_rows.drop(non_feature_columns, axis=1)

        # Scaling
        X_test = pd.DataFrame(feature_scaler.transform(X_test), columns=X_test.columns)

        # Make predictions; the explicit column names also enforce that the
        # model returns exactly two probability columns.
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])

        # Compute the round's best probability once instead of once per row.
        best_proba = prediction_df['proba_1'].max()
        predicted = prediction_df['proba_1'].eq(best_proba).astype(int)

        predictions.extend(predicted.tolist())

    return predictions

In [185]:
# Inspect the saved parameter set at positional row 2.
# NOTE(review): the model further down is configured from row 1, not row 2 -- confirm which one is intended.
parameters.iloc[2]

model     svm_classifier
gamma           0.000207
C               0.088587
kernel               rbf
score           0.681818
Name: 2, dtype: object

In [186]:
# Take the parameter set stored at positional row 1 and unpack the three
# values that are passed to the SVC constructor.
params = parameters.iloc[1]
gamma, c, kernel = (params[key] for key in ('gamma', 'C', 'kernel'))

# Show the selected row.
params

model     svm_classifier
gamma           0.000207
C               0.061585
kernel               rbf
score           0.681818
Name: 1, dtype: object

In [187]:
# probability=True makes SVC calibrate class probabilities with an internal
# cross-validation that shuffles the data; fixing random_state keeps the
# probabilities -- and therefore the flagged rows -- reproducible across runs.
model = svm.SVC(
    probability=True,
    gamma=gamma,
    C=c,
    kernel=kernel,
    random_state=42,
)
model.fit(X_train, y_train)

# One 0/1 flag per row of season N, ordered round by round.
predictions = score_classification(model)

In [188]:
# Build the season-N report table from the processed data and attach the flags.
# The list is assigned positionally, so the rows of `processed` for season N
# must be in the same order as the rows that were scored.
# NOTE: this rebinds `df`, which score_classification reads from the global
# scope by default; re-run the training-set preparation cell before calling it again.
df = processed.loc[
    processed['season'] == N,
    ['season', 'round', 'driver', 'starting_grid', 'podium', 'driver_points_per_race', 'driver_points_last_3_races', 'q_delta'],
].assign(predicted=predictions)

In [189]:
# Show only the rows flagged by the model, without the helper column.
df.loc[df['predicted'] == 1].drop(columns='predicted')

Unnamed: 0,season,round,driver,starting_grid,podium,driver_points_per_race,driver_points_last_3_races,q_delta
2421,2020,1,valtteri_bottas,1,1,0.0,0.0,0.0
2441,2020,2,lewis_hamilton,1,1,6.0,12.0,0.0
2463,2020,3,lance_stroll,3,4,2.0,6.0,0.93
2480,2020,4,lewis_hamilton,1,1,15.75,63.0,0.0
2506,2020,5,nico_hulkenberg,3,7,0.0,0.0,0.928
2520,2020,6,lewis_hamilton,1,1,17.83,70.0,0.0
2542,2020,7,max_verstappen,3,3,13.57,62.0,0.526
2561,2020,8,carlos_sainz,3,2,2.88,8.0,0.808
2580,2020,9,lewis_hamilton,1,1,18.22,57.0,0.0
2603,2020,10,sergio_perez,4,4,4.4,12.0,1.013
