In [1]:
import pandas as pd
import numpy as np
import warnings

from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score

from sys import platform

# Pick the project root for the machine the notebook is running on.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    # path = '~/Documents/GitHub/f1-analytics/'
    path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'
else:
    # Any other platform (e.g. Linux) used to leave `path` undefined, which
    # surfaced later as a NameError when the CSV files were read.
    # NOTE(review): the fallback assumes the notebook is started from the
    # repository root -- adjust if it is launched from elsewhere.
    path = './'

# Silence numpy/pandas RuntimeWarnings and pandas' chained-assignment warning.
warnings.filterwarnings("ignore", category=RuntimeWarning)
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [10]:
# Read the three input tables from the project directory.
# `parameters` holds stored SVM settings (model, gamma, C, kernel, score);
# its serialized index column 'Unnamed: 0' is discarded.
data = pd.read_csv(f'{path}data/ml_input.csv')
merged = pd.read_csv(f'{path}data/merged.csv')
parameters = pd.read_csv(f'{path}parameters/svm-classifier.csv').drop(columns='Unnamed: 0')

In [11]:
### Season to test results
# Rows with season < N are used for training; rows with season == N are
# the ones scored round by round.

N = 2022

In [12]:
# Work on a copy so the frame loaded from disk stays untouched.
df = data.copy()
# Binarise the target: 1 where podium == 1, otherwise 0.
df['podium'] = df['podium'].map(lambda position: 1 if position == 1 else 0)

# Every season before N is used for fitting.
train = df.loc[df['season'] < N]

# Standardise the features; the fitted scaler is kept at notebook level so it
# can be applied to other rows later.
scaler = StandardScaler()
feature_frame = train.drop(columns=['podium'])
X_train = pd.DataFrame(
    scaler.fit_transform(feature_frame),
    columns=feature_frame.columns,
)

y_train = np.asarray(train['podium'].values)

In [35]:
def score_classification(model, return_score=False):
    """Rank the entries of every round in season ``N`` by predicted probability.

    Relies on the notebook-level names ``df`` (prepared frame with a binary
    ``podium`` column), ``N`` (season to evaluate) and ``scaler`` (the already
    fitted ``StandardScaler``).

    For each round the rows are scaled, scored with ``model.predict_proba`` and
    sorted by the probability of class 1 in descending order. Only the
    top-ranked row of a round is marked as a predicted positive.

    Parameters
    ----------
    model : fitted classifier
        Must expose ``predict_proba`` returning two columns (class 0, class 1).
    return_score : bool, default False
        When False (the default, matching the previous behaviour) the ranked
        actual labels are returned. When True the mean per-round precision of
        the single predicted positive is returned instead.

    Returns
    -------
    list of numpy.ndarray
        One array per round with the actual labels ordered from the highest to
        the lowest predicted class-1 probability (``return_score=False``).
    float
        Mean precision over the rounds, or ``nan`` if season ``N`` has no
        rounds (``return_score=True``).
    """
    # Filter the evaluated season once instead of on every loop iteration.
    season_rows = df[df.season == N]

    predictions = []
    round_scores = []

    for circuit in season_rows['round'].unique():
        test = season_rows[season_rows['round'] == circuit]
        X_test = test.drop(columns=['podium'])
        y_test = test.podium

        # Scaling -- reuse the scaler fitted on the training seasons.
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # Make predictions and order the rows by class-1 probability.
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        # Positional alignment: drop the original index before attaching labels.
        prediction_df['actual'] = y_test.reset_index(drop=True)
        prediction_df = (
            prediction_df
            .sort_values('proba_1', ascending=False)
            .reset_index(drop=True)
        )
        # After the reset, index 0 is the most likely row: the only predicted positive.
        prediction_df['predicted'] = (prediction_df.index == 0).astype(int)

        predictions.append(prediction_df['actual'].values)
        if return_score:
            round_scores.append(precision_score(prediction_df.actual, prediction_df.predicted))

    if return_score:
        # Guard against a season without rounds instead of dividing by zero.
        return float(np.mean(round_scores)) if round_scores else float('nan')
    return predictions

In [36]:
# Keep only the first row of the stored parameter table and display it.
params = parameters.iloc[:1]
params

Unnamed: 0,model,gamma,C,kernel,score
0,svm_classifier,0.003793,0.020691,sigmoid,0.666667


In [37]:
# Unpack the hyper-parameters from the selected parameter row.
gamma = params.gamma.values[0]
c = params.C.values[0]
kernel = params.kernel.values[0]

model_params = (gamma, c, kernel)

# probability=True makes SVC shuffle the data internally when it fits its
# probability estimates. Without a fixed random_state the probabilities, and
# therefore the ranking produced below, can differ between runs.
model = svm.SVC(probability=True, gamma=gamma, C=c, kernel=kernel, random_state=42)
model.fit(X_train, y_train)

# One array of actual labels per round of season N, ordered by predicted
# class-1 probability.
predictions = score_classification(model)

In [39]:
# Inspect the prepared frame (the `podium` column is the binarised target).
df

Unnamed: 0,season,round,grid,podium,driver_points,driver_wins,driver_standings_pos,constructor_points,constructor_wins,constructor_standings_pos,...,circuit_id_silverstone,circuit_id_sochi,circuit_id_spa,circuit_id_suzuka,circuit_id_villeneuve,circuit_id_yas_marina,circuit_id_zandvoort,stage_q1,stage_q2,stage_q3
0,2014,1,3,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,2014,1,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2014,1,10,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2014,1,5,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,2014,1,15,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3457,2022,21,10,0,23,0,14,35,0,9,...,0,0,0,0,0,0,0,0,1,0
3458,2022,21,19,0,4,0,19,8,0,10,...,0,0,0,0,0,0,0,0,1,0
3459,2022,21,18,0,2,0,20,8,0,10,...,0,0,0,0,0,0,0,1,0,0
3460,2022,21,0,0,12,0,17,35,0,9,...,0,0,0,0,0,0,0,1,0,0
