In [9]:
import pandas as pd
import numpy as np
import warnings

from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, precision_score

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    # path = '~/Documents/GitHub/f1-analytics/'
    path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [10]:
data = pd.read_csv(path+'data/ml_input.csv')
merged = pd.read_csv(path+'data/merged.csv')

In [28]:
df = data.copy()
df.podium = df.podium.map(lambda x: 1 if x == 1 else 0)

train = df[df.season != 2021]

scaler = StandardScaler()
X_train = train.drop(['podium','date', 'fastest_lap', 'pos'], axis=1)
# X_train = train.drop(['podium','date', 'fastest_lap', 'season', 'round', 'pos'], axis=1)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
# X_train = np.asarray(X_train)

y_train = np.asarray(train.podium.values)

In [89]:
def score_classification(model):
    score = 0
    correct_rounds = []
    predictions = []
    for circuit in df[df.season == 2021]['round'].unique():

        test = df[(df.season == 2021) & (df['round'] == circuit)]
        X_test = test.drop(['podium', 'date', 'fastest_lap', 'pos'], axis = 1)
        y_test = test.podium

        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        prediction_df['actual'] = y_test.reset_index(drop=True)
        prediction_df.sort_values('proba_1', ascending=False, inplace=True)
        prediction_df.reset_index(inplace=True, drop=True)
        prediction_df['predicted'] = prediction_df.index
        prediction_df['predicted'] = prediction_df.predicted.map(lambda x: 1 if x == 0 else 0)

        predictions.append(list(prediction_df['actual'].values))

        score += precision_score(prediction_df.actual, prediction_df.predicted)

    model_score = score / df[df.season == 2021]['round'].unique().max()
    return model_score, predictions

In [90]:
svm_wins= svm.SVC(probability=True, kernel="linear",gamma='auto')   # Initialize the SVM model

svm_wins.fit(X_train, y_train)                                      # Train the SVM model

In [100]:
model_score, predictions = score_classification(svm_wins)

for circuit in df[df.season == 2021]['round'].unique():
    test = merged[(merged.season == 2021) & (merged['round'] == circuit) & (merged.podium == 1)]

    print('Round: {}         Grid: {}            Finish: {}         Driver: {}'.format(circuit, test.grid.values, test.podium.values, test.driver.values[0]))

Round: 1         Grid: [2]            Finish: [1]         Driver: hamilton
Round: 2         Grid: [3]            Finish: [1]         Driver: max_verstappen
Round: 3         Grid: [2]            Finish: [1]         Driver: hamilton
Round: 4         Grid: [1]            Finish: [1]         Driver: hamilton
Round: 5         Grid: [2]            Finish: [1]         Driver: max_verstappen
Round: 6         Grid: [6]            Finish: [1]         Driver: perez
Round: 7         Grid: [1]            Finish: [1]         Driver: max_verstappen
Round: 8         Grid: [1]            Finish: [1]         Driver: max_verstappen
Round: 9         Grid: [1]            Finish: [1]         Driver: max_verstappen
Round: 11         Grid: [8]            Finish: [1]         Driver: ocon
Round: 13         Grid: [1]            Finish: [1]         Driver: max_verstappen
Round: 15         Grid: [4]            Finish: [1]         Driver: hamilton
Round: 16         Grid: [1]            Finish: [1]         Driver: b

In [35]:
correct_predictions = []

for i in data.query('season == 2021')['round'].unique():
    query = 'season == 2021 & round == {}'.format(i)

    race = df.query(query).drop(['podium', 'date', 'fastest_lap', 'pos'], axis=1)
    # race = df.query(query).drop(['podium', 'date', 'fastest_lap', 'season', 'round', 'pos'], axis=1)
    race = np.asarray(pd.DataFrame(scaler.fit_transform(race), columns=race.columns))

    labels = np.asarray(df.query(query).podium.values)

    y_pred = svm_wins.predict(race)
    if list(y_pred)== list(labels):
        correct_predictions.append(i)
    print(f'Round:    {i}')
    print(f'Predicted: {y_pred}')
    print(f'Actual:    {labels}')
    print()



Round:    1
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    2
Predicted: [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    3
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    4
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    5
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    6
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    7
Predicted: [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    8
Predicted: [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    9
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0



In [36]:
print(correct_predictions)
merged.query('season == 2021 & round in @correct_predictions & podium == 1')

[1, 3, 4, 9, 15, 20, 21]


Unnamed: 0,season,round,circuit_id,country,lat,long,date,driver,nationality,constructor,...,driver_wins,driver_standings_pos,constructor_points,constructor_wins,constructor_standings_pos,pos,final_time,stage,q_delta,driver_age
2570,2021,1,bahrain,Bahrain,26.0325,50.5106,2021-03-28 15:00:00,hamilton,British,mercedes,...,1,1,41,1,1,2,89.385,q3,0.388,36
2604,2021,3,portimao,Portugal,37.227,-8.6267,2021-05-02 14:00:00,hamilton,British,mercedes,...,2,1,101,2,1,2,78.355,q3,0.007,36
2623,2021,4,catalunya,Spain,41.57,2.26111,2021-05-09 13:00:00,hamilton,British,mercedes,...,3,1,141,3,1,1,76.741,q3,0.0,36
2716,2021,9,red_bull_ring,Austria,47.2197,14.7647,2021-07-04 13:00:00,max_verstappen,Dutch,red_bull,...,5,1,286,6,1,1,63.72,q3,0.0,23
2764,2021,15,sochi,Russia,43.4057,39.9578,2021-09-26 12:00:00,hamilton,British,mercedes,...,5,1,397,5,1,4,104.05,q3,2.057,36
2837,2021,20,losail,Qatar,25.49,51.4542,2021-11-21 14:00:00,hamilton,British,mercedes,...,7,2,546,8,1,1,80.827,q3,0.0,36
2857,2021,21,jeddah,Saudi Arabia,21.6319,39.1044,2021-12-05 17:30:00,hamilton,British,mercedes,...,8,2,587,9,1,1,87.511,q3,0.0,36
