In [6]:
import pandas as pd
import numpy as np
import warnings

from sklearn import svm
from sklearn.preprocessing import StandardScaler

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    # path = '~/Documents/GitHub/f1-analytics/'
    path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [34]:
data = pd.read_csv(path+'data/ml_input.csv')
merged = pd.read_csv(path+'data/merged.csv')

In [26]:
df = data.copy()
df.podium = df.podium.map(lambda x: 1 if x == 1 else 0)

train = df[df.season != 2021]

scaler = StandardScaler()
X_train = train.drop(['podium','date', 'fastest_lap', 'season', 'round', 'pos'], axis=1)
X_train = np.asarray(pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns))
X_train = np.asarray(X_train)

y_train = np.asarray(train.podium.values)

In [27]:
svm_wins= svm.SVC(kernel="linear",gamma='auto') # Initialize the SVM model
svm_wins.fit(X_train, y_train) # Train the SVM model

In [30]:
df.query('season == 2021 & round == 6')

Unnamed: 0,season,round,date,grid,podium,fastest_lap,driver_points,driver_wins,driver_standings_pos,constructor_points,...,nationality_New Zealander,nationality_Polish,nationality_Russian,nationality_Spanish,nationality_Swedish,nationality_Thai,nationality_Venezuelan,stage_q1,stage_q2,stage_q3
2661,2021,6,2021-06-06 12:00:00,6,1,1:44.687,69,1,3,174,...,0,0,0,0,0,0,0,0,0,1
2662,2021,6,2021-06-06 12:00:00,11,0,1:44.890,28,0,9,37,...,0,0,0,0,0,0,0,0,1,0
2663,2021,6,2021-06-06 12:00:00,4,0,1:45.220,31,0,8,39,...,0,0,0,0,0,0,0,0,0,1
2664,2021,6,2021-06-06 12:00:00,1,0,1:45.382,52,0,5,94,...,0,0,0,0,0,0,0,0,0,1
2665,2021,6,2021-06-06 12:00:00,9,0,1:45.326,66,0,4,92,...,0,0,0,0,0,0,0,0,0,1
2666,2021,6,2021-06-06 12:00:00,8,0,1:45.624,13,0,11,25,...,0,0,0,1,0,0,0,0,0,1
2667,2021,6,2021-06-06 12:00:00,7,0,1:44.939,8,0,14,39,...,0,0,0,0,0,0,0,0,0,1
2668,2021,6,2021-06-06 12:00:00,5,0,1:45.700,42,0,7,94,...,0,0,0,1,0,0,0,0,0,1
2669,2021,6,2021-06-06 12:00:00,13,0,1:45.713,26,0,10,92,...,0,0,0,0,0,0,0,0,1,0
2670,2021,6,2021-06-06 12:00:00,14,0,1:45.601,1,0,15,2,...,0,0,0,0,0,0,0,0,1,0


In [48]:
correct_predictions = []

for i in data.query('season == 2021')['round'].unique():
    query = 'season == 2021 & round == {}'.format(i)

    race = df.query(query).drop(['podium', 'date', 'fastest_lap', 'season', 'round', 'pos'], axis=1)
    race = np.asarray(pd.DataFrame(scaler.fit_transform(race), columns=race.columns))

    labels = np.asarray(df.query(query).podium.values)

    y_pred = svm_wins.predict(race)
    if list(y_pred)== list(labels):
        correct_predictions.append(i)
    print(f'Round:    {i}')
    print(f'Predicted: {y_pred}')
    print(f'Actual:    {labels}')
    print()

Round:    1
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    2
Predicted: [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    3
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    4
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    5
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    6
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    7
Predicted: [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    8
Predicted: [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    9
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [50]:
print(correct_predictions)
merged.query('season == 2021 & round in @correct_predictions & podium == 1')

[1, 3, 4, 5, 9, 15, 20, 21]


Unnamed: 0,season,round,circuit_id,country,lat,long,date,driver,nationality,constructor,...,driver_wins,driver_standings_pos,constructor_points,constructor_wins,constructor_standings_pos,pos,final_time,stage,q_delta,driver_age
2570,2021,1,bahrain,Bahrain,26.0325,50.5106,2021-03-28 15:00:00,hamilton,British,mercedes,...,1,1,41,1,1,2,89.385,q3,0.388,36
2604,2021,3,portimao,Portugal,37.227,-8.6267,2021-05-02 14:00:00,hamilton,British,mercedes,...,2,1,101,2,1,2,78.355,q3,0.007,36
2623,2021,4,catalunya,Spain,41.57,2.26111,2021-05-09 13:00:00,hamilton,British,mercedes,...,3,1,141,3,1,1,76.741,q3,0.0,36
2643,2021,5,monaco,Monaco,43.7347,7.42056,2021-05-23 13:00:00,max_verstappen,Dutch,red_bull,...,2,1,149,2,1,2,70.576,q3,0.23,23
2716,2021,9,red_bull_ring,Austria,47.2197,14.7647,2021-07-04 13:00:00,max_verstappen,Dutch,red_bull,...,5,1,286,6,1,1,63.72,q3,0.0,23
2764,2021,15,sochi,Russia,43.4057,39.9578,2021-09-26 12:00:00,hamilton,British,mercedes,...,5,1,397,5,1,4,104.05,q3,2.057,36
2837,2021,20,losail,Qatar,25.49,51.4542,2021-11-21 14:00:00,hamilton,British,mercedes,...,7,2,546,8,1,1,80.827,q3,0.0,36
2857,2021,21,jeddah,Saudi Arabia,21.6319,39.1044,2021-12-05 17:30:00,hamilton,British,mercedes,...,8,2,587,9,1,1,87.511,q3,0.0,36
