In [1]:
import pandas as pd
import numpy as np
import warnings

from sklearn import svm

from sys import platform

# Pick the repository root that data files are loaded from.
# The Windows and macOS entries are machine-specific checkouts. Any other
# platform falls back to the current working directory so that `path` is
# always defined; previously it was left unset there and the first use of
# `path` raised NameError.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'
else:
    # NOTE(review): assumes the notebook is started from the repository root — confirm.
    path = './'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

# IPython line magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Load the merged dataset from the data/ folder under the configured root.
data = pd.read_csv(f'{path}data/merged.csv')

In [3]:
# Work on a copy so the frame held in `data` is not modified.
df = data.copy()

# Binarise the target: 1 where podium equals 1, 0 for every other value.
df['podium'] = df['podium'].eq(1).astype('int64')

# Rows from the 2021 season are left out of the training set.
train = df[df.season != 2021]

# Columns removed before building the feature matrix (includes the target `podium`).
excluded_columns = [
    'driver', 'podium', 'circuit_id', 'country', 'date', 'date_of_birth',
    'nationality', 'constructor', 'fastest_lap', 'q1', 'q2', 'q3', 'lat',
    'final_time', 'long', 'season', 'round', 'pos',
]
X_train = train.drop(columns=excluded_columns).to_numpy()
y_train = train['podium'].to_numpy()

In [4]:
# Fit a linear-kernel support vector classifier on the training features and labels.
# `gamma` is intentionally not passed: scikit-learn uses it only for the 'rbf',
# 'poly' and 'sigmoid' kernels, so it has no effect on a linear kernel and only
# suggested tuning that was not happening.
# NOTE(review): positives look rare compared with negatives in the labels this
# notebook prints; consider evaluating class_weight='balanced' — TODO confirm.
svm_wins = svm.SVC(kernel="linear")
svm_wins.fit(X_train, y_train)

In [5]:
# Sanity check: the number of distinct (season, round) pairs should equal the
# number of rows with podium == 1. Both counts are printed for comparison.
race_count = len(df.groupby(['season', 'round']).size())
winner_count = len(df[df.podium == 1])

print(race_count)

print(winner_count)

160
160


In [18]:
# Evaluate the fitted model one race at a time on the 2021 season and record
# the rounds whose predicted label vector matches the actual one exactly.
correct_predictions = []

# Columns removed before prediction; this must match the list dropped when the
# training matrix was built so the feature layout is the same.
non_feature_columns = [
    'driver', 'podium', 'circuit_id', 'country', 'date', 'date_of_birth',
    'nationality', 'constructor', 'fastest_lap', 'q1', 'q2', 'q3', 'lat',
    'final_time', 'long', 'season', 'round', 'pos',
]

season_2021 = df[df.season == 2021]

# Take the round numbers from the data instead of a hard-coded range(1, 22):
# that range stopped at round 21 and would hand predict() an empty array for
# any round that has no rows. int() keeps plain Python ints in the result list.
for i in sorted(int(r) for r in season_2021['round'].unique()):
    # `round` is also a DataFrame method, so the column is accessed by key.
    race_rows = season_2021[season_2021['round'] == i]

    race = race_rows.drop(columns=non_feature_columns).to_numpy()
    labels = race_rows.podium.to_numpy()

    y_pred = svm_wins.predict(race)
    # A round counts as correct only when every driver's label is predicted right.
    if np.array_equal(y_pred, labels):
        correct_predictions.append(i)
    print(f'Round:    {i}')
    print(f'Predicted: {y_pred}')
    print(f'Actual:    {labels}')
    print()

Round:    1
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    2
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    3
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    4
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    5
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    6
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    7
Predicted: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    8
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actual:    [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

Round:    9
Predicted: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

In [19]:
# Print the exactly-predicted 2021 rounds, then display the rows with
# podium == 1 that belong to those rounds.
print(correct_predictions)

is_2021 = df.season == 2021
# `round` is also a DataFrame method, so the column is accessed by key.
in_correct_round = df['round'].isin(correct_predictions)
is_winner = df.podium == 1
df[is_2021 & in_correct_round & is_winner]

[4, 8, 9, 12, 13, 17, 18]


Unnamed: 0,season,round,circuit_id,country,lat,long,date,driver,date_of_birth,nationality,...,driver_standings_pos,constructor_points,constructor_wins,constructor_standings_pos,pos,final_time,q_delta,q1,q2,q3
2824,2021,4,catalunya,Spain,41.57,2.26111,2021-05-09 13:00:00,hamilton,1985-01-07,British,...,1,141.0,3,1,1,76.741,0.0,78.245,77.166,76.741
2902,2021,8,red_bull_ring,Austria,47.2197,14.7647,2021-06-27 13:00:00,max_verstappen,1997-09-30,Dutch,...,1,252.0,5,1,1,63.841,0.0,64.489,64.433,63.841
2922,2021,9,red_bull_ring,Austria,47.2197,14.7647,2021-07-04 13:00:00,max_verstappen,1997-09-30,Dutch,...,1,286.0,6,1,1,63.72,0.0,64.249,63.927,63.72
2979,2021,12,spa,Belgium,50.4372,5.97139,2021-08-29 13:00:00,max_verstappen,1997-09-30,Dutch,...,2,303.5,7,2,1,119.765,0.0,118.717,116.559,119.765
2997,2021,13,zandvoort,Netherlands,52.3888,4.54092,2021-09-05 13:00:00,max_verstappen,1997-09-30,Dutch,...,1,332.5,8,2,1,68.885,0.0,70.036,69.071,68.885
3073,2021,17,americas,USA,30.1328,-97.6411,2021-10-24 19:00:00,max_verstappen,1997-09-30,Dutch,...,1,437.5,9,2,1,92.91,0.0,94.352,93.464,92.91
3093,2021,18,rodriguez,Mexico,19.4042,-99.0907,2021-11-07 19:00:00,max_verstappen,1997-09-30,Dutch,...,1,477.5,10,2,3,76.225,0.35,76.788,76.483,76.225


In [17]:
# Build a frame from the full dataset with the listed columns removed, then
# show its dimensions.
dropped_columns = [
    'driver', 'podium', 'circuit_id', 'country', 'date', 'date_of_birth',
    'nationality', 'constructor', 'fastest_lap', 'q1', 'q2', 'q3', 'lat',
    'final_time', 'long', 'season', 'round', 'pos',
]
test_df = data.drop(columns=dropped_columns)
test_df.shape

(3191, 8)