In [1448]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F

In [1449]:
data = pd.read_csv('data/final_df.csv')

In [1450]:
df = data.copy()
df.podium = df.podium.map(lambda x: 1 if x == 1 else 0)

train = df[df.season <2019]
train.head()

Unnamed: 0,season,round,country,url_x,driver_x,grid,podium,url_y,driver_points,driver_wins,...,nationality_French,nationality_German,nationality_Spanish,constructor_ferrari,constructor_force_india,constructor_mclaren,constructor_mercedes,constructor_red_bull,constructor_renault,constructor_williams
0,2006,1,Bahrain,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,michael_schumacher,1,0,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,8,0,...,False,True,False,True,False,False,False,False,False,False
1,2006,1,Bahrain,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,massa,2,0,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,0,0,...,False,False,False,True,False,False,False,False,False,False
2,2006,1,Bahrain,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,button,3,0,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,5,0,...,False,False,False,False,False,False,False,False,False,False
3,2006,1,Bahrain,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,alonso,4,1,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,10,1,...,False,False,True,False,False,False,False,False,True,False
4,2006,1,Bahrain,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,montoya,5,0,http://en.wikipedia.org/wiki/2006_Bahrain_Gran...,4,0,...,False,False,False,False,False,True,False,False,False,False


In [1451]:
X_train = train.drop(['driver_x', 'country', 'podium', 'url_x', 'url_y', 'driver_y'], axis = 1)
y_train = train.podium

scaler = StandardScaler()
y_train.head()

0    0
1    0
2    0
3    1
4    0
Name: podium, dtype: int64

In [1452]:
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns = X_train.columns)

In [1453]:
class MLP(nn.Module):
  '''
    Multilayer Perceptron.
  '''
  def __init__(self):
    super().__init__()
    self.layers = nn.Sequential (
        nn.Flatten(),
        nn.Linear(47, 75),
        nn.ReLU(),
        nn.Linear(75, 25),
        nn.ReLU(),
        nn.Linear(25,10)
    )

  def forward(self, x):
    '''Forward pass'''
    return self.layers(x)

In [1454]:
X_train = torch.FloatTensor(X_train.to_numpy())
y_train = torch.LongTensor(y_train)

In [1455]:
torch.manual_seed(42)
model = MLP()
model

MLP(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=47, out_features=75, bias=True)
    (2): ReLU()
    (3): Linear(in_features=75, out_features=25, bias=True)
    (4): ReLU()
    (5): Linear(in_features=25, out_features=10, bias=True)
  )
)

In [1456]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), 
                            lr=1e-4)

In [1457]:

# Train our model!
# Epochs? (one run thru all the training data in our network)
epochs = 2000
losses = []
for i in range(epochs):
    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = loss_function(y_pred, y_train) # predicted values vs the y_train
    loss.backward()
    optimizer.step()
    
    if i % 10 == 0:
        print(f'Epoch: {i} and loss: {loss}')

Epoch: 0 and loss: 2.1283297538757324
Epoch: 10 and loss: 2.0977118015289307
Epoch: 20 and loss: 2.0672767162323
Epoch: 30 and loss: 2.036799192428589
Epoch: 40 and loss: 2.0060231685638428
Epoch: 50 and loss: 1.9746789932250977
Epoch: 60 and loss: 1.942529320716858
Epoch: 70 and loss: 1.90923011302948
Epoch: 80 and loss: 1.8746285438537598
Epoch: 90 and loss: 1.8385237455368042
Epoch: 100 and loss: 1.800746202468872
Epoch: 110 and loss: 1.7611677646636963
Epoch: 120 and loss: 1.7196108102798462
Epoch: 130 and loss: 1.6759260892868042
Epoch: 140 and loss: 1.6300727128982544
Epoch: 150 and loss: 1.5820943117141724
Epoch: 160 and loss: 1.532013177871704
Epoch: 170 and loss: 1.4799139499664307
Epoch: 180 and loss: 1.425998330116272
Epoch: 190 and loss: 1.3705828189849854
Epoch: 200 and loss: 1.3140714168548584
Epoch: 210 and loss: 1.256737232208252
Epoch: 220 and loss: 1.1989731788635254
Epoch: 230 and loss: 1.1412169933319092
Epoch: 240 and loss: 1.0839619636535645
Epoch: 250 and loss: 1

In [1472]:
def scorecard(season):
    df = data.copy()
    df.podium = df.podium.map(lambda x: 1 if x == 1 else 0)
    score = 0
    count = 0
    for circuit in df[df.season == season]['round'].unique():
        count +=1
        test = df[(df.season == season) & (df['round'] == circuit) & (df['grid'] == 1)]
    
        winner = data[(data.season == season) & (data['round'] == circuit) & (data['podium'] == 1)].grid
        try:
            winner = winner.to_numpy()[0]
        except:
            winner = None
        
        X_test = test.drop(['driver_x', 'country', 'podium', 'url_x', 'url_y', 'driver_y'], axis = 1)
        y_test = test.podium
        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)
        X_test = torch.Tensor(X_test.to_numpy())
        with torch.no_grad():
            model.eval()
            prediction = model(X_test)
            prob = F.softmax(prediction, dim=1)
            top_p, top_class = prob.topk(1, dim = 1)
            if prediction.argmax().item() == y_test.to_numpy()[0]:
                score+=1
                print(f'CORRECTLY predicted the pole to {"win" if prediction.argmax().item() == 1 else "lose"} with {top_p}% confidence')
            else:
                print(f'INCORRECTLY predicted the pole to {"win" if prediction.argmax().item() == 1 else "lose"} with {top_p}% confidence')

    print(f'{score} out of {count} races')

In [1467]:
def scorecard_ts(season):
    df = data.copy()
    df.podium = df.podium.map(lambda x: 1 if x == 1 else 0)
    score = 0
    count = 0
    predicted = 0
    for circuit in df[df.season == season]['round'].unique():
        count +=1
        winner = data[(data.season == season) & (data['round'] == circuit) & (data['podium'] == 1)].grid
        try:
            winner = winner.to_numpy()[0]
        except:
            winner = None
        model.eval()
        for grid in range(20):
            test = df[(df.season == season) & (df['round'] == circuit) & (df['grid'] == grid + 1)]  
            X_test = test.drop(['driver_x', 'country', 'podium', 'url_x', 'url_y', 'driver_y'], axis = 1)
            try:
                X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)
            except:
                continue
            X_test = torch.Tensor(X_test.to_numpy())
            prediction = model(X_test)
            if prediction.argmax().item():
                predicted+=1
                print(f'{"CORRECTLY" if grid+1 == winner else "INCORRECTLY"} predicted. P{grid+1} should win in round {circuit} (actual winner was P{winner})')
                score+= 1 if grid+1 == winner else 0
                break

    print(f'{score} out of {predicted} predictedraces')

In [1505]:
def scorecard_pole(season):
    score = 0
    count = 0
    predicted = 0
    for circuit in df[df.season == season]['round'].unique():
        count +=1
        try:
            winner = data[(data.season == season) & (data['round'] == circuit) & (data['podium'] == 1)].grid.to_numpy()[0]
        except:
            continue
        if winner == 1:
            score+=1
    print(f'{score}')

In [1506]:
scorecard_ts(2023)

CORRECTLY predicted. P1 should win in round 1 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 2 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 3 (actual winner was P1)
INCORRECTLY predicted. P2 should win in round 4 (actual winner was P3)
INCORRECTLY predicted. P1 should win in round 5 (actual winner was PNone)
CORRECTLY predicted. P1 should win in round 6 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 7 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 8 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 9 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 10 (actual winner was P1)
CORRECTLY predicted. P2 should win in round 11 (actual winner was P2)
CORRECTLY predicted. P6 should win in round 12 (actual winner was P6)
CORRECTLY predicted. P1 should win in round 13 (actual winner was P1)
CORRECTLY predicted. P2 should win in round 14 (actual winner was P2)
CORRECTLY predicted. P

In [1507]:
scorecard(2023)

CORRECTLY predicted the pole to win with tensor([[0.7993]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.5779]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.8604]])% confidence
CORRECTLY predicted the pole to lose with tensor([[0.8583]])% confidence
INCORRECTLY predicted the pole to win with tensor([[0.6558]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.8579]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.9527]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.8709]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.9742]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.9336]])% confidence
CORRECTLY predicted the pole to lose with tensor([[0.9471]])% confidence
CORRECTLY predicted the pole to lose with tensor([[0.9613]])% confidence
CORRECTLY predicted the pole to win with tensor([[0.9450]])% confidence
CORRECTLY predicted the pole to lose with tensor([[0.9227]]

In [1508]:
scorecard_pole(2023)

14
