In [24]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F

In [25]:
# Load the pre-processed race table used by every later cell.
# NOTE(review): the frames displayed below show an 'Unnamed: 0' column, i.e. the
# CSV's saved index is read back as an ordinary data column — confirm whether
# it should be loaded as the index instead.
data = pd.read_csv('data/processed_data.csv')

In [26]:
# Peek at one race (2021, round 3) on a throw-away copy of the table.
viewport = data.copy()
viewport.loc[(viewport['season'] == 2021) & (viewport['round'] == 3)].iloc[:22]

Unnamed: 0,season,round,country,url,driver,grid,podium,driver_points,driver_wins,driver_standings_pos,...,constructor_minardi,constructor_racing_point,constructor_red_bull,constructor_renault,constructor_sauber,constructor_super_aguri,constructor_toro_rosso,constructor_toyota,constructor_virgin,constructor_williams
7155,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,hamilton,2,1,69.0,2.0,1.0,...,False,False,False,False,False,False,False,False,False,False
7156,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,max_verstappen,3,2,61.0,1.0,2.0,...,False,False,True,False,False,False,False,False,False,False
7157,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,bottas,1,3,32.0,0.0,4.0,...,False,False,False,False,False,False,False,False,False,False
7158,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,perez,4,4,22.0,0.0,6.0,...,False,False,True,False,False,False,False,False,False,False
7159,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,norris,7,5,37.0,0.0,3.0,...,False,False,False,False,False,False,False,False,False,False
7160,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,leclerc,8,6,28.0,0.0,5.0,...,False,False,False,False,False,False,False,False,False,False
7161,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,ocon,6,7,8.0,0.0,9.0,...,False,False,False,False,False,False,False,False,False,False
7162,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,alonso,13,8,5.0,0.0,12.0,...,False,False,False,False,False,False,False,False,False,False
7163,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,ricciardo,16,9,16.0,0.0,7.0,...,False,False,False,False,False,False,False,False,False,False
7164,2021,3,Portugal,http://en.wikipedia.org/wiki/2021_Portuguese_G...,gasly,9,10,7.0,0.0,10.0,...,False,False,False,False,False,False,False,False,False,False


In [27]:
# Work on a copy so the raw table stays untouched.
df = data.copy()
# Binary target: 1 when the driver finished first, 0 for every other result.
df['podium'] = df['podium'].eq(1).astype('int64')

# Seasons before 2021 form the training set.
train = df.loc[df['season'] < 2021]
train.head()

Unnamed: 0,season,round,country,url,driver,grid,podium,driver_points,driver_wins,driver_standings_pos,...,constructor_minardi,constructor_racing_point,constructor_red_bull,constructor_renault,constructor_sauber,constructor_super_aguri,constructor_toro_rosso,constructor_toyota,constructor_virgin,constructor_williams
0,2003,1,Australia,http://en.wikipedia.org/wiki/2003_Australian_G...,coulthard,11,1,10.0,1.0,1.0,...,False,False,False,False,False,False,False,False,False,False
1,2003,1,Australia,http://en.wikipedia.org/wiki/2003_Australian_G...,montoya,3,0,8.0,0.0,2.0,...,False,False,False,False,False,False,False,False,False,True
2,2003,1,Australia,http://en.wikipedia.org/wiki/2003_Australian_G...,raikkonen,15,0,6.0,0.0,3.0,...,False,False,False,False,False,False,False,False,False,False
3,2003,1,Australia,http://en.wikipedia.org/wiki/2003_Australian_G...,michael_schumacher,1,0,5.0,0.0,4.0,...,False,False,False,False,False,False,False,False,False,False
4,2003,1,Australia,http://en.wikipedia.org/wiki/2003_Australian_G...,trulli,12,0,4.0,0.0,5.0,...,False,False,False,True,False,False,False,False,False,False


In [28]:
# Features are every column except the identifiers/free-text fields and the target.
# NOTE(review): 'Unnamed: 0' (the CSV's saved row index, visible in the frames
# displayed above) is not dropped here, so it is fed to the model as a feature —
# confirm whether that is intended.
X_train = train.drop(columns=['driver', 'country', 'podium', 'url'])
y_train = train['podium']

scaler = StandardScaler()
y_train.head()

0    1
1    0
2    0
3    0
4    0
Name: podium, dtype: int64

In [29]:
# Fit the scaler on the training features and keep the original column labels.
# The result replaces X_train, so this cell is not meant to be run twice in a row.
X_train = pd.DataFrame(
    data=scaler.fit_transform(X_train),
    columns=X_train.columns,
)

In [30]:
class F1RacePrediction(nn.Module):
  """Small fully connected classifier over the tabular race features.

  The network maps ``in_features`` inputs through hidden layers of width
  75, 25 and 50 (ReLU in between) to ``num_classes`` raw logits; no softmax
  is applied, so the output is suitable for ``nn.CrossEntropyLoss``.

  Args:
    in_features: Number of feature columns per sample. Defaults to 96, the
      width this notebook was originally written for.
    num_classes: Number of output logits. Defaults to 10 so that existing
      checkpoints keep loading; the notebook's target only takes the values
      0 and 1, so ``num_classes=2`` is sufficient for a freshly trained model.
  """

  def __init__(self, in_features=96, num_classes=10):
    super().__init__()
    # Layer order (and therefore the state_dict keys 'layers.1', 'layers.3',
    # 'layers.5', 'layers.7') must stay as is for saved weights to load.
    self.layers = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features, 75),
        nn.ReLU(),
        nn.Linear(75, 25),
        nn.ReLU(),
        nn.Linear(25, 50),
        nn.ReLU(),
        nn.Linear(50, num_classes),
    )

  def forward(self, x):
    """Return the logits of shape ``(batch, num_classes)`` for input ``x``."""
    return self.layers(x)

In [31]:
# Convert via the underlying NumPy arrays with explicit dtypes. Passing the
# pandas Series straight to a tensor constructor goes through label-based
# indexing and only works while the Series index happens to be 0..n-1.
X_train = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
y_train = torch.tensor(y_train.to_numpy(), dtype=torch.long)

In [32]:
# Seed torch's RNG right before building the network so the initial weights
# are the same on every run.
torch.manual_seed(42)
model = F1RacePrediction()
# Trailing expression: display the module's layer summary.
model

F1RacePrediction(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=96, out_features=75, bias=True)
    (2): ReLU()
    (3): Linear(in_features=75, out_features=25, bias=True)
    (4): ReLU()
    (5): Linear(in_features=25, out_features=50, bias=True)
    (6): ReLU()
    (7): Linear(in_features=50, out_features=10, bias=True)
  )
)

In [33]:
# Cross-entropy on the raw logits, optimised with Adam at a small learning rate.
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [34]:
# Train the model.
# Full-batch training: each epoch is one optimisation step over the whole
# training set (no mini-batches).
epochs = 1700
losses = []  # training loss of every epoch, kept for later inspection/plotting
model.train()
for i in range(epochs):
    optimizer.zero_grad()
    y_pred = model(X_train)  # raw logits, one row per training sample
    loss = loss_function(y_pred, y_train)  # predicted logits vs. the y_train labels
    loss.backward()
    optimizer.step()

    # Store a plain float so the history does not hold on to autograd graphs.
    losses.append(loss.item())

    if i % 10 == 0:
        print(f'Epoch: {i} and loss: {loss.item()}')

Epoch: 0 and loss: 2.256469488143921
Epoch: 10 and loss: 2.2377305030822754
Epoch: 20 and loss: 2.2185897827148438
Epoch: 30 and loss: 2.1986076831817627
Epoch: 40 and loss: 2.177258014678955
Epoch: 50 and loss: 2.1539220809936523
Epoch: 60 and loss: 2.1278464794158936
Epoch: 70 and loss: 2.09820818901062
Epoch: 80 and loss: 2.064023017883301
Epoch: 90 and loss: 2.0240752696990967
Epoch: 100 and loss: 1.9768962860107422
Epoch: 110 and loss: 1.9205601215362549
Epoch: 120 and loss: 1.852848768234253
Epoch: 130 and loss: 1.7718623876571655
Epoch: 140 and loss: 1.6764267683029175
Epoch: 150 and loss: 1.566476821899414
Epoch: 160 and loss: 1.4432413578033447
Epoch: 170 and loss: 1.3090931177139282
Epoch: 180 and loss: 1.167349934577942
Epoch: 190 and loss: 1.0228010416030884
Epoch: 200 and loss: 0.8812899589538574
Epoch: 210 and loss: 0.7491693496704102
Epoch: 220 and loss: 0.6320563554763794
Epoch: 230 and loss: 0.5335862636566162
Epoch: 240 and loss: 0.4546900689601898
Epoch: 250 and loss

In [48]:
from copy import deepcopy

# Persist only the learned parameters (the state_dict) to 'racemodel.pth';
# torch.save is handed a deep copy of that dict rather than the live one.
# NOTE(review): the deepcopy looks redundant for an immediate save — confirm
# and, if so, drop it together with the mid-notebook import.
torch.save(deepcopy(model.state_dict()), 'racemodel.pth')

In [43]:
def scorecard_ts(season, model2, max_grid=20):
    """Print, for every round of ``season``, which grid slot the model picks as winner.

    Relies on the notebook-level ``data`` frame and the already fitted
    ``scaler``. For each round the grid slots ``1..max_grid`` are scored in
    order:

    * the first slot whose arg-max class is non-zero is taken as the predicted
      winner and the search stops;
    * if no slot is classified that way, the slot whose top softmax
      probability is lowest (i.e. the least confident "not a winner") is used
      as a fallback guess; the guess stays ``0`` if nothing qualifies.

    One line is printed per round, followed by a summary line.

    Args:
        season: Season (year) whose rounds are evaluated.
        model2: Trained network returning logits of shape ``(batch, classes)``.
        max_grid: Highest grid slot to consider. Defaults to 20.

    Returns:
        None. Results are printed only.
    """
    non_feature_cols = ['driver', 'country', 'podium', 'url']
    season_rows = data[data.season == season]
    score = 0
    predicted = 0

    model2.eval()
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        for circuit in season_rows['round'].unique():
            race = season_rows[season_rows['round'] == circuit]

            # Grid slot of the actual race winner, or None when the round has none recorded.
            winner_grids = race[race['podium'] == 1].grid.to_numpy()
            winner = winner_grids[0] if len(winner_grids) else None

            conf = 1.0
            guess = 0
            for grid in range(1, max_grid + 1):
                test = race[race['grid'] == grid]
                if test.empty:
                    # Nobody started from this slot in this round.
                    continue

                features = scaler.transform(test.drop(non_feature_cols, axis=1))
                logits = model2(torch.tensor(features, dtype=torch.float32))
                # Only the first matching row is scored, so an unexpected
                # duplicate grid entry cannot break the comparisons below.
                if logits[0].argmax().item() != 0:
                    guess = grid
                    break

                top_p = F.softmax(logits, dim=1)[0].max().item()
                if top_p < conf:
                    conf = top_p
                    guess = grid

            predicted += 1
            verdict = "CORRECTLY" if guess == winner else "INCORRECTLY"
            print(
                f'{verdict} predicted. P{guess} should win in round {circuit} (actual winner was P{winner})')
            score += 1 if guess == winner else 0

    print(f'{score} out of {predicted} predicted races')

In [46]:
# Rebuild the architecture, restore the parameters saved in 'racemodel.pth',
# and score the 2023 season (the training split above only uses seasons before 2021).
model2 = F1RacePrediction()
model2.load_state_dict(torch.load('racemodel.pth', weights_only=True))
scorecard_ts(2023, model2)

CORRECTLY predicted. P1 should win in round 1 (actual winner was P1)
INCORRECTLY predicted. P3 should win in round 2 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 3 (actual winner was P1)
INCORRECTLY predicted. P2 should win in round 4 (actual winner was P3)
CORRECTLY predicted. P9 should win in round 5 (actual winner was P9)
CORRECTLY predicted. P1 should win in round 6 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 7 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 8 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 9 (actual winner was P1)
CORRECTLY predicted. P1 should win in round 10 (actual winner was P1)
CORRECTLY predicted. P2 should win in round 11 (actual winner was P2)
CORRECTLY predicted. P6 should win in round 12 (actual winner was P6)
CORRECTLY predicted. P1 should win in round 13 (actual winner was P1)
CORRECTLY predicted. P2 should win in round 14 (actual winner was P2)
INCORRECTLY predicted. P1

In [47]:
# List every parameter tensor of the trained model together with its shape.
print("Model's state_dict:")
for name, tensor in model.state_dict().items():
    print(name, "\t", tensor.size())

Model's state_dict:
layers.1.weight 	 torch.Size([75, 96])
layers.1.bias 	 torch.Size([75])
layers.3.weight 	 torch.Size([25, 75])
layers.3.bias 	 torch.Size([25])
layers.5.weight 	 torch.Size([50, 25])
layers.5.bias 	 torch.Size([50])
layers.7.weight 	 torch.Size([10, 50])
layers.7.bias 	 torch.Size([10])
