In [None]:
# ==============================
# BLOCK 1: IMPORTS & CONFIG
# ==============================

import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# ------------------------------
# HYPERPARAMETERS (TUNE HERE)
# ------------------------------

SEQ_LEN = 20                  # How many previous laps to look at
BATCH_SIZE = 64
EPOCHS = 20
LR = 3e-4                     # Learning rate
D_MODEL = 128                 # Transformer hidden size
NHEAD = 4                     # Attention heads
NUM_LAYERS = 3                # Transformer layers
DROPOUT = 0.1
ALPHA = 1.0                   # Weight for pit loss
BETA = 0.5                    # Weight for tire loss
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SEED = 42                     # Seed for NumPy / PyTorch random number generators

# Multi-head attention splits the hidden size evenly across heads, so fail here
# with a clear message instead of deep inside model construction.
assert D_MODEL % NHEAD == 0, "D_MODEL must be divisible by NHEAD"

# Seed the RNGs so weight initialisation and DataLoader shuffling are repeatable
# across "Restart Kernel -> Run All" executions.
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# ==============================
# BLOCK 2: LOAD DATA
# ==============================

# Location of the lap-level CSV. The default is the original author's local path;
# set the F1_DATA_PATH environment variable to run the notebook on another
# machine without editing this cell.
DEFAULT_DATA_PATH = "/Users/nurulmansibtalukder/Desktop/WAY TO EVERYTHING/BUET_STUFFS/3-2/CSE 330 - Machine Learning Sessional/ML-Project-CSE330---F1-Pitstop-Strategy-Predictor/src/f1_complete_dataset_2020_2024.csv"
data_path = os.environ.get("F1_DATA_PATH", DEFAULT_DATA_PATH)
df = pd.read_csv(data_path)

df.head()

In [None]:
# ==============================
# BLOCK 3: FEATURE ENGINEERING
# ==============================

# Example derived features, all computed per (race, driver) so that values never
# mix laps belonging to different cars or different races.
# NOTE(review): diff/rolling follow the row order inside each group; this assumes
# the rows of each race/driver are already ordered by lap -- confirm for the CSV.
lap_time_by_car = df.groupby(["race_id", "driver_id"])["lap_time"]

# Lap-over-lap change in lap time; the first lap of a group has no predecessor -> 0.
df["degradation"] = lap_time_by_car.diff().fillna(0)

# Trailing mean over up to 3 laps. min_periods=1 lets the first two laps of a
# group use the laps available so far. The previous global backfill
# (`fillna(method="bfill")`, a deprecated call) copied values from later laps
# into earlier ones and could pull values from the next group when a group had
# fewer than 3 laps.
df["rolling_avg_3"] = lap_time_by_car.transform(
    lambda laps: laps.rolling(3, min_periods=1).mean()
)

# Undercut flag (example logic): close behind the car ahead on tires older than 10 laps
df["undercut_flag"] = (
    (df["gap_to_front"] < 3.0) & (df["tire_age"] > 10)
).astype(int)

# Targets
df["pit_label"] = df["is_pit_lap"]  # 1 if pit occurred
# The tire head trains with cross-entropy, so `next_compound` must already hold
# integer class ids (0, 1, 2); this cell copies the column without encoding it.
df["tire_label"] = df["next_compound"]  # must be encoded as 0,1,2

In [None]:
# ==============================
# BLOCK 4: NORMALIZATION
# ==============================

# Numeric per-lap columns that are standardized and later fed to the model.
continuous_cols = [
    # race context
    "lap_number",
    "position",
    # gaps to other cars
    "gap_to_leader",
    "gap_to_front",
    "gap_to_behind",
    # timing
    "lap_time",
    "sector_1",
    "sector_2",
    "sector_3",
    # conditions and tire state
    "track_temp",
    "tire_age",
    # derived features
    "degradation",
    "rolling_avg_3",
]

# Standardize each column using statistics computed over the whole frame.
# The scaled values overwrite the originals in `df`, so re-running this cell
# re-scales data that has already been scaled.
scaler = StandardScaler()
scaler.fit(df[continuous_cols])
df[continuous_cols] = scaler.transform(df[continuous_cols])

In [None]:
# ==============================
# BLOCK 5: SEQUENCE CREATION
# ==============================

def create_sequences(df, seq_len):
    """Slice every (race, driver) history into fixed-length lap windows.

    Each group is ordered by ``lap_number``. For every window of ``seq_len``
    consecutive rows, the row immediately after the window supplies the labels.

    Args:
        df: Frame with ``race_id``, ``driver_id``, ``lap_number``,
            ``pit_label`` and ``tire_label`` columns.
        seq_len: Number of rows in each window.

    Returns:
        A list of dicts with keys ``"features"`` (the window as a DataFrame
        slice), ``"pit_label"`` and ``"tire_label"`` (taken from the row that
        follows the window). Groups with ``seq_len`` rows or fewer contribute
        nothing.
    """
    windows = []

    for _key, laps in df.groupby(["race_id", "driver_id"]):
        ordered = laps.sort_values("lap_number")
        n_windows = len(ordered) - seq_len

        for start in range(n_windows):
            stop = start + seq_len
            # The lap right after the window is the prediction target.
            target = ordered.iloc[stop]
            windows.append(
                {
                    "features": ordered.iloc[start:stop],
                    "pit_label": target["pit_label"],
                    "tire_label": target["tire_label"],
                }
            )

    return windows

# Build every (lap window, next-lap labels) sample from the prepared frame.
sequences = create_sequences(df, seq_len=SEQ_LEN)

In [None]:
# ==============================
# BLOCK 6: DATASET CLASS
# ==============================

class F1Dataset(Dataset):
    """Torch dataset over the window dicts produced by ``create_sequences``.

    Each item is a 5-tuple of tensors:
    ``(lap_features, driver_id, track_id, pit_label, tire_label)``.
    """

    def __init__(self, sequences):
        # List of dicts with "features", "pit_label" and "tire_label" keys.
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sample = self.sequences[idx]
        window = sample["features"]

        # Per-lap model inputs: the continuous columns plus the undercut flag.
        feature_cols = continuous_cols + ["undercut_flag"]
        lap_tensor = torch.tensor(window[feature_cols].values, dtype=torch.float32)

        # Driver and track ids are read from the first lap of the window.
        driver_tensor = torch.tensor(window["driver_id"].iloc[0], dtype=torch.long)
        track_tensor = torch.tensor(window["track_id"].iloc[0], dtype=torch.long)

        # Labels: float for the binary pit loss, long for the class-index tire loss.
        pit_tensor = torch.tensor(sample["pit_label"], dtype=torch.float32)
        tire_tensor = torch.tensor(sample["tire_label"], dtype=torch.long)

        return lap_tensor, driver_tensor, track_tensor, pit_tensor, tire_tensor

# Wrap the windows in a dataset and serve them in shuffled mini-batches.
dataset = F1Dataset(sequences)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=BATCH_SIZE)

In [None]:
# ==============================
# BLOCK 7: TRANSFORMER MODEL
# ==============================

class PitTransformer(nn.Module):
    """Transformer encoder over a window of laps with two prediction heads.

    Driver and track embeddings are concatenated to every lap's feature vector,
    projected to the model width, and passed through a transformer encoder. The
    encoder output at the last position of the window feeds both heads.

    Args:
        input_dim: Number of per-lap input features.
        num_drivers: Size of the driver embedding table (ids must be < this).
        num_tracks: Size of the track embedding table (ids must be < this).
        num_tire_classes: Number of outputs of the tire head (default 3).
        d_model: Transformer hidden size; defaults to the global ``D_MODEL``.
        nhead: Number of attention heads; defaults to the global ``NHEAD``.
        num_layers: Number of encoder layers; defaults to the global ``NUM_LAYERS``.
        dropout: Encoder dropout; defaults to the global ``DROPOUT``.
        emb_dim: Width of each of the two embeddings (default 8).
    """

    def __init__(self, input_dim, num_drivers, num_tracks, num_tire_classes=3,
                 d_model=None, nhead=None, num_layers=None, dropout=None,
                 emb_dim=8):
        super().__init__()

        # Fall back to the notebook-level hyperparameters when not given, so
        # existing three-argument construction behaves exactly as before.
        d_model = D_MODEL if d_model is None else d_model
        nhead = NHEAD if nhead is None else nhead
        num_layers = NUM_LAYERS if num_layers is None else num_layers
        dropout = DROPOUT if dropout is None else dropout

        # Embeddings (TUNE DIMENSIONS via emb_dim)
        self.driver_emb = nn.Embedding(num_drivers, emb_dim)
        self.track_emb = nn.Embedding(num_tracks, emb_dim)

        # Lap features + driver embedding + track embedding -> model width
        self.input_proj = nn.Linear(input_dim + 2 * emb_dim, d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dropout=dropout,
            batch_first=True
        )

        self.transformer = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers
        )

        # Multi-task heads
        self.pit_head = nn.Linear(d_model, 1)
        self.tire_head = nn.Linear(d_model, num_tire_classes)

    def forward(self, x, driver_id, track_id):
        """Run the model on a batch of lap windows.

        Args:
            x: Lap features, indexed as (batch, sequence, feature).
            driver_id: One driver index per batch element.
            track_id: One track index per batch element.

        Returns:
            ``(pit_logits, tire_logits)`` with shapes ``(batch, 1)`` and
            ``(batch, num_tire_classes)``.
        """
        driver_emb = self.driver_emb(driver_id)
        track_emb = self.track_emb(track_id)

        # Broadcast the per-sample context to every lap in the window. `expand`
        # creates a view, so no copy is made before the concatenation below.
        context = torch.cat([driver_emb, track_emb], dim=1)
        context = context.unsqueeze(1).expand(-1, x.size(1), -1)

        x = torch.cat([x, context], dim=2)
        x = self.input_proj(x)

        x = self.transformer(x)

        # Use the encoder output at the last lap of the window as the summary.
        h_t = x[:, -1, :]

        pit_logits = self.pit_head(h_t)
        tire_logits = self.tire_head(h_t)

        return pit_logits, tire_logits

In [None]:
# ==============================
# BLOCK 8: LOSS FUNCTIONS
# ==============================

class FocalLoss(nn.Module):
    """Binary focal loss computed from raw logits.

    The per-element binary cross-entropy is scaled by ``(1 - p_t) ** gamma``,
    where ``p_t = exp(-bce)``, and the result is averaged over all elements.

    Args:
        gamma: Exponent of the modulating factor.
    """

    def __init__(self, gamma=2):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        """Return the mean focal loss.

        ``targets`` gets a trailing dimension added (``unsqueeze(1)``) before it
        is compared with ``logits``.
        """
        per_element_bce = F.binary_cross_entropy_with_logits(
            logits,
            targets.unsqueeze(1),
            reduction="none",
        )
        prob_of_truth = (-per_element_bce).exp()
        modulator = (1 - prob_of_truth).pow(self.gamma)
        return (modulator * per_element_bce).mean()


# Pit head: focal loss (TUNE gamma). Tire head: cross-entropy over class logits.
pit_loss_fn = FocalLoss(gamma=2)
tire_loss_fn = nn.CrossEntropyLoss()

In [None]:
# ==============================
# BLOCK 9: TRAINING LOOP
# ==============================

# Embedding tables are indexed by the raw id values, so they must have
# max_id + 1 rows. Sizing them with nunique() only works when ids are exactly
# 0..n-1 and raises an index error for 1-based or sparse ids.
# NOTE(review): assumes driver_id / track_id are non-negative integers -- confirm.
model = PitTransformer(
    input_dim=len(continuous_cols) + 1,  # continuous columns + undercut_flag
    num_drivers=int(df["driver_id"].max()) + 1,
    num_tracks=int(df["track_id"].max()) + 1
).to(DEVICE)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for x, driver_id, track_id, pit_label, tire_label in tqdm(dataloader):

        x = x.to(DEVICE)
        driver_id = driver_id.to(DEVICE)
        track_id = track_id.to(DEVICE)
        pit_label = pit_label.to(DEVICE)
        tire_label = tire_label.to(DEVICE)

        optimizer.zero_grad()

        pit_logits, tire_logits = model(x, driver_id, track_id)

        pit_loss = pit_loss_fn(pit_logits, pit_label)

        # The tire head is trained only on samples whose target lap is a pit
        # lap; a batch without any contributes zero tire loss.
        tire_mask = pit_label == 1
        if tire_mask.any():
            tire_loss = tire_loss_fn(tire_logits[tire_mask], tire_label[tire_mask])
        else:
            tire_loss = torch.tensor(0.0, device=DEVICE)

        # Weighted multi-task objective
        loss = ALPHA * pit_loss + BETA * tire_loss

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(dataloader):.4f}")

In [None]:
# ==============================
# BLOCK 10: EVALUATION
# ==============================

# NOTE: `dataloader` is the same loader the training loop iterates over, so the
# numbers below are in-sample metrics, not an estimate of held-out performance.
model.eval()

all_probs = []
all_preds = []
all_targets = []

with torch.no_grad():
    for x, driver_id, track_id, pit_label, tire_label in dataloader:

        x = x.to(DEVICE)
        driver_id = driver_id.to(DEVICE)
        track_id = track_id.to(DEVICE)

        pit_logits, _ = model(x, driver_id, track_id)
        probs = torch.sigmoid(pit_logits).cpu().numpy().flatten()

        # Hard decision at a fixed 0.5 probability threshold
        preds = (probs > 0.5).astype(int)

        all_probs.extend(probs)
        all_preds.extend(preds)
        # Labels are stored as floats for the loss; use ints so targets and
        # predictions share one label type in the metric functions.
        all_targets.extend(pit_label.numpy().astype(int))

# zero_division=0 reports 0 (without a warning) when a metric's denominator is
# empty, e.g. when the model predicts no pit stops at all.
print("F1:", f1_score(all_targets, all_preds, zero_division=0))
print("Precision:", precision_score(all_targets, all_preds, zero_division=0))
print("Recall:", recall_score(all_targets, all_preds, zero_division=0))

# Threshold-free ranking metric from the raw probabilities; it is only defined
# when both classes occur in the targets.
if len(set(all_targets)) > 1:
    print("ROC AUC:", roc_auc_score(all_targets, all_probs))
else:
    print("ROC AUC: undefined (only one class present in targets)")