In [47]:
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score, accuracy_score, precision_score, make_scorer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, RobustScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models, layers, regularizers
from tensorflow.keras.callbacks import EarlyStopping

from imblearn.combine import SMOTETomek
from scipy import stats


In [2]:
laps = pd.read_csv("../raw_data/new_clean_data.csv")

  laps = pd.read_csv("../raw_data/new_clean_data.csv")


In [6]:
laps.drop(columns= ["Unnamed: 0.1",
                   "Time",
                   "DriverNumber",
                   "LapTime",
                   "Stint", 
                   "PitOutTime", 
                   "PitInTime", 
                   "Sector1Time",
                   "Sector2Time", 
                   "Sector3Time", 
                   "Sector1SessionTime",
                   "Sector2SessionTime", 
                   "Sector3SessionTime", 
                   "SpeedI1", 
                   "SpeedI2",
                   "SpeedFL",
                   "SpeedST",
                   "LapStartTime",
                   "LapStartDate",
                   "TrackStatus",
                   "Deleted", 
                   "DeletedReason", 
                   "FastF1Generated", 
                   "IsAccurate",
                   "status_list",
                   "TotalLaps",
                   "Time_min", 
                   "Unnamed: 0", 
                   "Time_w", 
                   "AirTemp", 
                   "Humidity", 
                   "Pressure", 
                   "Rainfall",
                   "WindDirection",
                   "WindSpeed", 
                   "Final_Position", 
                   "LocationYear",
                   "next_compound",
                   "FreshTyre",
                   "pitting_this_lap"], inplace = True)

In [7]:
laps["pitting_next_lap"] = laps["pitting_next_lap"].apply(lambda x: 1 if x == True else 0)

In [9]:
laps_for_model = laps.copy()
train_df = laps_for_model[laps_for_model["Year"] < 2022]
val_df = laps_for_model[laps_for_model["Year"] == 2022]
test_df = laps_for_model[laps_for_model["Year"] == 2023]
train_df_shuffled = train_df.sample(frac=1)
val_df_shuffled = val_df.sample(frac=1)
test_df_shuffled = test_df.sample(frac=1)

y_train = train_df_shuffled["pitting_next_lap"]
y_val = val_df_shuffled["pitting_next_lap"]
y_test = test_df_shuffled["pitting_next_lap"]

X_train = train_df_shuffled.drop(columns="pitting_next_lap")
X_val = val_df_shuffled.drop(columns="pitting_next_lap")
X_test = test_df_shuffled.drop(columns="pitting_next_lap")

cat_features = ["Driver", "Compound", "Team", "Location", "second_compound", "TyreStressLevel", "status", "close_ahead", "close_behind", "is_pitting_ahead", "is_pitting_behind", "IsPersonalBest", "Position", "Year"]
cat_features_preproc = make_pipeline(OneHotEncoder(sparse=False, handle_unknown="ignore"))
num_features = ["LapNumber", "TyreLife", "LastTeamRanking", "TrackTemp"]
num_features_preproc = make_pipeline(RobustScaler())

preproc_baseline = make_column_transformer((cat_features_preproc, cat_features),
                                           (num_features_preproc, num_features),
                                           remainder="passthrough")

preproc_baseline.fit(X_train)
X_train_preproc = preproc_baseline.transform(X_train)
X_test_preproc = preproc_baseline.transform(X_test)
X_val_preproc = preproc_baseline.transform(X_val)

smt = SMOTETomek(sampling_strategy=0.5)
X_train_preproc_resamp, y_train_resamp = smt.fit_resample(X_train_preproc, y_train)

def custom_metric(y_test, y_pred_rf):
    predictions = list(y_pred_rf)
    reality = y_test.to_list()
    tracker = 0
    ones_count = reality.count(1)

    for i in range(len(reality)):
        if reality[i] == 1:
            if predictions[i] == 1:
                tracker += 1
            elif i > 0 and predictions[i - 1] == 1:
                tracker += 1
            elif i < len(predictions) - 1 and predictions[i + 1] == 1:
                tracker += 1

    return tracker / ones_count



# KNN 11  neighbors

In [65]:
model_knn = KNeighborsClassifier(n_neighbors=11)

In [66]:
model_knn.fit(X_train_preproc_resamp,y_train_resamp)

In [67]:
y_pred_knn = model_knn.predict(X_test_preproc)

In [68]:
custom_metric(y_test,y_pred_knn)

0.6971830985915493

In [120]:
X_test_eval_df = X_test.copy()
y_test_eval_df = y_test.copy()
X_test_eval_df["RealPitting"] = y_test_eval_df
X_test_eval_df["ModelPitting"] = y_pred_knn
X_test_eval_df.sort_values(by = ["Driver", "Location", "LapNumber"], inplace=True)

In [124]:
X_test_eval_df[(X_test_eval_df["Driver"] == "VER") & (X_test_eval_df["Location"] == "Baku")]

Unnamed: 0,Driver,LapNumber,IsPersonalBest,Compound,TyreLife,Team,Position,Location,Year,LastTeamRanking,...,status,RaceProgress,TyreStressLevel,TrackTemp,close_ahead,close_behind,is_pitting_ahead,is_pitting_behind,RealPitting,ModelPitting
64421,VER,6.0,False,MEDIUM,6.0,RedBull,1.0,Baku,2023,1.0,...,1,0.117647,3,43.3,False,False,False,False,0,0
64422,VER,7.0,True,MEDIUM,7.0,RedBull,1.0,Baku,2023,1.0,...,1,0.137255,3,41.6,False,False,False,False,0,1
64423,VER,8.0,True,MEDIUM,8.0,RedBull,1.0,Baku,2023,1.0,...,1,0.156863,3,41.7,False,False,False,False,0,1
64424,VER,9.0,False,MEDIUM,9.0,RedBull,1.0,Baku,2023,1.0,...,1,0.176471,3,42.2,False,False,False,False,1,0
64425,VER,10.0,False,MEDIUM,10.0,RedBull,3.0,Baku,2023,1.0,...,1,0.196078,3,42.7,False,True,False,False,0,0
64426,VER,11.0,False,HARD,1.0,RedBull,7.0,Baku,2023,1.0,...,2,0.215686,3,41.0,True,False,True,False,0,0
64427,VER,12.0,False,HARD,2.0,RedBull,3.0,Baku,2023,1.0,...,3,0.235294,3,41.0,False,False,False,False,0,0
64428,VER,13.0,False,HARD,3.0,RedBull,3.0,Baku,2023,1.0,...,4,0.254902,3,41.7,False,False,False,False,0,0
64429,VER,14.0,False,HARD,4.0,RedBull,2.0,Baku,2023,1.0,...,1,0.27451,3,42.5,False,False,False,False,0,0
64430,VER,15.0,True,HARD,5.0,RedBull,2.0,Baku,2023,1.0,...,1,0.294118,3,43.0,False,True,False,False,0,0


LECLERC
Barcelona: 4/5
Baku: 4/5
Jeddah: 2,5/5

SAINZ
Barcelona: 2/5
Baku: 2/5
Jeddah: 3/5

HAMILTON
Barcelona: 2/5
Baku: 1/5
Jeddah: 3/5

VERSTAPPEN
Barcelona: 3/5
Baku: 4/5
Jeddah: 2/5

# SVM

In [102]:
model_svm = SVC()
model_svm.fit(X_train_preproc_resamp,y_train_resamp)
y_pred_svm = model_svm.predict(X_test_preproc)
custom_metric(y_test,y_pred_svm)

0.823943661971831

In [115]:
X_test_eval_df = X_test.copy()
y_test_eval_df = y_test.copy()
X_test_eval_df["RealPitting"] = y_test_eval_df
X_test_eval_df["ModelPitting"] = y_pred_svm
X_test_eval_df.sort_values(by = ["Driver", "Location", "LapNumber"], inplace=True)

X_test_eval_df[(X_test_eval_df["Driver"] == "VER") & (X_test_eval_df["Location"] == "Barcelona")]

Unnamed: 0,Driver,LapNumber,IsPersonalBest,Compound,TyreLife,Team,Position,Location,Year,LastTeamRanking,...,status,RaceProgress,TyreStressLevel,TrackTemp,close_ahead,close_behind,is_pitting_ahead,is_pitting_behind,RealPitting,ModelPitting
65695,VER,7.0,False,MEDIUM,7.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.106061,1,32.2,False,True,False,False,0,0
65696,VER,8.0,True,MEDIUM,8.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.121212,1,31.6,False,True,False,False,0,0
65697,VER,9.0,False,MEDIUM,9.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.136364,1,31.8,False,True,False,False,0,0
65698,VER,10.0,False,MEDIUM,10.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.151515,1,31.9,False,True,False,False,0,0
65699,VER,11.0,False,MEDIUM,11.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.166667,1,31.5,False,True,False,False,0,0
65700,VER,12.0,False,MEDIUM,12.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.181818,1,31.5,False,True,False,False,0,0
65701,VER,13.0,False,MEDIUM,13.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.19697,1,31.7,False,True,False,False,0,0
65702,VER,14.0,False,MEDIUM,14.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.212121,1,31.9,False,True,False,False,0,0
65703,VER,15.0,False,MEDIUM,15.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.227273,1,31.2,False,True,False,False,0,0
65704,VER,16.0,False,MEDIUM,16.0,RedBull,1.0,Barcelona,2023,1.0,...,1,0.242424,1,31.2,False,True,False,False,0,0


LECLERC \
Barcelona: 2/5 \
Baku: 2,5/5  \
Jeddah: 3/5 \
moyenne: 2,5

SAINZ \
Barcelona: 3,5/5 \
Baku: 0/5 \
Jeddah: 3/5 \
moyenne: 2,1

HAMILTON  \
Barcelona: 3/5 \
Baku: 3/5 \
Jeddah: 3,5/5 \
moyenne: 3

VERSTAPPEN \
Barcelona: 4/5 \
Baku: 0/5 \
Jeddah: 0/5 \
moyenne: 1,3