# Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import f1_score, accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, RobustScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models, layers, regularizers
from tensorflow.keras.callbacks import EarlyStopping

# Loading Data

In [2]:
laps = pd.read_csv("../raw_data/new_clean_data.csv")

  laps = pd.read_csv("../raw_data/new_clean_data.csv")


In [3]:
X = laps.drop(columns = 'next_compound')
y = laps['next_compound']

In [4]:
le = LabelEncoder()
y_le = le.fit_transform(y)
y_cat = to_categorical(y_le, num_classes=None,dtype='float32')

# Train Test Val Split

In [5]:
laps_for_model = laps.copy()
train_df = laps_for_model[laps_for_model['Year'] < 2022]
val_df = laps_for_model[laps_for_model['Year'] == 2022]
test_df = laps_for_model[laps_for_model['Year'] == 2023]
train_df_shuffled = train_df.sample(frac=1)
val_df_shuffled = val_df.sample(frac=1)
test_df_shuffled = test_df.sample(frac=1)
y_train = train_df_shuffled['next_compound']
y_val = val_df_shuffled['next_compound']
y_test = test_df_shuffled['next_compound']
X_train = train_df_shuffled.drop(columns=['next_compound'])
X_val = val_df_shuffled.drop(columns=['next_compound'])
X_test = test_df_shuffled.drop(columns=['next_compound'])

In [6]:
le = LabelEncoder()
y_train_le = le.fit_transform(y_train)
y_test_le = le.transform(y_test)
y_val_le = le.transform(y_val)
y_train_cat = to_categorical(y_train_le, num_classes=None, dtype = 'float32')
y_test_cat = to_categorical(y_test_le, num_classes=None, dtype = 'float32')
y_val_cat = to_categorical(y_val_le, num_classes=None, dtype = 'float32')

# Preprocessing

In [7]:
laps = laps.drop(columns = ["Unnamed: 0.1","Time","DriverNumber","LapTime","Stint","PitOutTime","PitInTime","Sector1Time","Sector2Time","Sector3Time","Sector1SessionTime","Sector2SessionTime","Sector3SessionTime","SpeedI1", "SpeedI2", "SpeedFL", "SpeedST", "LapStartTime","LapStartDate","TrackStatus","Deleted","DeletedReason","FastF1Generated","IsAccurate","status_list","TotalLaps","Time_min","Unnamed: 0","Time_w","AirTemp","Humidity","Pressure","Rainfall","WindDirection","WindSpeed","Final_Position","LocationYear","FreshTyre","pitting_this_lap"])

In [8]:
cat_features = ["Driver", "Team", "IsPersonalBest", "Location",'second_compound', 'Compound', 'close_ahead', 'close_behind', 'is_pitting_ahead','is_pitting_behind']
cat_features_preproc = make_pipeline(OneHotEncoder(sparse=False, handle_unknown="ignore"))
num_features = ["LapNumber", "TyreLife", "Position", "TyreStressLevel", "RaceProgress", "Year", 'LastTeamRanking','status','TrackTemp', 'pitting_next_lap']
num_features_preproc = make_pipeline(RobustScaler())

In [9]:
preproc_baseline = make_column_transformer((cat_features_preproc, cat_features),
                                           (num_features_preproc, num_features),
                                           remainder="passthrough")

In [10]:
preproc_baseline.fit(X_train)
X_train_preproc = preproc_baseline.transform(X_train)
X_test_preproc = preproc_baseline.transform(X_test)
X_val_preproc = preproc_baseline.transform(X_val)



# Model

In [11]:
model = models.Sequential()

model.add(layers.Dense(128, activation="relu", input_dim=169, kernel_regularizer=regularizers.l1(0.001)))
model.add(layers.Dense(128, activation="relu", kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dense(64, activation="relu", kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dense(64, activation="relu", kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dense(32, activation="relu", kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dense(32, activation="relu", kernel_regularizer=regularizers.l2(0.001)))

model.add(layers.Dense(3, activation="softmax"))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [15]:
laps.dtypes

cat : ['Driver','Compound','Team','Location']
num : ['LapNumber', 'IsPersonalBest', 'TyreLife', 'Position']

Driver                object
LapNumber            float64
IsPersonalBest          bool
Compound              object
TyreLife             float64
Team                  object
Position             float64
Location              object
Year                   int64
LastTeamRanking      float64
second_compound         bool
status                 int64
RaceProgress         float64
TyreStressLevel        int64
TrackTemp            float64
close_ahead             bool
close_behind            bool
is_pitting_ahead        bool
is_pitting_behind       bool
pitting_next_lap        bool
next_compound         object
dtype: object

In [12]:
es = EarlyStopping(patience=20, restore_best_weights=True)
history = model.fit(X_train_preproc, y_train_cat, 
          batch_size=256, epochs=1500, 
          validation_data=(X_val_preproc, y_val_cat),
          callbacks=[es])
history

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).