In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, RobustScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import EarlyStopping

from plot_keras_history import plot_history

In [None]:
# Separate the target column from the features.
# NOTE(review): `laps` is not created in any cell visible here — confirm it is
# loaded earlier so the notebook survives Restart & Run All.
y = laps['Compound']
X = laps.drop(columns=['Compound'])

In [None]:
# Map the target labels to integer codes, then one-hot encode them for the
# softmax / categorical cross-entropy model trained below.
le = LabelEncoder()
y_le = le.fit_transform(y)
y_cat = to_categorical(y_le, dtype='float32')
y_cat

In [None]:
# Train / validation / test split, done in a single cell.
#
# Why one cell with intermediate names: the previous second split rebound
# X_train / y_train from themselves, so re-running that cell alone shrank the
# training set a little more each time. Splitting from X_train_full keeps the
# cell idempotent. A fixed random_state makes both splits reproducible.
#
# Proportions: 15% of all rows go to the test set; 20% of the remaining rows
# go to the validation set.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y_cat, test_size=0.15, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=42
)

In [None]:
# Alphabetically sorted names of the int64 / float64 columns in X.
# NOTE(review): not referenced by any other cell visible here — the scaler
# below uses the hand-written `num_features` list instead.
feat_numerical = sorted(
    X.select_dtypes(include=["int64", "float64"]).columns.tolist()
)

In [None]:
# Columns that are one-hot encoded. Categories unseen during fit are ignored
# (encoded as all zeros) rather than raising at transform time.
cat_features = ["Driver", "Team", "IsPersonalBest", "FreshTyre", "Location"]

# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the old name
# was removed in 1.4. Try the current name first and fall back for older
# installs, whose constructor rejects the unknown keyword with a TypeError.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")
cat_features_preproc = make_pipeline(one_hot_encoder)

# Columns that are scaled with RobustScaler.
num_features = ["LapNumber", "TyreLife", "Position", "TotalLaps", "TyreStressLevel", "pitting_this_lap", "RaceProgress", "Year"]
num_features_preproc = make_pipeline(RobustScaler())

In [None]:
# Route each column group to its preprocessing pipeline. Columns listed in
# neither group are forwarded unchanged (remainder="passthrough").
preproc_baseline = make_column_transformer(
    (cat_features_preproc, cat_features),
    (num_features_preproc, num_features),
    remainder="passthrough",
)

In [None]:
# Learn the encoding / scaling parameters from the training split only, then
# apply the fitted transformer to the validation and test splits.
X_train_preproc = preproc_baseline.fit_transform(X_train)
X_val_preproc = preproc_baseline.transform(X_val)
X_test_preproc = preproc_baseline.transform(X_test)

In [None]:
from tensorflow.keras import models, layers
from tensorflow.keras.callbacks import EarlyStopping

# Baseline classifier: one 32-unit ReLU hidden layer followed by a softmax
# output layer.
#
# The input width and number of output units are read from the data instead of
# being hard-coded (previously 98 and 6), so the model keeps working when the
# one-hot encoding produces a different number of columns or the target has a
# different number of classes.
n_features = X_train_preproc.shape[1]
n_classes = y_train.shape[1]

model = models.Sequential([
    layers.Input(shape=(n_features,)),
    layers.Dense(32, activation="relu"),
    layers.Dense(n_classes, activation="softmax"),
])

# `metrics` is passed as a list: Keras 3 rejects a bare string here.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Stop training after 10 epochs without improvement in the monitored quantity
# (validation loss, the Keras default) and roll back to the best weights.
es = EarlyStopping(
    monitor="val_loss",
    restore_best_weights=True,
    patience=10,
)

In [None]:
# Train for at most 100 epochs in batches of 256, evaluating on the validation
# split after each epoch; the early-stopping callback may end training sooner.
history = model.fit(
    x=X_train_preproc,
    y=y_train,
    validation_data=(X_val_preproc, y_val),
    epochs=100,
    batch_size=256,
    callbacks=[es],
)
history

In [None]:
# Model outputs for the preprocessed test split: one row per sample, one
# softmax score per class.
y_pred = model.predict(x=X_test_preproc)
y_pred

In [None]:
# Predicted class index for each test sample: the position of the largest
# score in that sample's output row.
y_pred_encode = [class_scores.argmax() for class_scores in y_pred]

y_pred_encode

In [None]:
# True class index for each test sample, recovered from its one-hot row.
# The loop variable is deliberately not called `y`, which is the name of the
# target defined at the top of the notebook.
y_classes = [np.argmax(one_hot_row) for one_hot_row in y_test]
y_classes

In [None]:
# Fraction of test samples whose predicted class index matches the true one.
baseline_acc = accuracy_score(y_true=y_classes, y_pred=y_pred_encode)
baseline_acc

In [None]:
# F1 score on the test split, averaged across classes with each class weighted
# by its number of true samples.
baseline_f1 = f1_score(
    y_true=y_classes,
    y_pred=y_pred_encode,
    average='weighted',
)
baseline_f1

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

In [None]:
# Plot the per-epoch training history recorded by model.fit.
plot_history(history)