## Setup

In [2]:
import numpy as np
import pandas as pd


from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [3]:
# Load the pre-built feature tables for the training and test splits.
# NOTE: unpickling can execute arbitrary code, so only read trusted files.
train_features, test_features = (
    pd.read_pickle(path)
    for path in (r"C:\data\train_features.pkl", r"C:\data\test_features.pkl")
)

## Categorical encoding

In [5]:
# Columns that hold categorical values and are one-hot encoded below.
categorical_cols = ['Driver', 'Compound']

# One-hot encode the training set. drop_first=True removes the first level of
# each categorical column, which becomes the implicit all-zeros baseline.
train_encoded = pd.get_dummies(train_features, columns=categorical_cols, drop_first=True)

# Encode the test set WITHOUT drop_first. Dropping the first level separately
# on each frame is unsafe: if the test set lacks the training baseline level,
# a different level is dropped, its column is later zero-filled, and those
# rows are silently mis-encoded as the training baseline.
test_encoded = pd.get_dummies(test_features, columns=categorical_cols)

# Training columns that never occur in the test set (kept for inspection).
missing_cols = set(train_encoded.columns) - set(test_encoded.columns)

# Align the test frame to the training layout in one step:
#   * columns missing from test are added and filled with 0,
#   * columns absent from train (the baseline level, unseen levels) are dropped,
#   * column order matches the training frame exactly.
test_encoded = test_encoded.reindex(columns=train_encoded.columns, fill_value=0)

print("Encoded training set shape:", train_encoded.shape)
print("Encoded test set shape:", test_encoded.shape)


Encoded training set shape: (2913, 33)
Encoded test set shape: (921, 33)


## Random Forest Regressor

In [7]:
# Separate the target column ("LapTimeSeconds") from the remaining feature
# columns for both the training and the test frame.
y_train = train_encoded["LapTimeSeconds"]
X_train = train_encoded.drop(columns="LapTimeSeconds")

y_test = test_encoded["LapTimeSeconds"]
X_test = test_encoded.drop(columns="LapTimeSeconds")

In [8]:
# Random forest with 500 trees, depth capped at 8, a fixed seed, and
# n_jobs=-1 to train on all available cores. fit() returns the fitted
# estimator itself, so construction and training are chained.
rf_model = RandomForestRegressor(
    n_estimators=500, max_depth=8, random_state=42, n_jobs=-1
).fit(X_train, y_train)

# Predictions for the test features, scored in the next cell.
predictions = rf_model.predict(X_test)

In [29]:
# Evaluate performance of the random forest on the test set.
mae = mean_absolute_error(y_test, predictions)
# RMSE is computed as sqrt(MSE). The `squared=False` shortcut was deprecated
# in scikit-learn 1.4 and removed in 1.6, where it raises a TypeError; taking
# the square root explicitly works on every version.
rmse = np.sqrt(mean_squared_error(y_test, predictions))

print("Random Forest Model Performance")
print(f"MAE  : {mae:.3f} sec")
print(f"RMSE : {rmse:.3f} sec")

Random Forest Model Performance
MAE  : 1.012 sec
RMSE : 1.229 sec


## Gradient Boosting

In [11]:
# Rebuild the feature/target split from the encoded frames so this section
# starts from the same inputs as the random forest section.
y_train = train_encoded["LapTimeSeconds"]
X_train = train_encoded.drop(columns=["LapTimeSeconds"])

y_test = test_encoded["LapTimeSeconds"]
X_test = test_encoded.drop(columns=["LapTimeSeconds"])

In [12]:
# Gradient boosting with 300 depth-3 trees, a learning rate of 0.05, and a
# fixed seed. fit() returns the fitted estimator itself, so construction and
# training are chained.
gbr_model = GradientBoostingRegressor(
    n_estimators=300, max_depth=3, learning_rate=0.05, random_state=42
).fit(X_train, y_train)

# Predictions for the test features, scored in the next cell.
gbr_preds = gbr_model.predict(X_test)

In [13]:
# Evaluate performance of the gradient boosting model on the test set.
mae = mean_absolute_error(y_test, gbr_preds)
# RMSE is computed as sqrt(MSE). The `squared=False` shortcut was deprecated
# in scikit-learn 1.4 and removed in 1.6, where it raises a TypeError; taking
# the square root explicitly works on every version.
rmse = np.sqrt(mean_squared_error(y_test, gbr_preds))

print("Gradient Boosting Regressor Performance:")
print(f"Mean Absolute Error (MAE): {mae:.3f} sec")
print(f"Root Mean Squared Error (RMSE): {rmse:.3f} sec")

Gradient Boosting Regressor Performance:
Mean Absolute Error (MAE): 1.017 sec
Root Mean Squared Error (RMSE): 1.210 sec


## Save trained models (Model.pkl)

In [15]:
import pickle

# Serialize the fitted random forest to disk; the context manager closes the
# file handle even if pickling fails.
with open("C:/models/rf_model.pkl", 'wb') as model_file:
    pickle.dump(rf_model, model_file)

In [16]:
# Serialize the fitted gradient boosting model to disk; the context manager
# closes the file handle even if pickling fails.
with open("C:/models/gbr_model.pkl", 'wb') as model_file:
    pickle.dump(gbr_model, model_file)