In [5]:
from joblib import dump

dump(driver_model, "../models/driver_model.pkl")
dump(constructor_model, "../models/constructor_model.pkl")


['../models/constructor_model.pkl']

In [4]:
# ============================================
# Phase 3 - Model Training Notebook
# File: notebooks/03_model_training.ipynb
# ============================================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# =========================
# Load Engineered Features
# =========================

driver_df = pd.read_csv("../data/features/driver_features.csv")
constructor_df = pd.read_csv("../data/features/constructor_features.csv")

# =========================
# Train Driver Champion Model
# =========================

print("🎯 Training Driver Champion Model...")

# Features and label
X_driver = driver_df.drop(columns=["season", "driver", "is_champion"])
y_driver = driver_df["is_champion"]

# Split data
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(
    X_driver, y_driver, test_size=0.2, random_state=42
)

# Train model
driver_model = RandomForestClassifier(n_estimators=100, random_state=42)
driver_model.fit(X_train_d, y_train_d)

# Evaluate
y_pred_d = driver_model.predict(X_test_d)
print("\n🔍 Driver Model Evaluation:")
print("Accuracy:", accuracy_score(y_test_d, y_pred_d))
print(classification_report(y_test_d, y_pred_d))

# =========================
# Train Constructor Champion Model
# =========================

print("\n🏎️ Training Constructor Champion Model...")

# Features and label
X_constructor = constructor_df.drop(columns=["season", "constructor", "is_champion"])
y_constructor = constructor_df["is_champion"]

# Split data
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
    X_constructor, y_constructor, test_size=0.2, random_state=42
)

# Train model
constructor_model = RandomForestClassifier(n_estimators=100, random_state=42)
constructor_model.fit(X_train_c, y_train_c)

# Evaluate
y_pred_c = constructor_model.predict(X_test_c)
print("\n🔍 Constructor Model Evaluation:")
print("Accuracy:", accuracy_score(y_test_c, y_pred_c))
print(classification_report(y_test_c, y_pred_c))


🎯 Training Driver Champion Model...

🔍 Driver Model Evaluation:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00         1

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10


🏎️ Training Constructor Champion Model...

🔍 Constructor Model Evaluation:
Accuracy: 0.6666666666666666
              precision    recall  f1-score   support

           0       0.80      0.80      0.80         5
           1       0.00      0.00      0.00         1

    accuracy                           0.67         6
   macro avg       0.40      0.40      0.40         6
weighted avg       0.67      0.67      0.67         6



In [3]:
print("Driver Features Columns:", driver_df.columns.tolist())
print("Constructor Features Columns:", constructor_df.columns.tolist())



Driver Features Columns: ['season', 'driver', 'wins', 'seasons_participated']
Constructor Features Columns: ['season', 'constructor', 'wins', 'final_position']
