# Predicting Lap Time Seconds: End-to-End Regression Pipeline

This notebook demonstrates how to predict `Lap_Time_Seconds` using a machine learning pipeline with pandas and scikit-learn. It covers data loading, preprocessing, model training, validation, and generating a submission file.

In [None]:
# Import Required Libraries
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import LabelEncoder


In [None]:
# Read the train / validation / test splits and the submission template
# from CSV files in the current working directory.
train = pd.read_csv('train.csv')
val = pd.read_csv('val.csv')
test = pd.read_csv('test.csv')
sample_submission = pd.read_csv('sample_submission.csv')

# Report the (rows, columns) shape of each split, one per line.
print('\n'.join(
    f'{label} {frame.shape}'
    for label, frame in (
        ('Train shape:', train),
        ('Validation shape:', val),
        ('Test shape:', test),
    )
))

# Last expression of the cell: preview the first five training rows.
train.head(5)

In [None]:
# Basic preprocessing: integer-encode categorical (object-dtype) columns.
#
# Each encoder is fitted on the training split only. LabelEncoder.transform raises
# ValueError for a label it did not see during fit, so a category that occurs only
# in val/test is mapped to UNSEEN_CODE instead of aborting the notebook.
UNSEEN_CODE = -1  # outside the 0..n_classes-1 range that LabelEncoder assigns

for col in train.columns:
    if train[col].dtype != 'object':
        continue

    le = LabelEncoder()
    # astype(str) turns missing values into the literal string 'nan', which then
    # becomes a category of its own.
    train[col] = le.fit_transform(train[col].astype(str))

    # le.classes_ is sorted; a label's position in it is the code it was assigned,
    # so labels seen during fit get exactly the code LabelEncoder.transform would give.
    code_of = {label: code for code, label in enumerate(le.classes_)}

    for frame in (val, test):
        # A split may lack a training column; skip it rather than raise KeyError.
        if col in frame.columns:
            frame[col] = (
                frame[col].astype(str)
                .map(code_of)          # unseen labels become NaN here
                .fillna(UNSEEN_CODE)
                .astype('int64')
            )

In [None]:
# Separate features and target

target = 'Lap_Time_Seconds'

# Use only predictors that every split provides: selecting a column that is missing
# from val or test would raise KeyError when X_val / X_test are built below.
candidates = [col for col in train.columns if col != target]
features = [col for col in candidates if col in val.columns and col in test.columns]

# Make any dropped predictor visible instead of discarding it silently.
skipped = [col for col in candidates if col not in features]
if skipped:
    print('Skipping columns not present in every split:', skipped)

X_train = train[features]
y_train = train[target]
X_val = val[features]
y_val = val[target]
# No y_test is built: this cell reads the target only from train and val.
X_test = test[features]

In [None]:
# Fit a 100-tree random forest on the training split.
# random_state is fixed so repeated runs build the same forest.
# fit() returns the estimator itself, so the chained call binds the fitted model.
model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the fitted model on the validation split using mean absolute error.
val_preds = model.predict(X_val)
mae = mean_absolute_error(y_true=y_val, y_pred=val_preds)
print(f'Validation MAE: {mae:.4f}')

In [None]:
# Score the test features and write the predictions out in the submission layout.

test_preds = model.predict(X_test)

# assign() returns a new frame, so the sample_submission template is left untouched;
# the prediction column is overwritten if the template already has it, else appended.
submission = sample_submission.assign(**{'Lap_Time_Seconds': test_preds})

# index=False keeps the DataFrame index out of the CSV.
submission.to_csv('submission.csv', index=False)
print('Predictions saved to submission.csv')