In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.metrics import root_mean_squared_error, r2_score, mean_absolute_error

# Loading data, removing outliers and dropping features

In [6]:
# Columns removed from the dataset before modelling. The names follow two
# regular patterns, so the list is generated rather than written out by hand.

# Family 1: every "<dist prefix>_<suffix>" combination, grouped by prefix.
_DIST_PREFIXES = ("dist_360", "dist_430", "dist_530")
_DIST_SUFFIXES = (
    "SPEED", "THROTTLE", "STEER", "BRAKE",
    "CURRENTLAPTIMEINMS", "LAPDISTANCE", "WORLDPOSITIONX", "WORLDPOSITIONY",
    "WORLDFORWARDDIRX", "WORLDFORWARDDIRY", "YAW", "PITCH",
    "ROLL", "left_dist", "right_dist", "dist_apex_1",
    "dist_apex_2", "angle_to_apex1", "angle_to_apex2", "proj_from_ref",
)

# Family 2: two suffixes applied to each marker prefix, grouped by suffix.
_MARKER_PREFIXES = ("BPS", "BPE", "THS", "THE", "STS", "STM", "STE", "APX1", "APX2")
_MARKER_SUFFIXES = ("right_dist", "CURRENTLAPTIMEINMS")

DROPPED = [
    f"{prefix}_{suffix}"
    for prefix in _DIST_PREFIXES
    for suffix in _DIST_SUFFIXES
] + [
    f"{prefix}_{suffix}"
    for suffix in _MARKER_SUFFIXES
    for prefix in _MARKER_PREFIXES
]

In [7]:
# Read the CSV, discard rows with missing values and exact duplicates, then
# remove the columns listed in DROPPED.
data = (
    pd.read_csv("final_data_product.csv")
    .dropna()
    .drop_duplicates()
    .drop(columns=DROPPED)
)

# Trim the slow tail of the target: keep rows strictly below mean + 3 * std.
target_mean = data["Target_CURRENTLAPTIMEINMS"].mean()
target_std = data["Target_CURRENTLAPTIMEINMS"].std()
time_cutoff = target_mean + 3 * target_std
data = data.loc[data["Target_CURRENTLAPTIMEINMS"] < time_cutoff]  # author's note: removes 12 longest times

# Label vector, and the feature frame without the label and bookkeeping columns.
y = data["Target_CURRENTLAPTIMEINMS"]
X = data.drop(columns=["Target_CURRENTLAPTIMEINMS", "lap_id", "invalid_lap"])

# A column is kept as a feature only if its name ends with one of these suffixes.
# NOTE(review): the first entry is lower-case 'target_...' while the label column
# is 'Target_...' and has already been dropped from X above, so it looks like it
# can never match anything — confirm and remove if so.
target_columns = [
    'target_CURRENTLAPTIMEINMS', '_LAPDISTANCE', '_WORLDPOSITIONX',
    '_WORLDPOSITIONY', '_STEER', '_BRAKE', '_THROTTLE', '_SPEED',
]
feature_suffixes = tuple(target_columns)

selected_columns = [name for name in X.columns if name.endswith(feature_suffixes)]

X = X[selected_columns]

# Split and Scale

In [8]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Two-step pipeline: standardise every feature (centering and unit-variance
# scaling are the StandardScaler defaults), then fit an elastic-net regression.
# make_pipeline names the steps "standardscaler" and "elasticnet".
scaler = StandardScaler()
regressor = ElasticNet(max_iter=10000, random_state=42)
pipe = make_pipeline(scaler, regressor)

# GridSearch Best Params

In [9]:
# Hyper-parameter search space: 10 x 10 = 100 candidate settings.
alphas = np.logspace(-3, 2, 10)          # 0.001 ... 100
l1_ratios = np.linspace(0.05, 0.95, 10)  # near ridge → near lasso
# NOTE(review): the recorded run selected l1_ratio=0.95, the upper edge of this
# grid — consider extending the range towards 1.0.
param_grid = {
    "elasticnet__alpha": alphas,
    "elasticnet__l1_ratio": l1_ratios,
}

# 5-fold cross-validated grid search on the training split, using all cores.
# The scorer is negated RMSE, so higher (closer to zero) is better.
grid = GridSearchCV(
    pipe,
    param_grid,
    scoring="neg_root_mean_squared_error",
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid.fit(X_train, y_train)

# best_score_ is a negated RMSE; abs() turns it back into a plain RMSE.
cv_rmse = abs(grid.best_score_)
print("Best params:", grid.best_params_)
print("CV RMSE :", cv_rmse)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
Best params: {'elasticnet__alpha': np.float64(0.1668100537200059), 'elasticnet__l1_ratio': np.float64(0.95)}
CV RMSE : 1898.8295615205018


# Test set Evaluation

In [22]:
# Evaluate the best pipeline found by the grid search on the held-out split.
grid_model = grid.best_estimator_
y_pred = grid_model.predict(X_test)

# Compute the three regression metrics in one pass, in the order RMSE, R², MAE.
rmse, r2, mae = (
    metric(y_test, y_pred)
    for metric in (root_mean_squared_error, r2_score, mean_absolute_error)
)

print(f"Test RMSE: {rmse:,.3f}")
print(f"Test R²  : {r2:,.3f}")
print(f"Test MAE : {mae:,.3f}")

Test RMSE: 1,633.853
Test R²  : 0.746
Test MAE : 892.442


In [21]:
# Rebuild the scaler + elastic-net pipeline with the hyper-parameters chosen by
# the grid search and fit it on the training split. In the recorded run this
# pipeline reports the same test metrics as grid.best_estimator_.
best_params = grid.best_params_
final_model = make_pipeline(
    StandardScaler(),
    ElasticNet(
        alpha=best_params["elasticnet__alpha"],
        l1_ratio=best_params["elasticnet__l1_ratio"],
        max_iter=10000,
        random_state=42,
    ),
).fit(X_train, y_train)  # Pipeline.fit returns the fitted pipeline itself

y_pred = final_model.predict(X_test)

# Held-out metrics, in the order RMSE, R², MAE.
rmse, r2, mae = (
    metric(y_test, y_pred)
    for metric in (root_mean_squared_error, r2_score, mean_absolute_error)
)

print(f"Test RMSE: {rmse:,.3f}")
print(f"Test R²  : {r2:,.3f}")
print(f"Test MAE : {mae:,.3f}")

Test RMSE: 1,633.853
Test R²  : 0.746
Test MAE : 892.442


# Coefficients

In [20]:
# Pull the fitted regressor out of the pipeline. Its coefficients apply to the
# standardised features, because the scaler step runs before it.
enet = final_model.named_steps["elasticnet"]

# Label each coefficient with its feature name and rank by absolute size.
coefs = pd.Series(enet.coef_, index=X.columns)
coefs = coefs.sort_values(key=np.abs, ascending=False)

print("\nTop 15 |coef| features:")
print(coefs.iloc[:15])

# Split by sign. The series is already ordered by magnitude, so the first ten
# entries of each subset are the strongest positive / negative effects.
top_pos = coefs.loc[coefs.gt(0)].iloc[:10]
top_neg = coefs.loc[coefs.lt(0)].iloc[:10]
print("\nTop positive coefficients:")
print(top_pos)
print("\nTop negative coefficients:")
print(top_neg)


Top 15 |coef| features:
STS_WORLDPOSITIONY   -1271.432539
STS_WORLDPOSITIONX   -1193.747061
APX2_SPEED            -989.577388
THS_SPEED             -793.489255
STE_SPEED             -790.432015
STM_SPEED             -615.357229
BPS_SPEED             -613.178233
BPE_WORLDPOSITIONX     591.023672
STM_STEER              566.400674
BPS_WORLDPOSITIONX    -508.428087
STE_LAPDISTANCE        455.514125
BPE_WORLDPOSITIONY     439.885032
THS_THROTTLE           439.790586
STM_LAPDISTANCE        432.562372
BPS_WORLDPOSITIONY    -414.976586
dtype: float64

Top positive coefficients:
BPE_WORLDPOSITIONX     591.023672
STM_STEER              566.400674
STE_LAPDISTANCE        455.514125
BPE_WORLDPOSITIONY     439.885032
THS_THROTTLE           439.790586
STM_LAPDISTANCE        432.562372
APX2_WORLDPOSITIONY    359.498345
BPS_LAPDISTANCE        347.226884
THS_WORLDPOSITIONX     294.534366
THE_WORLDPOSITIONX     255.470417
dtype: float64

Top negative coefficients:
STS_WORLDPOSITIONY   -1271.432539
STS_W

# Prediction

In [23]:
# Alias the fitted final pipeline as `model`; the prediction cell calls model.predict().
model = final_model

In [25]:
# Find the observed feature row for which the fitted model predicts the lowest
# lap time.
#
# Every row of X is scored exactly once. This replaces a 50,000-row draw from X
# with replacement, which re-scored duplicate rows and could skip the row that
# holds the true minimum. It also drops the per-feature quantile bounds and the
# uniform random candidates, which were built but never passed to the model.
Xcand = X
ycand = model.predict(Xcand)

# Position of the smallest prediction, the matching feature values, and the
# predicted target for that row.
imin = int(np.argmin(ycand))
best_combo = Xcand.iloc[imin].to_dict()
best_pred = ycand[imin]

print("\n=== Elastic Net-suggested setup (within observed range) ===")
for k, v in best_combo.items():
    print(f"{k}: {v:,.4f}")
print(f"Predicted Target_CURRENTLAPTIMEINMS: {best_pred:,.3f}")


=== Elastic Net-suggested setup (within observed range) ===
BPS_SPEED: 306.0000
BPS_THROTTLE: 1.0000
BPS_STEER: -0.0105
BPS_BRAKE: 0.0000
BPS_LAPDISTANCE: 266.0000
BPS_WORLDPOSITIONX: 298.5836
BPS_WORLDPOSITIONY: 282.8180
BPS_ext_LAPDISTANCE: 275.0000
BPE_SPEED: 260.0000
BPE_THROTTLE: 0.4000
BPE_STEER: 0.0377
BPE_BRAKE: 0.5757
BPE_LAPDISTANCE: 310.0000
BPE_WORLDPOSITIONX: 325.7499
BPE_WORLDPOSITIONY: 247.8683
BPE_ext_LAPDISTANCE: 417.0000
THS_SPEED: 305.0000
THS_THROTTLE: 0.8818
THS_STEER: -0.0016
THS_BRAKE: 0.1153
THS_LAPDISTANCE: 267.0000
THS_WORLDPOSITIONX: 298.5836
THS_WORLDPOSITIONY: 282.8180
THS_ext_LAPDISTANCE: 271.0000
THE_SPEED: 187.0000
THE_THROTTLE: 1.0000
THE_STEER: 0.0026
THE_BRAKE: 0.0000
THE_LAPDISTANCE: 417.0000
THE_WORLDPOSITIONX: 369.2132
THE_WORLDPOSITIONY: 167.8260
THE_ext_LAPDISTANCE: 417.0000
STS_SPEED: 312.0000
STS_THROTTLE: 1.0000
STS_STEER: 0.0004
STS_BRAKE: 0.0000
STS_LAPDISTANCE: 194.0000
STS_WORLDPOSITIONX: 250.5196
STS_WORLDPOSITIONY: 331.5493
STS_ext_LAPD