In [7]:
# import warnings
# import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_selection import mutual_info_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import root_mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR 
from statsmodels.stats.outliers_influence import variance_inflation_factor
# import seaborn as sns

Key performance factors: Identify the most influential features (e.g., tire pressure, throttle position, braking force).

Optimal feature values: For each key factor, find the optimal range or value that minimizes lap time.

Feature interactions: Discover how combinations of features, such as tire temperature and throttle, affect lap times.

Driver behavior patterns: Segment laps based on driving strategies and identify patterns in fast laps.

In [2]:
DROPPED = [
    "dist_360_SPEED", "dist_360_THROTTLE", "dist_360_STEER", "dist_360_BRAKE",
    "dist_360_CURRENTLAPTIMEINMS", "dist_360_LAPDISTANCE", "dist_360_WORLDPOSITIONX", "dist_360_WORLDPOSITIONY",
    "dist_360_WORLDFORWARDDIRX", "dist_360_WORLDFORWARDDIRY", "dist_360_YAW", "dist_360_PITCH",
    "dist_360_ROLL", "dist_360_left_dist", "dist_360_right_dist", "dist_360_dist_apex_1",
    "dist_360_dist_apex_2", "dist_360_angle_to_apex1", "dist_360_angle_to_apex2", "dist_360_proj_from_ref",
    "dist_430_SPEED", "dist_430_THROTTLE", "dist_430_STEER", "dist_430_BRAKE",
    "dist_430_CURRENTLAPTIMEINMS", "dist_430_LAPDISTANCE", "dist_430_WORLDPOSITIONX", "dist_430_WORLDPOSITIONY",
    "dist_430_WORLDFORWARDDIRX", "dist_430_WORLDFORWARDDIRY", "dist_430_YAW", "dist_430_PITCH",
    "dist_430_ROLL", "dist_430_left_dist", "dist_430_right_dist", "dist_430_dist_apex_1",
    "dist_430_dist_apex_2", "dist_430_angle_to_apex1", "dist_430_angle_to_apex2", "dist_430_proj_from_ref",
    "dist_530_SPEED", "dist_530_THROTTLE", "dist_530_STEER", "dist_530_BRAKE",
    "dist_530_CURRENTLAPTIMEINMS", "dist_530_LAPDISTANCE", "dist_530_WORLDPOSITIONX", "dist_530_WORLDPOSITIONY",
    "dist_530_WORLDFORWARDDIRX", "dist_530_WORLDFORWARDDIRY", "dist_530_YAW", "dist_530_PITCH",
    "dist_530_ROLL", "dist_530_left_dist", "dist_530_right_dist", "dist_530_dist_apex_1",
    "dist_530_dist_apex_2", "dist_530_angle_to_apex1", "dist_530_angle_to_apex2", "dist_530_proj_from_ref",
    "BPS_right_dist", "BPE_right_dist", "THS_right_dist", "THE_right_dist", "STS_right_dist",
    "STM_right_dist", "STE_right_dist", "APX1_right_dist", "APX2_right_dist", "BPS_CURRENTLAPTIMEINMS",
    "BPE_CURRENTLAPTIMEINMS", "THS_CURRENTLAPTIMEINMS", "THE_CURRENTLAPTIMEINMS", "STS_CURRENTLAPTIMEINMS",
    "STM_CURRENTLAPTIMEINMS", "STE_CURRENTLAPTIMEINMS", "APX1_CURRENTLAPTIMEINMS", "APX2_CURRENTLAPTIMEINMS"
]

In [3]:
data = pd.read_csv("Data3001G2/final_data_product.csv")
data = data.dropna().drop_duplicates().drop(columns=DROPPED)
data = data[data['Target_CURRENTLAPTIMEINMS'] < 60000]

In [None]:
i = 0
for c in data.columns:
    print(c)
    i+=1
print(i-2)
print(data.shape[1])

In [4]:
y = data["Target_CURRENTLAPTIMEINMS"]
X = data.drop(columns=["Target_CURRENTLAPTIMEINMS", "lap_id", "invalid_lap"])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, random_state=42)

In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
m_info = mutual_info_regression(X_scaled, y)
Scores = pd.DataFrame(sorted(zip(X.columns, m_info), key=lambda x: x[1], reverse=True), columns=["feature", "mi_score"])
pd.set_option('display.max_rows', 20)
Scores

In [83]:
# corr = X.corr(method='pearson')

# high_corr = [
#     (i, j, corr.loc[i, j])
#     for i in corr.columns
#     for j in corr.columns
#     if i < j and abs(corr.loc[i, j]) > 0.5
# ]

# high_corr = sorted(high_corr, key=lambda x: abs(x[2]), reverse=True)
# high_corr_df = pd.DataFrame(high_corr, columns=['Feature 1', 'Feature 2', 'Spearman Corr'])
# pd.set_option('display.max_rows', 200)
# high_corr_df


In [26]:
vif_data = pd.DataFrame()
vif_data['feature'] = X.columns
vif_data["VIF"] = [round(variance_inflation_factor(X.values, i), 4) for i in range(X.shape[1])]

In [27]:
pd.set_option("display.max_rows", 200)
pd.options.display.float_format = '{:.2f}'.format
vif_data.sort_values(by="VIF")

Unnamed: 0,feature,VIF
149,APX1_ROLL,1.89
120,STE_STEER,1.95
43,THS_BRAKE,2.34
3,BPS_BRAKE,2.53
75,THE_angle_to_apex1,2.56
172,APX2_angle_to_apex2,2.64
21,BPE_THROTTLE,2.83
103,STM_BRAKE,3.29
153,APX1_angle_to_apex1,3.33
71,THE_ROLL,3.4


In [9]:
poly_parameters = {
    'kernel': ['poly'],
    'degree': [3, 5, 7, 9],
    'gamma': ['scale', 'auto'],
    'coef0': [0, 1, 3],
    'tol': [1e-3],
    'C': [0.01, 0.1, 1],
    'epsilon': [0.01, 0.05, 0.1, 1],
    'shrinking': [True],   # boolean, not string
    'verbose': [False],
    'max_iter': [-1]
}


rbf_parameters = {
    'kernel': ['rbf'],
    'gamma': ['scale', 'auto'],
    'tol': [1e-3],
    'C': [0.01, 0.1, 1],
    'epsilon': [0.01, 0.05, 0.1, 0.5, 1],
    'shrinking': [True],   # boolean, not string
    'verbose': [False],
    'max_iter': [-1]
}

In [11]:
grid_poly = GridSearchCV(
    estimator=SVR(),
    param_grid=poly_parameters,
    cv=5,
    scoring='neg_root_mean_squared_error', # check others
    n_jobs=-1,
    verbose=2
)

grid_poly.fit(X_train_scaled, y_train)
print("Best parameters:", grid_poly.best_params_)
print("Best RMSE:", abs(grid_poly.best_score_))

grid_rbf = GridSearchCV(
    estimator=SVR(),
    param_grid=rbf_parameters,
    cv=5,
    scoring='neg_root_mean_squared_error', # check others
    n_jobs=-1,
    verbose=3
)

grid_rbf.fit(X_train_scaled, y_train)
print("Best parameters:", grid_rbf.best_params_)
print("Best RMSE:", abs(grid_rbf.best_score_))

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
Best parameters: {'C': 1, 'coef0': 3, 'degree': 7, 'epsilon': 0.01, 'gamma': 'auto', 'kernel': 'poly', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Best RMSE: 2646.4937140116726
Fitting 5 folds for each of 30 candidates, totalling 150 fits
Best parameters: {'C': 1, 'epsilon': 0.01, 'gamma': 'auto', 'kernel': 'rbf', 'max_iter': -1, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Best RMSE: 4333.2664329003655


In [12]:
pred_poly_y = grid_poly.predict(X_test_scaled)
pred_rbf_y = grid_rbf.predict(X_test_scaled)

In [14]:
rmse_poly = root_mean_squared_error(pred_poly_y, y_test)
rmse_rbf = root_mean_squared_error(pred_rbf_y, y_test)
print("RMSE for poly kernel:", rmse_poly)
print("RMSE for rbf kernel:", rmse_rbf)

RMSE for poly kernel: 3160.6985335006025
RMSE for rbf kernel: 4937.90765073188
