# Exploration phase — initial testing
Here we try out different models on our data. The comments below explain exactly what is happening.

In [2]:
import f1_pitstop_advisor.data_processing_utils as utils
import pandas as pd
import numpy as np
import pickle

from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline

from sklearn.base import clone

import time

In [None]:
# Read the pickled session collection from disk into `sessions`.
# NOTE: unpickling can execute arbitrary code, so only open a file you trust.
with open("ig_sessions.pickle", mode="rb") as session_file:
    sessions = pickle.load(session_file)


In [None]:
# Group the laps of all sessions by the circuit they were driven on.
#
# `circuits` collects the distinct circuit short names found in the sessions.
# `dfs` maps each of those names to the result of
# utils.get_refined_lap_data_with_z_score_for_circuit for that circuit
# (a DataFrame containing the data for all laps on that circuit).
circuits = {
    session.session_info["Meeting"]["Circuit"]["ShortName"]
    for session in sessions
}

dfs = {
    circuit: utils.get_refined_lap_data_with_z_score_for_circuit(sessions, circuit)
    for circuit in circuits
}


In [None]:
# Prepare regressor configurations for testing
#
# Many algorithms are tested, each with parameter tuning through GridSearchCV.
# The GridSearchCV objects built here are unfitted templates: for each circuit,
# every one of them is cloned and the clone is fitted to that circuit's data.


def _scaled_pca_search(*final_steps, grid=None):
    """Return a GridSearchCV over StandardScaler -> PCA -> *final_steps.

    `grid` holds the parameters tuned for the final steps (may be omitted).
    The PCA `n_components` candidates shared by all of these pipelines are
    added as the last entry of the parameter grid.
    """
    param_grid = dict(grid) if grid else {}
    param_grid["pca__n_components"] = [0.98, 0.95, 0.9]
    pipeline = make_pipeline(StandardScaler(), PCA(), *final_steps)
    return GridSearchCV(pipeline, param_grid)


def _polynomial_search(regressor):
    """Return a scaled/PCA search with a PolynomialFeatures step before `regressor`."""
    return _scaled_pca_search(
        PolynomialFeatures(),
        regressor,
        grid={"polynomialfeatures__degree": [2, 3]},
    )


def _tree_ensemble_grid():
    """Return a fresh copy of the grid shared by the random-forest and extra-trees searches."""
    return {
        "n_estimators": [100, 200, 400],
        "max_depth": [5, 10, 20, None],
        "min_samples_split": [2, 5, 10],
    }


# GridSearchCV configurations (insertion order is the order in which they are fitted)
model_searches = {
    # Linear regression
    "LinearRegression": _scaled_pca_search(LinearRegression()),
    "RidgeCV": _scaled_pca_search(RidgeCV(alphas=(0.1, 1.0, 10.0))),
    "LassoCV": _scaled_pca_search(
        LassoCV(max_iter=100_000, alphas=[0.001, 0.01, 0.1, 1.0])
    ),
    "ElasticNetCV": _scaled_pca_search(
        ElasticNetCV(max_iter=100_000, l1_ratio=[0.2, 0.5, 0.8])
    ),

    # Polynomial regression
    "PolynomialLinearRegression": _polynomial_search(LinearRegression()),
    "PolynomialRidgeCV": _polynomial_search(RidgeCV(alphas=(0.1, 1.0, 10.0))),
    "PolynomialLassoCV": _polynomial_search(
        LassoCV(max_iter=100_000, alphas=[0.001, 0.01, 0.1])
    ),
    "PolynomialElasticNetCV": _polynomial_search(
        ElasticNetCV(max_iter=100_000, l1_ratio=[0.2, 0.5, 0.8])
    ),

    # Bagging models
    "RandomForestRegressor": GridSearchCV(
        RandomForestRegressor(random_state=42, n_jobs=-1),
        _tree_ensemble_grid(),
    ),
    "ExtraTreesRegressor": GridSearchCV(
        ExtraTreesRegressor(random_state=42, n_jobs=-1),
        _tree_ensemble_grid(),
    ),

    # Boosting models
    "AdaBoostRegressor": GridSearchCV(
        AdaBoostRegressor(random_state=42),
        {
            "n_estimators": [50, 100, 200],
            "learning_rate": [0.01, 0.1, 0.5, 1.0],
        },
    ),
    "GradientBoostingRegressor": GridSearchCV(
        GradientBoostingRegressor(random_state=42),
        {
            "n_estimators": [100, 200],
            "learning_rate": [0.01, 0.05, 0.1],
            "max_depth": [3, 5],
            "subsample": [0.8, 1.0],
        },
    ),
    "XGBRegressor": GridSearchCV(
        XGBRegressor(random_state=42, n_jobs=-1, objective="reg:squarederror", verbosity=0),
        {
            "n_estimators": [100, 200, 400],
            "max_depth": [3, 6, 10],
            "learning_rate": [0.01, 0.1, 0.3],
            "subsample": [0.8, 1.0],
            "colsample_bytree": [0.8, 1.0],
        },
    ),

    # Support vector models
    "SVR_linear": _scaled_pca_search(
        SVR(kernel="linear"),
        grid={"svr__C": [0.1, 1, 10, 100]},
    ),
    "SVR_rbf": _scaled_pca_search(
        SVR(kernel="rbf"),
        grid={
            "svr__C": [0.1, 1, 10],
            "svr__gamma": ["scale", 0.01, 0.1, 1.0],
        },
    ),

    # MLP
    "MLPRegressor": _scaled_pca_search(
        MLPRegressor(max_iter=100_000, random_state=42),
        grid={
            "mlpregressor__hidden_layer_sizes": [(16,), (24,), (24, 12), (16, 16), (16, 8)],
            "mlpregressor__activation": ["relu", "tanh"],
            "mlpregressor__alpha": [0.0001, 0.001, 0.01],
            "mlpregressor__learning_rate_init": [0.001, 0.01],
        },
    ),
}

In [None]:
# Fit every single circuit/GridSearch configuration
#
# Result layout: models_and_circuits[model name][circuit name] holds the
# fitted clone of the corresponding GridSearchCV template.
models_and_circuits = {name: {} for name in model_searches}

for circuit, data in dfs.items():
    print(f"Fitting models for {circuit}")
    # perf_counter is monotonic, so the measured durations cannot be distorted
    # by system clock adjustments during these long-running fits.
    circuit_start = time.perf_counter()

    # "LapTimeZScore" is the regression target; every other column is a feature.
    X, y = data.drop(["LapTimeZScore"], axis="columns"), data["LapTimeZScore"]
    for name, model_search in model_searches.items():
        # Flush so the progress label is visible while the fit is still running,
        # even when stdout is buffered.
        print(f"Fitting {name};".ljust(50), end="", flush=True)
        model_start = time.perf_counter()

        # Fit a clone so the template in model_searches stays unfitted and reusable.
        model_search_copy = clone(model_search)
        model_search_copy.fit(X, y)
        models_and_circuits[name][circuit] = model_search_copy

        print(f"took {round(time.perf_counter() - model_start, 2)} seconds")

    print(f"Took a total of {round(time.perf_counter() - circuit_start, 2)} seconds to fit all models for circuit \"{circuit}\"")
    print()

    

Fitting models for Catalunya
Fitting LinearRegression;                         took 0.47 seconds
Fitting RidgeCV;                                  took 0.45 seconds
Fitting LassoCV;                                  took 0.58 seconds
Fitting ElasticNetCV;                             took 3.68 seconds
Fitting PolynomialLinearRegression;               took 2.3 seconds
Fitting PolynomialRidgeCV;                        took 5.25 seconds
Fitting PolynomialLassoCV;                        

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gra

took 797.82 seconds
Fitting PolynomialElasticNetCV;                   

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 7900.77 seconds
Fitting RandomForestRegressor;                    took 179.21 seconds
Fitting ExtraTreesRegressor;                      took 112.69 seconds
Fitting AdaBoostRegressor;                        took 34.41 seconds
Fitting GradientBoostingRegressor;                took 102.77 seconds
Fitting XGBRegressor;                             took 186.55 seconds
Fitting SVR_linear;                               took 1515.51 seconds
Fitting SVR_rbf;                                  took 153.95 seconds
Fitting MLPRegressor;                             took 2893.21 seconds
Took a total of 13889.63 seconds to fit all models for circuit "Catalunya"

Fitting models for Spa-Francorchamps
Fitting LinearRegression;                         took 0.27 seconds
Fitting RidgeCV;                                  took 0.29 seconds
Fitting LassoCV;                                  took 0.39 seconds
Fitting ElasticNetCV;                             took 2.77 seconds
Fitting PolynomialLinearRegressio

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 224.56 seconds
Fitting PolynomialElasticNetCV;                   took 810.29 seconds
Fitting RandomForestRegressor;                    took 125.96 seconds
Fitting ExtraTreesRegressor;                      took 83.53 seconds
Fitting AdaBoostRegressor;                        took 28.06 seconds
Fitting GradientBoostingRegressor;                took 67.25 seconds
Fitting XGBRegressor;                             took 159.52 seconds
Fitting SVR_linear;                               took 712.67 seconds
Fitting SVR_rbf;                                  took 50.43 seconds
Fitting MLPRegressor;                             took 2060.26 seconds
Took a total of 4332.37 seconds to fit all models for circuit "Suzuka"

Fitting models for Paul Ricard
Fitting LinearRegression;                         took 0.24 seconds
Fitting RidgeCV;                                  took 0.29 seconds
Fitting LassoCV;                                  took 0.38 seconds
Fitting ElasticNetCV;                         

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 102.26 seconds
Fitting PolynomialElasticNetCV;                   took 46.76 seconds
Fitting RandomForestRegressor;                    took 114.35 seconds
Fitting ExtraTreesRegressor;                      took 75.54 seconds
Fitting AdaBoostRegressor;                        took 8.34 seconds
Fitting GradientBoostingRegressor;                took 23.69 seconds
Fitting XGBRegressor;                             took 84.81 seconds
Fitting SVR_linear;                               took 59.2 seconds
Fitting SVR_rbf;                                  took 7.6 seconds
Fitting MLPRegressor;                             took 847.58 seconds
Took a total of 1376.11 seconds to fit all models for circuit "Paul Ricard"

Fitting models for Baku
Fitting LinearRegression;                         took 0.31 seconds
Fitting RidgeCV;                                  took 0.32 seconds
Fitting LassoCV;                                  took 0.55 seconds
Fitting ElasticNetCV;                             took 3

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 192.09 seconds
Fitting PolynomialElasticNetCV;                   

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 1441.48 seconds
Fitting RandomForestRegressor;                    took 119.68 seconds
Fitting ExtraTreesRegressor;                      took 102.5 seconds
Fitting AdaBoostRegressor;                        took 16.36 seconds
Fitting GradientBoostingRegressor;                took 61.77 seconds
Fitting XGBRegressor;                             took 146.48 seconds
Fitting SVR_linear;                               took 360.62 seconds
Fitting SVR_rbf;                                  took 44.09 seconds
Fitting MLPRegressor;                             took 2210.14 seconds
Took a total of 4704.59 seconds to fit all models for circuit "Baku"

Fitting models for Hungaroring
Fitting LinearRegression;                         took 0.45 seconds
Fitting RidgeCV;                                  took 0.51 seconds
Fitting LassoCV;                                  took 0.59 seconds
Fitting ElasticNetCV;                             took 3.85 seconds
Fitting PolynomialLinearRegression;              

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


took 117.8 seconds
Fitting PolynomialElasticNetCV;                   took 314.58 seconds
Fitting RandomForestRegressor;                    took 200.36 seconds
Fitting ExtraTreesRegressor;                      took 121.31 seconds
Fitting AdaBoostRegressor;                        took 31.09 seconds
Fitting GradientBoostingRegressor;                took 110.0 seconds
Fitting XGBRegressor;                             took 192.22 seconds
Fitting SVR_linear;                               took 1345.13 seconds
Fitting SVR_rbf;                                  took 183.04 seconds
Fitting MLPRegressor;                             took 3296.8 seconds
Took a total of 5925.86 seconds to fit all models for circuit "Hungaroring"

Fitting models for Monte Carlo
Fitting LinearRegression;                         took 0.49 seconds
Fitting RidgeCV;                                  took 0.55 seconds
Fitting LassoCV;                                  took 0.7 seconds
Fitting ElasticNetCV;                    

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


took 249.9 seconds
Fitting PolynomialElasticNetCV;                   took 143.14 seconds
Fitting RandomForestRegressor;                    took 195.95 seconds
Fitting ExtraTreesRegressor;                      took 117.85 seconds
Fitting AdaBoostRegressor;                        took 46.76 seconds
Fitting GradientBoostingRegressor;                took 114.99 seconds
Fitting XGBRegressor;                             took 193.33 seconds
Fitting SVR_linear;                               took 1690.79 seconds
Fitting SVR_rbf;                                  took 147.95 seconds
Fitting MLPRegressor;                             took 2528.82 seconds
Took a total of 5446.47 seconds to fit all models for circuit "Monte Carlo"

Fitting models for Silverstone
Fitting LinearRegression;                         took 0.39 seconds
Fitting RidgeCV;                                  took 0.41 seconds
Fitting LassoCV;                                  took 0.53 seconds
Fitting ElasticNetCV;                 

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


took 87.58 seconds
Fitting PolynomialElasticNetCV;                   took 308.58 seconds
Fitting RandomForestRegressor;                    took 153.88 seconds
Fitting ExtraTreesRegressor;                      took 107.76 seconds
Fitting AdaBoostRegressor;                        took 24.58 seconds
Fitting GradientBoostingRegressor;                took 64.43 seconds
Fitting XGBRegressor;                             took 153.17 seconds
Fitting SVR_linear;                               took 541.82 seconds
Fitting SVR_rbf;                                  took 59.42 seconds
Fitting MLPRegressor;                             took 2406.03 seconds
Took a total of 3916.13 seconds to fit all models for circuit "Yas Marina Circuit"

Fitting models for Las Vegas
Fitting LinearRegression;                         took 0.32 seconds
Fitting RidgeCV;                                  took 0.34 seconds
Fitting LassoCV;                                  took 0.44 seconds
Fitting ElasticNetCV;               

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 128.05 seconds
Fitting PolynomialElasticNetCV;                   took 148.65 seconds
Fitting RandomForestRegressor;                    took 130.88 seconds
Fitting ExtraTreesRegressor;                      took 91.86 seconds
Fitting AdaBoostRegressor;                        took 18.81 seconds
Fitting GradientBoostingRegressor;                took 37.91 seconds
Fitting XGBRegressor;                             took 128.08 seconds
Fitting SVR_linear;                               took 174.35 seconds
Fitting SVR_rbf;                                  took 24.31 seconds
Fitting MLPRegressor;                             took 1498.18 seconds
Took a total of 2388.43 seconds to fit all models for circuit "Las Vegas"

Fitting models for Spielberg
Fitting LinearRegression;                         took 0.3 seconds
Fitting RidgeCV;                                  took 0.4 seconds
Fitting LassoCV;                                  took 0.54 seconds
Fitting ElasticNetCV;                          

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


took 85.9 seconds
Fitting PolynomialElasticNetCV;                   took 59.74 seconds
Fitting RandomForestRegressor;                    took 125.04 seconds
Fitting ExtraTreesRegressor;                      took 83.21 seconds
Fitting AdaBoostRegressor;                        took 10.36 seconds
Fitting GradientBoostingRegressor;                took 28.09 seconds
Fitting XGBRegressor;                             took 96.58 seconds
Fitting SVR_linear;                               took 68.57 seconds
Fitting SVR_rbf;                                  took 9.12 seconds
Fitting MLPRegressor;                             took 1035.96 seconds
Took a total of 1611.27 seconds to fit all models for circuit "Spielberg"

Fitting models for Singapore
Fitting LinearRegression;                         took 0.46 seconds
Fitting RidgeCV;                                  took 0.51 seconds
Fitting LassoCV;                                  took 0.71 seconds
Fitting ElasticNetCV;                             t

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


took 111.64 seconds
Fitting PolynomialElasticNetCV;                   took 271.02 seconds
Fitting RandomForestRegressor;                    took 136.69 seconds
Fitting ExtraTreesRegressor;                      took 101.72 seconds
Fitting AdaBoostRegressor;                        took 16.66 seconds
Fitting GradientBoostingRegressor;                took 46.03 seconds
Fitting XGBRegressor;                             took 135.02 seconds
Fitting SVR_linear;                               took 295.85 seconds
Fitting SVR_rbf;                                  took 33.59 seconds
Fitting MLPRegressor;                             took 2523.96 seconds
Took a total of 3681.22 seconds to fit all models for circuit "Miami"

Fitting models for Austin
Fitting LinearRegression;                         took 0.29 seconds
Fitting RidgeCV;                                  took 0.46 seconds
Fitting LassoCV;                                  took 0.81 seconds
Fitting ElasticNetCV;                             t

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 156.32 seconds
Fitting PolynomialElasticNetCV;                   

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 1445.62 seconds
Fitting RandomForestRegressor;                    took 120.6 seconds
Fitting ExtraTreesRegressor;                      took 78.36 seconds
Fitting AdaBoostRegressor;                        took 13.26 seconds
Fitting GradientBoostingRegressor;                took 25.1 seconds
Fitting XGBRegressor;                             took 106.01 seconds
Fitting SVR_linear;                               took 73.61 seconds
Fitting SVR_rbf;                                  took 9.51 seconds
Fitting MLPRegressor;                             took 1752.38 seconds
Took a total of 3788.66 seconds to fit all models for circuit "Austin"

Fitting models for Zandvoort
Fitting LinearRegression;                         took 0.63 seconds
Fitting RidgeCV;                                  took 0.86 seconds
Fitting LassoCV;                                  took 0.85 seconds
Fitting ElasticNetCV;                             took 4.69 seconds
Fitting PolynomialLinearRegression;               too

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gra

took 2361.77 seconds
Fitting PolynomialElasticNetCV;                   

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 5196.34 seconds
Fitting RandomForestRegressor;                    took 201.42 seconds
Fitting ExtraTreesRegressor;                      took 127.24 seconds
Fitting AdaBoostRegressor;                        took 44.75 seconds
Fitting GradientBoostingRegressor;                took 121.56 seconds
Fitting XGBRegressor;                             took 198.87 seconds
Fitting SVR_linear;                               took 2136.85 seconds
Fitting SVR_rbf;                                  took 175.16 seconds
Fitting MLPRegressor;                             took 4561.06 seconds
Took a total of 15145.07 seconds to fit all models for circuit "Zandvoort"

Fitting models for Imola
Fitting LinearRegression;                         took 0.39 seconds
Fitting RidgeCV;                                  took 0.64 seconds
Fitting LassoCV;                                  took 0.67 seconds
Fitting ElasticNetCV;                             took 3.62 seconds
Fitting PolynomialLinearRegression;          

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 162.75 seconds
Fitting PolynomialElasticNetCV;                   

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 766.91 seconds
Fitting RandomForestRegressor;                    took 144.57 seconds
Fitting ExtraTreesRegressor;                      took 101.4 seconds
Fitting AdaBoostRegressor;                        took 22.75 seconds
Fitting GradientBoostingRegressor;                took 51.36 seconds
Fitting XGBRegressor;                             took 146.3 seconds
Fitting SVR_linear;                               took 343.68 seconds
Fitting SVR_rbf;                                  took 38.02 seconds
Fitting MLPRegressor;                             took 3029.8 seconds
Took a total of 4816.9 seconds to fit all models for circuit "Imola"

Fitting models for Melbourne
Fitting LinearRegression;                         took 0.46 seconds
Fitting RidgeCV;                                  took 0.6 seconds
Fitting LassoCV;                                  took 0.68 seconds
Fitting ElasticNetCV;                             took 4.56 seconds
Fitting PolynomialLinearRegression;               took 

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 311.39 seconds
Fitting PolynomialElasticNetCV;                   took 440.53 seconds
Fitting RandomForestRegressor;                    took 177.61 seconds
Fitting ExtraTreesRegressor;                      took 122.62 seconds
Fitting AdaBoostRegressor;                        took 32.24 seconds
Fitting GradientBoostingRegressor;                took 80.35 seconds
Fitting XGBRegressor;                             took 178.18 seconds
Fitting SVR_linear;                               took 648.54 seconds
Fitting SVR_rbf;                                  took 79.69 seconds
Fitting MLPRegressor;                             took 3631.28 seconds
Took a total of 5716.17 seconds to fit all models for circuit "Melbourne"

Fitting models for Montreal
Fitting LinearRegression;                         took 0.79 seconds
Fitting RidgeCV;                                  took 1.31 seconds
Fitting LassoCV;                                  took 0.89 seconds
Fitting ElasticNetCV;                        

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 526.53 seconds
Fitting PolynomialElasticNetCV;                   

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


took 1145.62 seconds
Fitting RandomForestRegressor;                    took 199.54 seconds
Fitting ExtraTreesRegressor;                      took 124.06 seconds
Fitting AdaBoostRegressor;                        took 44.72 seconds
Fitting GradientBoostingRegressor;                took 120.2 seconds
Fitting XGBRegressor;                             took 209.52 seconds
Fitting SVR_linear;                               took 1608.16 seconds
Fitting SVR_rbf;                                  took 143.11 seconds
Fitting MLPRegressor;                             took 4754.16 seconds
Took a total of 8894.65 seconds to fit all models for circuit "Montreal"

Fitting models for Monza
Fitting LinearRegression;                         took 0.45 seconds
Fitting RidgeCV;                                  took 0.49 seconds
Fitting LassoCV;                                  took 0.81 seconds
Fitting ElasticNetCV;                             took 5.07 seconds
Fitting PolynomialLinearRegression;             

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


took 70.95 seconds
Fitting PolynomialElasticNetCV;                   took 52.55 seconds
Fitting RandomForestRegressor;                    took 184.71 seconds
Fitting ExtraTreesRegressor;                      took 133.88 seconds
Fitting AdaBoostRegressor;                        took 35.71 seconds
Fitting GradientBoostingRegressor;                took 87.08 seconds
Fitting XGBRegressor;                             took 189.13 seconds
Fitting SVR_linear;                               took 738.92 seconds
Fitting SVR_rbf;                                  took 81.53 seconds
Fitting MLPRegressor;                             took 4580.48 seconds
Took a total of 6165.97 seconds to fit all models for circuit "Mexico City"

Fitting models for Jeddah
Fitting LinearRegression;                         took 0.42 seconds
Fitting RidgeCV;                                  took 0.49 seconds
Fitting LassoCV;                                  took 0.65 seconds
Fitting ElasticNetCV;                          

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descen

took 134.9 seconds
Fitting PolynomialElasticNetCV;                   took 192.51 seconds
Fitting RandomForestRegressor;                    took 167.07 seconds
Fitting ExtraTreesRegressor;                      took 125.96 seconds
Fitting AdaBoostRegressor;                        took 28.12 seconds
Fitting GradientBoostingRegressor;                took 70.27 seconds
Fitting XGBRegressor;                             took 179.27 seconds
Fitting SVR_linear;                               took 610.35 seconds
Fitting SVR_rbf;                                  took 63.83 seconds
Fitting MLPRegressor;                             took 5164.3 seconds
Took a total of 6747.14 seconds to fit all models for circuit "Jeddah"



In [None]:
# Persist the fitted searches to disk so later steps can reload them
# instead of repeating the fitting.
with open("ig_initial_models.pickle", "wb") as out_file:
    pickle.dump(models_and_circuits, out_file)

In [None]:
# Collect the best score of every fitted search.
# Columns are the search names, rows are the circuits.
all_scores = pd.DataFrame({
    search_name: {
        circuit_name: fitted_search.best_score_
        for circuit_name, fitted_search in searches_by_circuit.items()
    }
    for search_name, searches_by_circuit in models_and_circuits.items()
})

all_scores

Unnamed: 0,LinearRegression,RidgeCV,LassoCV,ElasticNetCV,PolynomialLinearRegression,PolynomialRidgeCV,PolynomialLassoCV,PolynomialElasticNetCV,RandomForestRegressor,ExtraTreesRegressor,AdaBoostRegressor,GradientBoostingRegressor,XGBRegressor,SVR_linear,SVR_rbf,MLPRegressor
Catalunya,0.085793,0.085885,0.085538,0.085377,-0.245842,-0.002601,0.134024,0.13713,0.706377,0.676369,0.555309,0.696877,0.720373,-0.019466,0.13261,0.551058
Spa-Francorchamps,0.237564,0.237698,0.237862,0.237598,0.251023,0.283721,0.345422,0.394839,0.89706,0.886641,0.837479,0.888572,0.896406,0.138567,0.425036,0.79384
Suzuka,0.198668,0.198757,0.199619,0.197973,-0.051732,0.159406,0.267861,0.32392,0.799303,0.813051,0.740749,0.812979,0.82712,0.092339,0.30061,0.524172
Paul Ricard,0.303449,0.30437,0.303636,0.30254,-0.21282,0.50094,0.502671,0.540332,0.867879,0.885019,0.812276,0.887517,0.894289,0.210537,0.764616,0.846867
Baku,0.133529,0.133642,0.116061,0.132621,0.215403,0.227631,0.226944,0.243392,0.692919,0.690886,0.448231,0.718906,0.710114,0.076416,0.332562,0.448516
Hungaroring,0.103338,0.103394,0.103857,0.103287,0.138022,0.145742,0.190154,0.157958,0.775962,0.806652,0.597263,0.798287,0.806812,0.012935,0.181275,0.581401
Monte Carlo,0.344647,0.344824,0.318985,0.329726,-0.701965,0.518859,0.532107,0.547871,0.843848,0.852382,0.538646,0.851637,0.875379,0.353488,0.661588,0.718002
Silverstone,0.218916,0.219346,0.220024,0.224788,0.040605,0.03653,0.294425,0.312058,0.792625,0.774395,0.331917,0.808092,0.817017,0.107333,0.415707,0.440296
Yas Marina Circuit,0.104803,0.104856,0.104231,0.10439,0.034675,0.142783,0.160288,0.176009,0.824141,0.813251,0.71223,0.825207,0.817183,0.028915,0.227905,0.692824
Las Vegas,0.268112,0.268168,0.268157,0.268167,0.238797,0.372594,0.382143,0.38444,0.835108,0.822515,0.596539,0.849928,0.847659,0.120457,0.395584,0.625624


In [None]:
# Summarise each model's scores across all circuits.
# MinScore matters a lot: a good model should perform reasonably well on every track.
mean_by_model = all_scores.mean(axis="index")
median_by_model = all_scores.median(axis="index")
variance_by_model = all_scores.var(axis="index")
min_by_model = all_scores.min(axis="index")

model_scores_df = pd.DataFrame({
    "MeanScore": mean_by_model,
    "MedianScore": median_by_model,
    "ScoreVariance": variance_by_model,
    "MinScore": min_by_model,
})

# Best models (by mean score) first
model_scores_df.sort_values("MeanScore", ascending=False)

Unnamed: 0,MeanScore,MedianScore,ScoreVariance,MinScore
XGBRegressor,0.821699,0.842696,0.006347,0.647718
GradientBoostingRegressor,0.805995,0.830576,0.010052,0.512768
RandomForestRegressor,0.777065,0.818014,0.022196,0.197468
ExtraTreesRegressor,0.775533,0.812568,0.01712,0.300264
AdaBoostRegressor,0.568267,0.546978,0.035071,0.327699
MLPRegressor,0.563665,0.524406,0.020559,0.333441
SVR_rbf,0.40463,0.39617,0.03287,0.13261
PolynomialElasticNetCV,0.29143,0.242206,0.021433,0.13713
PolynomialLassoCV,0.272812,0.227749,0.020964,0.128623
RidgeCV,0.160499,0.145948,0.006635,0.061549


## Result interpretation
### The top-2
**XGBRegressor is a clear winner**. The lowest score it got is over 0.64, mean and median scores are highest of all models, while score variance is low. It is clear that this algorithm reliably provides good results.

**GradientBoostingRegressor** is a close runner-up, with similar characteristics, albeit somewhat less accurate and less consistent. This does not come as a surprise, since it uses an algorithm that is similar to, but less advanced than, the one used by XGBoost.

### Remaining results
The rest of the models have serious flaws. For example, **RandomForestRegressor**, despite having decent overall scores, has a higher score variance and got a score below 0.2 for one of the tracks. **ExtraTreesRegressor** is better in that regard, but still inferior to our top-2 models.

The rest of the regressors perform significantly worse than the others, with versions of polynomial and linear regression having particularly low performance. There are some outlying values, even negative ones, in these models. Considering that XGBoost is a clear winner, I do not deem it necessary to look into this further at this point.

### To sum up
It appears that *boosting models*, particularly XGBRegressor and GradientBoostingRegressor, are the best. These are the models that will be optimized and tested further.

<br><br><br>


In [41]:
# Show how the best models perform on every circuit
relevant_scores = all_scores.loc[:, ["XGBRegressor", "GradientBoostingRegressor"]]

# Key the lap counts by circuit name and align them to the score index by
# label rather than by position. `dfs` is filled by iterating a set of circuit
# names, and set iteration order for strings is not stable across interpreter
# sessions, so a positional list could attach a count to the wrong circuit
# (e.g. when the scores were produced in a different kernel session).
data_point_counts = pd.Series(
    {circuit: df.shape[0] for circuit, df in dfs.items()}
).reindex(relevant_scores.index)

track_scores_df = pd.DataFrame({
    "MeanScore": relevant_scores.mean(axis="columns"),
    "XGBRegressorScore": relevant_scores["XGBRegressor"],
    "GradientBoostingRegressorScore": relevant_scores["GradientBoostingRegressor"],
    "DataPointCount": data_point_counts,
})
# Worst-performing circuits first
track_scores_df.sort_values(by=["MeanScore"])

Unnamed: 0,MeanScore,XGBRegressorScore,GradientBoostingRegressorScore,DataPointCount
Singapore,0.580243,0.647718,0.512768,4304
Monza,0.639459,0.665123,0.613795,3898
Catalunya,0.708625,0.720373,0.696877,5054
Baku,0.71451,0.710114,0.718906,2734
Sakhir,0.730059,0.73695,0.723169,4414
Hungaroring,0.80255,0.806812,0.798287,5357
Silverstone,0.812555,0.817017,0.808092,3433
Miami,0.81269,0.815275,0.810105,2160
Suzuka,0.820049,0.82712,0.812979,3186
Yas Marina Circuit,0.821195,0.817183,0.825207,3307


## About scores by circuit
Scores clearly vary a lot depending on the circuit. It is important to note that both the characteristics of the circuit itself and the amount of data we have for each circuit have a big effect. For some circuits we only have data from one session, which is an obvious limitation and could affect the score in different ways. The score tends to be higher for circuits with fewer than 2000 data points, possibly because the data points only come from one or two sessions in those cases. This makes it very likely for weather to be roughly constant throughout the data relevant to them, skewing CV results in favour of the model.

### Point in favour of the results
Even in the worst case, XGBoost had a mean score of over 0.64, which means it accounted for 64% of target attribute variance. <br> 
The target attribute in this case is the driver's lap time z-score within each session. The z-score basically denotes how good the lap was compared to the other laps the same driver completed in the same session. This means that for the most unpredictable circuit, our model accounted for 64% of how pit stops and weather affect driver performance. For over 70% of the circuits, the model accounted for over 80% of those differences.

Therefore, it is clear to me that boosting models can produce decent-to-excellent results in general, even if some scores are exaggerated due to insufficient data size.