In [1]:
import sqlite3

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn import svm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

# Lift pandas' display limits (columns, rows, cell width) so frames are rendered in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Held-out season: rows with season < year_test are used for training,
# rows with season == year_test are used for scoring.
year_test = 2023

## Connect to Database
Load the tables and create the training data.

In [2]:
# Read the two tables used by this notebook from the SQLite database.
# try/finally guarantees the connection is closed even if one of the reads raises
# (sqlite3's own context manager only manages transactions; it does not close).
connection = sqlite3.connect('races.db')
try:
    table_name = 'i_df_dum_table'
    query = f'SELECT * FROM "{table_name}"'
    data = pd.read_sql_query(query, connection)

    table_name = 'h_final_cleaned_table'
    query = f'SELECT * FROM "{table_name}"'
    info = pd.read_sql_query(query, connection)
finally:
    connection.close()

data = data.reset_index(drop=True)

# Work on a copy so that `data` keeps the original podium values.
df = data.copy()
# Binarise the target: 1 where podium == 1, 0 for every other value.
df['podium'] = df['podium'].map(lambda x: 1 if x == 1 else 0)

# Train on every season before the held-out test season.
train = df[df.season < year_test]
X_train = train.drop(['driver', 'podium'], axis=1)
y_train = train.podium

# Fit the scaler on the training features only; the fitted `scaler` is reused
# by the scoring functions to transform the test rows.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

## Find the Right Parameters
Search for the hyperparameters that make the model most accurate.

In [3]:
# scoring function for classification

def score_classification(model):
    """Score a fitted classifier on the held-out season, one race round at a time.

    For every round of season ``year_test`` the rows of that round are scaled with
    the already-fitted global ``scaler`` and ranked by the model's probability for
    class 1. Only the top-ranked row is marked as a predicted positive, and the
    precision of that single prediction against the true ``podium`` label is
    computed. The returned value is the mean of these per-round precisions.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict_proba`` returning two columns (class 0, class 1).

    Returns
    -------
    float
        Mean per-round precision over all rounds of the test season.

    Raises
    ------
    ValueError
        If ``df`` contains no rows for season ``year_test``.
    """
    # Filter the test season once instead of re-filtering the full frame per round.
    season_rows = df[df.season == year_test]
    rounds = season_rows['round'].unique()
    if len(rounds) == 0:
        raise ValueError(f"No rows found for test season {year_test}; cannot score the model.")

    score = 0
    for race_round in rounds:
        test = season_rows[season_rows['round'] == race_round]
        X_test = test.drop(['driver', 'podium'], axis=1)
        y_test = test.podium

        # scaling (transform only -- the scaler was fitted on the training data)
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        prediction_df['actual'] = y_test.reset_index(drop=True)

        # Rank rows by class-1 probability; after the index reset, position 0 is the top pick.
        prediction_df = (
            prediction_df
            .sort_values('proba_1', ascending=False)
            .reset_index(drop=True)
        )
        # Exactly one predicted positive per round: the highest-ranked row.
        prediction_df['predicted'] = (prediction_df.index == 0).astype(int)

        score += precision_score(prediction_df.actual, prediction_df.predicted)

    return score / len(rounds)

In [None]:
# Neural network: exhaustive search over the MLPClassifier settings listed in `params`.
# One entry is appended to each list of `comparison_dict` per fitted configuration.
comparison_dict = {'model': [], 'params': [], 'score': []}

params = {
    'hidden_layer_sizes': [(80, 20, 40, 5), (75, 25, 50, 10)],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': np.logspace(-4, 2, 20),
}

# Flatten the grid while keeping the nesting order (alpha varies fastest).
search_grid = [
    (hidden_layer_sizes, activation, solver, alpha)
    for hidden_layer_sizes in params['hidden_layer_sizes']
    for activation in params['activation']
    for solver in params['solver']
    for alpha in params['alpha']
]

for model_params in search_grid:
    hidden_layer_sizes, activation, solver, alpha = model_params

    model = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        solver=solver,
        alpha=alpha,
        random_state=1,
    )
    model.fit(X_train, y_train)

    model_score = score_classification(model)

    # Report the configuration that was just evaluated
    print(f"Model Parameters: {model_params}")
    print(f"Model Score: {model_score}")

    comparison_dict['model'].append('neural_network_classifier')
    comparison_dict['params'].append(model_params)
    comparison_dict['score'].append(model_score)

Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.0001)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.00020691380811147902)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.00042813323987193956)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.0008858667904100823)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.0018329807108324356)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.00379269019073225)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.007847599703514606)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.01623776739188721)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.03359818286283781)
Model Score: 0.7857142857142857
Model Parame

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 0.615848211066026)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 1.2742749857031321)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 2.6366508987303554)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 5.455594781168514)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 11.288378916846883)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 23.357214690901213)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 48.32930238571752)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'identity', 'lbfgs', 100.0)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 0.0001)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', '



Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 0.14384498882876628)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 0.29763514416313164)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 0.615848211066026)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 1.2742749857031321)
Model Score: 0.8571428571428571




Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 2.6366508987303554)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 5.455594781168514)
Model Score: 0.9285714285714286
Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 11.288378916846883)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 23.357214690901213)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 48.32930238571752)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'identity', 'sgd', 100.0)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'identity', 'adam', 0.0001)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'adam', 0.00020691380811147902)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'adam', 0.00042813323987193956)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'identity', 'adam



Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 0.615848211066026)
Model Score: 0.14285714285714285




Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 1.2742749857031321)
Model Score: 0.0




Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 2.6366508987303554)
Model Score: 0.0
Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 5.455594781168514)
Model Score: 0.07142857142857142
Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 11.288378916846883)
Model Score: 0.2857142857142857
Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 23.357214690901213)
Model Score: 0.5714285714285714
Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 48.32930238571752)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'logistic', 'sgd', 100.0)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.0001)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.00020691380811147902)
Model Score: 0.5




Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.00042813323987193956)
Model Score: 0.6428571428571429




Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.0008858667904100823)
Model Score: 0.5714285714285714




Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.0018329807108324356)
Model Score: 0.5714285714285714




Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.00379269019073225)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.007847599703514606)
Model Score: 0.5714285714285714
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.01623776739188721)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.03359818286283781)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.06951927961775606)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.14384498882876628)
Model Score: 0.0
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.29763514416313164)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 0.615848211066026)
Model Score: 0.7142857142857143
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 1.2742749857031321)
Model Score: 0.5
Model Parameters: ((80, 20, 40, 5), 'logistic', 'adam', 2.6366508987303554)
Model Score: 0.14285714285714285
Model Parameters: ((80, 20, 40, 5), 'logistic', 'ad

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.00020691380811147902)
Model Score: 0.42857142857142855
Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.00042813323987193956)
Model Score: 0.5
Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.0008858667904100823)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.0018329807108324356)
Model Score: 0.35714285714285715
Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.00379269019073225)
Model Score: 0.42857142857142855
Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.007847599703514606)
Model Score: 0.5
Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.01623776739188721)
Model Score: 0.35714285714285715
Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.03359818286283781)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.06951927961775606)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.14384498882876628)
Model Score: 0.5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.29763514416313164)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 0.615848211066026)
Model Score: 0.6428571428571429


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 1.2742749857031321)
Model Score: 0.5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 2.6366508987303554)
Model Score: 0.5714285714285714


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 5.455594781168514)
Model Score: 0.5714285714285714


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 11.288378916846883)
Model Score: 0.6428571428571429


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 23.357214690901213)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 48.32930238571752)
Model Score: 0.7142857142857143
Model Parameters: ((80, 20, 40, 5), 'tanh', 'lbfgs', 100.0)
Model Score: 0.8571428571428571




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.0001)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.00020691380811147902)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.00042813323987193956)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.0008858667904100823)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.0018329807108324356)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.00379269019073225)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.007847599703514606)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.01623776739188721)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.03359818286283781)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.06951927961775606)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.14384498882876628)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.29763514416313164)
Model Score: 0.8571428571428571




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 0.615848211066026)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 1.2742749857031321)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 2.6366508987303554)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 5.455594781168514)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 11.288378916846883)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 23.357214690901213)
Model Score: 0.8571428571428571
Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 48.32930238571752)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'tanh', 'sgd', 100.0)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'tanh', 'adam', 0.0001)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'tanh', 'adam', 0.00020691380811147902)
Model Score: 0.5714285714285714
Model Parameters: ((80, 20, 40, 5), 'tanh', 'adam', 0.00042813323987193956)
Model Score: 0.7142857142857143
Model Parameters: ((80, 20, 40, 5), 'tanh', 'adam', 0.0008858667904100823)
Model Score: 0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'relu', 'lbfgs', 5.455594781168514)
Model Score: 0.5714285714285714


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((80, 20, 40, 5), 'relu', 'lbfgs', 11.288378916846883)
Model Score: 0.7857142857142857
Model Parameters: ((80, 20, 40, 5), 'relu', 'lbfgs', 23.357214690901213)
Model Score: 0.7142857142857143
Model Parameters: ((80, 20, 40, 5), 'relu', 'lbfgs', 48.32930238571752)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'relu', 'lbfgs', 100.0)
Model Score: 0.6428571428571429




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.0001)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.00020691380811147902)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.00042813323987193956)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.0008858667904100823)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.0018329807108324356)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.00379269019073225)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.007847599703514606)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.01623776739188721)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.03359818286283781)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.06951927961775606)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.14384498882876628)
Model Score: 0.7142857142857143




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.29763514416313164)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 0.615848211066026)
Model Score: 0.7857142857142857




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 1.2742749857031321)
Model Score: 0.8571428571428571




Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 2.6366508987303554)
Model Score: 0.5
Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 5.455594781168514)
Model Score: 0.0
Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 11.288378916846883)
Model Score: 0.0
Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 23.357214690901213)
Model Score: 0.14285714285714285
Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 48.32930238571752)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'relu', 'sgd', 100.0)
Model Score: 0.6428571428571429
Model Parameters: ((80, 20, 40, 5), 'relu', 'adam', 0.0001)
Model Score: 0.7142857142857143
Model Parameters: ((80, 20, 40, 5), 'relu', 'adam', 0.00020691380811147902)
Model Score: 0.5
Model Parameters: ((80, 20, 40, 5), 'relu', 'adam', 0.00042813323987193956)
Model Score: 0.7142857142857143
Model Parameters: ((80, 20, 40, 5), 'relu', 'adam', 0.0008858667904100823)
Model Score: 0.5
Model Parameters: ((80, 20, 40, 5), 'relu', 'adam', 0.00



Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 0.14384498882876628)
Model Score: 0.7857142857142857




Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 0.29763514416313164)
Model Score: 0.7857142857142857




Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 0.615848211066026)
Model Score: 0.8571428571428571




Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 1.2742749857031321)
Model Score: 0.8571428571428571




Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 2.6366508987303554)
Model Score: 0.8571428571428571
Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 5.455594781168514)
Model Score: 0.9285714285714286
Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 11.288378916846883)
Model Score: 0.7857142857142857
Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 23.357214690901213)
Model Score: 0.7857142857142857
Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 48.32930238571752)
Model Score: 0.6428571428571429
Model Parameters: ((75, 25, 50, 10), 'identity', 'sgd', 100.0)
Model Score: 0.6428571428571429
Model Parameters: ((75, 25, 50, 10), 'identity', 'adam', 0.0001)
Model Score: 0.7857142857142857
Model Parameters: ((75, 25, 50, 10), 'identity', 'adam', 0.00020691380811147902)
Model Score: 0.7857142857142857
Model Parameters: ((75, 25, 50, 10), 'identity', 'adam', 0.00042813323987193956)
Model Score: 0.7857142857142857
Model Parameters: ((75, 25, 50, 10), 'identi



Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 0.615848211066026)
Model Score: 0.0




Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 1.2742749857031321)
Model Score: 0.0




Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 2.6366508987303554)
Model Score: 0.0
Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 5.455594781168514)
Model Score: 0.14285714285714285
Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 11.288378916846883)
Model Score: 0.14285714285714285
Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 23.357214690901213)
Model Score: 0.6428571428571429
Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 48.32930238571752)
Model Score: 0.6428571428571429
Model Parameters: ((75, 25, 50, 10), 'logistic', 'sgd', 100.0)
Model Score: 0.6428571428571429
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.0001)
Model Score: 0.5714285714285714




Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.00020691380811147902)
Model Score: 0.7857142857142857




Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.00042813323987193956)
Model Score: 0.7857142857142857




Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.0008858667904100823)
Model Score: 0.7857142857142857




Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.0018329807108324356)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.00379269019073225)
Model Score: 0.6428571428571429




Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.007847599703514606)
Model Score: 0.7857142857142857




Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.01623776739188721)
Model Score: 0.7142857142857143
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.03359818286283781)
Model Score: 0.7857142857142857
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.06951927961775606)
Model Score: 0.7857142857142857
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.14384498882876628)
Model Score: 0.7142857142857143
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.29763514416313164)
Model Score: 0.0
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 0.615848211066026)
Model Score: 0.6428571428571429
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 1.2742749857031321)
Model Score: 0.0
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 2.6366508987303554)
Model Score: 0.5
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 5.455594781168514)
Model Score: 0.6428571428571429
Model Parameters: ((75, 25, 50, 10), 'logistic', 'adam', 11.2

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.0001)
Model Score: 0.2857142857142857


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.00020691380811147902)
Model Score: 0.2857142857142857


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.00042813323987193956)
Model Score: 0.2857142857142857


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.0008858667904100823)
Model Score: 0.42857142857142855
Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.0018329807108324356)
Model Score: 0.21428571428571427


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.00379269019073225)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.007847599703514606)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.01623776739188721)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.03359818286283781)
Model Score: 0.35714285714285715


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.06951927961775606)
Model Score: 0.35714285714285715


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.14384498882876628)
Model Score: 0.35714285714285715


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.29763514416313164)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 0.615848211066026)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 1.2742749857031321)
Model Score: 0.5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 2.6366508987303554)
Model Score: 0.5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 5.455594781168514)
Model Score: 0.5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 11.288378916846883)
Model Score: 0.42857142857142855


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 23.357214690901213)
Model Score: 0.6428571428571429


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 48.32930238571752)
Model Score: 0.8571428571428571
Model Parameters: ((75, 25, 50, 10), 'tanh', 'lbfgs', 100.0)
Model Score: 0.8571428571428571




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.0001)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.00020691380811147902)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.00042813323987193956)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.0008858667904100823)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.0018329807108324356)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.00379269019073225)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.007847599703514606)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.01623776739188721)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.03359818286283781)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.06951927961775606)
Model Score: 0.7142857142857143




Model Parameters: ((75, 25, 50, 10), 'tanh', 'sgd', 0.14384498882876628)
Model Score: 0.7142857142857143


In [None]:
# Tabulate every evaluated configuration, then show the row(s) that reach the top score.
best_params = pd.DataFrame(comparison_dict)
best_params.loc[lambda frame: frame['score'].eq(frame['score'].max())]

## Use best Parameters found
Fit the final model and calculate its score.

In [None]:
# Fit an MLP classifier with a hard-coded hyperparameter set on the training data.
# NOTE(review): later cells assign `final_model` again, so on a top-to-bottom run
# this fit is replaced before the model is evaluated.
# NOTE(review): the captured grid-search log shows 0.857 for this configuration,
# while the same settings with alpha=5.455594781168514 show 0.929 -- confirm the
# intended "best" parameters.
final_model = MLPClassifier(
    hidden_layer_sizes=(80, 20, 40, 5),
    activation='identity',
    solver='sgd',
    alpha=1.2742749857031321,
    random_state=1,
).fit(X_train, y_train)
final_model

In [None]:
# Fit an MLP classifier with a second hard-coded hyperparameter set on the training data.
# NOTE(review): this rebinds `final_model`, and the following cell rebinds it again.
# NOTE(review): the captured grid-search log shows 0.786 for this configuration --
# confirm it is meant to be one of the "best" candidates.
final_model = MLPClassifier(
    hidden_layer_sizes=(80, 20, 40, 5),
    activation='identity',
    solver='lbfgs',
    alpha=0.007847599703514606,
    random_state=1,
).fit(X_train, y_train)
final_model

In [None]:
# Fit an MLP classifier with a third hard-coded hyperparameter set on the training data.
# This is the last assignment to `final_model` before the evaluation cell, so on a
# top-to-bottom run it is the model that gets scored.
final_model = MLPClassifier(
    hidden_layer_sizes=(75, 25, 50, 10),
    activation='identity',
    solver='lbfgs',
    alpha=0.01623776739188721,
    random_state=1,
).fit(X_train, y_train)
final_model

In [None]:
# Define a function to evaluate the final model
def evaluate_final_model(model):
    """Return the score that ``score_classification`` assigns to ``model``.

    Thin convenience wrapper: the model is passed through unchanged and the
    result of ``score_classification`` is returned as-is.
    """
    return score_classification(model)

In [None]:
# Score whichever `final_model` was fitted last, using evaluate_final_model
# (which delegates to score_classification).
final_model_score = evaluate_final_model(final_model)

# Report the resulting score in the cell output.
print(f"Final Model Score: {final_model_score}")

## Run Prediction

In [None]:
def score_classification_with_predictions(model):
    """Rank the drivers of every ``year_test`` round by predicted podium probability.

    Despite its name this function does not compute a score; it only returns the
    per-driver predictions so that they can be inspected or scored afterwards.

    It relies on the notebook-level names ``df`` (features plus the binary
    ``podium`` target), ``data`` (the same rows with the untransformed ``podium``
    values), ``info`` (source of ``circuit_id``), the already fitted ``scaler``
    and ``year_test``.

    Parameters
    ----------
    model
        Fitted classifier whose ``predict_proba`` returns two columns
        (probability of class 0 and of class 1).

    Returns
    -------
    pandas.DataFrame
        One row per driver and round, ordered round by round and, within a
        round, by descending ``proba_1``. Columns: ``season``, ``round``,
        ``circuit_id``, ``driver``, ``grid``, ``podium`` (value taken from
        ``data``), ``actual`` (binary target from ``df``), ``predicted``
        (1-based rank within the round), ``proba_0``, ``proba_1``.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``df`` holds no rows for ``year_test``.
    """
    # Look circuit ids up by round instead of gluing the `info` rows on by
    # position: predictions are re-sorted inside every round, and a positional
    # concat silently mislabels rows whenever `info` is ordered differently
    # from `df` or has a different number of rows for the test season.
    season_info = info[info['season'] == year_test]
    circuit_by_round = dict(zip(season_info['round'], season_info['circuit_id']))

    season_mask = df.season == year_test
    round_frames = []  # one ranked prediction frame per round

    for circuit in df[season_mask]['round'].unique():
        test = df[season_mask & (df['round'] == circuit)]
        # `data` has the same rows as `df` but keeps the original podium values.
        podium_pos = data[(data.season == year_test) & (data['round'] == circuit)]

        # Scale the features with the scaler fitted on the training data.
        X_test = test.drop(['driver', 'podium'], axis=1)
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # Class probabilities, joined positionally with the identifying columns.
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        prediction_df['actual'] = test.podium.reset_index(drop=True)
        prediction_df['driver'] = test.driver.reset_index(drop=True)
        prediction_df['grid'] = test.grid.reset_index(drop=True)
        prediction_df['podium'] = podium_pos.podium.reset_index(drop=True)

        # Most likely podium finisher first; the new 0-based index + 1 is the rank.
        prediction_df = prediction_df.sort_values('proba_1', ascending=False).reset_index(drop=True)
        prediction_df['predicted'] = prediction_df.index + 1

        # Label the rows with the round they were actually computed for.
        prediction_df['season'] = year_test
        prediction_df['round'] = circuit
        prediction_df['circuit_id'] = circuit_by_round.get(circuit)

        round_frames.append(prediction_df)

    # Stack all rounds into one frame with a fixed column order.
    predictions_df_final = pd.concat(round_frames).reset_index(drop=True)
    return predictions_df_final[['season', 'round', 'circuit_id',
                                 'driver', 'grid', 'podium', 'actual',
                                 'predicted', 'proba_0', 'proba_1']]

In [None]:
# Build the per-driver predictions DataFrame for the test season with the final
# model (the function returns only the predictions, not a score).
final_predictions_df = score_classification_with_predictions(final_model)

In [None]:
# Score on the whole grid: the share of rows whose predicted rank equals the
# value in the 'podium' column.
grid_score = pd.DataFrame({
    'actual': final_predictions_df['podium'],
    'predicted': final_predictions_df['predicted'],
})
grid_score['actual_y'] = 1
# 1 where the predicted rank matches the 'podium' value exactly, otherwise 0.
grid_score['pred_y'] = np.where(grid_score['actual'] == grid_score['predicted'], 1, 0)

# Count the exact matches and display them as a fraction of all rows.
b = grid_score['pred_y']
score = int((b == 1).sum())
score / len(b)

In [None]:
# Predictions for round 14 only, without the binary 'actual' column.
final_predictions_df.loc[final_predictions_df['round'].eq(14)].drop(columns='actual')

In [None]:
# All predictions for the test season, without the 'actual' and 'grid' columns.
final_predictions_df.drop(['actual', 'grid'], axis=1)