In [1]:
import sqlite3

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn import svm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_colwidth', None)

# Season to predict. Read as a module-level global by
# score_classification_with_predictions to select the rows to score.
year_test = 2024

In [2]:
def score_classification_with_predictions(model, pred_info, pred_data, pred_df,
                                          feature_scaler=None, season=None):
    """Rank the drivers of each round of a season by predicted class-1 probability.

    For every distinct ``round`` found in ``pred_df`` for the season, the
    feature columns (everything except ``driver`` and ``podium``) are scaled,
    scored with ``model.predict_proba`` and sorted by descending ``proba_1``.
    The 1-based position in that ordering is stored in ``predicted``.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict_proba``; its output must have
        exactly two columns, which are labelled ``proba_0`` and ``proba_1``.
    pred_info : pandas.DataFrame
        Supplies the ``season``, ``round`` and ``circuit_id`` columns that are
        prepended to the result.
    pred_data : pandas.DataFrame
        Needs ``season``, ``round`` and ``podium``; its ``podium`` values are
        copied into the result's ``podium`` column.
    pred_df : pandas.DataFrame
        Model input: ``season``, ``round``, ``driver``, ``grid``, ``podium``
        plus the remaining feature columns. Its ``podium`` values are reported
        as ``actual``.
    feature_scaler : object, optional
        Fitted transformer exposing ``transform``. Defaults to the
        notebook-level ``scaler``.
    season : optional
        Season to score. Defaults to the notebook-level ``year_test``.

    Returns
    -------
    pandas.DataFrame
        Columns ``season, round, circuit_id, driver, grid, podium, actual,
        predicted, proba_0, proba_1`` on a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``pred_df`` has no rows for the requested season.
    """
    # Fall back to the notebook globals so existing four-argument calls keep working.
    if feature_scaler is None:
        feature_scaler = scaler
    if season is None:
        season = year_test

    in_season = pred_df.season == season
    round_frames = []  # one ranked frame per round

    for circuit in pred_df[in_season]['round'].unique():
        podium_pos = pred_data[(pred_data.season == season) & (pred_data['round'] == circuit)]
        test = pred_df[in_season & (pred_df['round'] == circuit)]
        X_test = test.drop(['driver', 'podium'], axis=1)

        # Apply the already-fitted scaler; it is never re-fitted here.
        X_test = pd.DataFrame(feature_scaler.transform(X_test), columns=X_test.columns)

        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        # Indexes are reset so these columns line up positionally with the
        # 0..n-1 index of the probability frame.
        prediction_df['actual'] = test.podium.reset_index(drop=True)
        prediction_df['driver'] = test.driver.reset_index(drop=True)
        prediction_df['grid'] = test.grid.reset_index(drop=True)
        prediction_df['podium'] = podium_pos.podium.reset_index(drop=True)

        # Highest class-1 probability first; the new index position + 1 is the rank.
        prediction_df = prediction_df.sort_values('proba_1', ascending=False).reset_index(drop=True)
        prediction_df['predicted'] = prediction_df.index + 1
        round_frames.append(prediction_df)

    if not round_frames:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(f"pred_df contains no rows for season {season}; nothing to score")

    predictions_df_final = pd.concat(round_frames)
    predictions_df_final = predictions_df_final[['driver', 'grid', 'podium', 'actual', 'predicted', 'proba_0', 'proba_1']]
    predictions_df_final = predictions_df_final.reset_index(drop=True)

    head = pred_info[pred_info['season'] == season]
    head = head[['season', 'round', 'circuit_id']].reset_index(drop=True)

    # NOTE(review): this is a positional join. It assumes pred_info lists the
    # season's rows round by round in the same order as pred_df; the three
    # metadata columns are then constant within a round, so the per-round
    # re-sorting above does not misalign them. Confirm against the source tables.
    return pd.concat([head, predictions_df_final], axis=1)

## Connect to Database
Load the tables from the SQLite database and create the train and test data.

In [3]:
# Read both tables inside try/finally so the SQLite connection is closed even
# when one of the queries raises; previously a failed read left it open.
connection = sqlite3.connect('racesD.db')
try:
    # Table loaded into `data`; the model frame `df` is derived from it below.
    table_name = 'i_df_dum_table'
    query = f'SELECT * FROM "{table_name}"'
    data = pd.read_sql_query(query, connection)

    # Table loaded into `info`; later cells take race metadata from it.
    table_name = 'h_final_cleaned_table'
    query = f'SELECT * FROM "{table_name}"'
    info = pd.read_sql_query(query, connection)
finally:
    connection.close()

data = data.reset_index(drop=True)

# Binary target: 1 where `podium` equals 1, otherwise 0.
# `data` keeps the original `podium` values untouched.
df = data.copy()
df['podium'] = (df['podium'] == 1).astype('int64')

# Everything except the trailing 20 rows is training data; a later cell takes
# those trailing 20 rows as the race to predict.
train = df.iloc[:-20]
X_train = train.drop(['driver', 'podium'], axis=1)
y_train = train.podium

# The scaler is fitted on the training rows only. The fitted `scaler` object is
# reused as a global by score_classification_with_predictions.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

In [4]:
# Empty accumulator; the prediction loop further down appends one row per
# iteration to it.
prediction = pd.DataFrame()
prediction

In [5]:
# Just one race to predict: take the trailing 20 rows of each table, i.e. the
# same rows that were excluded from `train`.
pred_info, pred_data, pred_df = (frame.iloc[-20:] for frame in (info, data, df))

In [6]:
# Inspect the held-out rows that the prediction loop will score.
pred_df

Unnamed: 0,season,round,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy,driver,grid,podium,...,drivers_webber,drivers_wehrlein,drivers_wendlinger,drivers_wilson,drivers_wurz,drivers_yamamoto,drivers_yoong,drivers_zanardi,drivers_zhou,drivers_zonta
16452,2024,14,0,0,0,1,0,leclerc,1,0,...,0,0,0,0,0,0,0,0,0,0
16453,2024,14,0,0,0,1,0,perez,2,0,...,0,0,0,0,0,0,0,0,0,0
16454,2024,14,0,0,0,1,0,hamilton,3,0,...,0,0,0,0,0,0,0,0,0,0
16455,2024,14,0,0,0,1,0,norris,4,0,...,0,0,0,0,0,0,0,0,0,0
16456,2024,14,0,0,0,1,0,piastri,5,0,...,0,0,0,0,0,0,0,0,0,0
16457,2024,14,0,0,0,1,0,russell,6,0,...,0,0,0,0,0,0,0,0,0,0
16458,2024,14,0,0,0,1,0,sainz,7,0,...,0,0,0,0,0,0,0,0,0,0
16459,2024,14,0,0,0,1,0,alonso,8,0,...,0,0,0,0,0,0,0,0,0,0
16460,2024,14,0,0,0,1,0,ocon,9,0,...,0,0,0,0,0,0,0,0,0,0
16461,2024,14,0,0,0,1,0,max_verstappen,10,0,...,0,0,0,0,0,0,0,0,0,0


## Prediction

In [7]:
# Fit the final model once, before the loop. X_train, y_train and random_state
# never change between iterations, so re-creating and re-fitting the model on
# every pass (as this cell used to do) rebuilt the same model each time.
# NOTE(review): with activation='identity' the hidden layers apply no
# non-linearity; confirm a four-layer network is intended here.
final_model = MLPClassifier(hidden_layer_sizes=(75, 25, 50, 10),
                            activation='identity',
                            solver='lbfgs',
                            alpha=0.01623776739188721,
                            random_state=1)
final_model.fit(X_train, y_train)

# Build the predicted order one position at a time: score the remaining
# drivers, take the one with the highest class-1 probability, remove that
# driver from all three frames, and repeat until nobody is left.
while len(pred_info) > 0:

    final_predictions_df = score_classification_with_predictions(final_model, pred_info, pred_data, pred_df)
    final_predictions_df = final_predictions_df.drop(columns=['actual'])

    # The first row is the top-ranked remaining driver.
    driver = final_predictions_df["driver"].iloc[0]

    prediction = pd.concat([prediction, final_predictions_df.iloc[:1]], ignore_index=True)

    pred_info = pred_info[pred_info["driver"] != driver]
    pred_data = pred_data[pred_data["driver"] != driver]
    pred_df = pred_df[pred_df["driver"] != driver]

    # Each appended row arrives with predicted == 1 (it topped its own
    # ranking), so renumber by the order in which drivers were picked.
    prediction["predicted"] = range(1, len(prediction) + 1)

    print(prediction.drop(columns=["proba_0", "proba_1"]))
    print("\n")

   season  round circuit_id    driver  grid  podium  predicted
0    2024     14        spa  hamilton     3       0          1


   season  round circuit_id    driver  grid  podium  predicted
0    2024     14        spa  hamilton     3       0          1
1    2024     14        spa     perez     2       0          2


   season  round circuit_id    driver  grid  podium  predicted
0    2024     14        spa  hamilton     3       0          1
1    2024     14        spa     perez     2       0          2
2    2024     14        spa   leclerc     1       0          3


   season  round circuit_id          driver  grid  podium  predicted
0    2024     14        spa        hamilton     3       0          1
1    2024     14        spa           perez     2       0          2
2    2024     14        spa         leclerc     1       0          3
3    2024     14        spa  max_verstappen    10       0          4


   season  round circuit_id          driver  grid  podium  predicted
0    2024  

    season  round circuit_id          driver  grid  podium  predicted
0     2024     14        spa        hamilton     3       0          1
1     2024     14        spa           perez     2       0          2
2     2024     14        spa         leclerc     1       0          3
3     2024     14        spa  max_verstappen    10       0          4
4     2024     14        spa         piastri     5       0          5
5     2024     14        spa         russell     6       0          6
6     2024     14        spa          norris     4       0          7
7     2024     14        spa           sainz     7       0          8
8     2024     14        spa            ocon     9       0          9
9     2024     14        spa       ricciardo    13       0         10
10    2024     14        spa          alonso     8       0         11
11    2024     14        spa           gasly    12       0         12
12    2024     14        spa          bottas    14       0         13
13    2024     14   

In [8]:
# Full predicted order, including both class probabilities.
prediction

Unnamed: 0,season,round,circuit_id,driver,grid,podium,predicted,proba_0,proba_1
0,2024,14,spa,hamilton,3,0,1,0.801368,0.1986319
1,2024,14,spa,perez,2,0,2,0.860516,0.1394843
2,2024,14,spa,leclerc,1,0,3,0.88129,0.1187105
3,2024,14,spa,max_verstappen,10,0,4,0.948936,0.05106371
4,2024,14,spa,piastri,5,0,5,0.954126,0.04587353
5,2024,14,spa,russell,6,0,6,0.981817,0.01818272
6,2024,14,spa,norris,4,0,7,0.982546,0.01745445
7,2024,14,spa,sainz,7,0,8,0.991604,0.008396081
8,2024,14,spa,ocon,9,0,9,0.996715,0.003284822
9,2024,14,spa,ricciardo,13,0,10,0.999385,0.0006148929


In [9]:
# Presentation copy of the ranking: hide the `podium` column and round the
# class-1 probability to three decimals (`proba_0` is left unrounded).
s = (
    prediction
    .drop(columns=['podium'])
    .assign(proba_1=lambda frame: frame['proba_1'].round(3))
)
s

Unnamed: 0,season,round,circuit_id,driver,grid,predicted,proba_0,proba_1
0,2024,14,spa,hamilton,3,1,0.801368,0.199
1,2024,14,spa,perez,2,2,0.860516,0.139
2,2024,14,spa,leclerc,1,3,0.88129,0.119
3,2024,14,spa,max_verstappen,10,4,0.948936,0.051
4,2024,14,spa,piastri,5,5,0.954126,0.046
5,2024,14,spa,russell,6,6,0.981817,0.018
6,2024,14,spa,norris,4,7,0.982546,0.017
7,2024,14,spa,sainz,7,8,0.991604,0.008
8,2024,14,spa,ocon,9,9,0.996715,0.003
9,2024,14,spa,ricciardo,13,10,0.999385,0.001
