In [2]:
import pandas as pd
import numpy as np
import warnings

from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [3]:
data = pd.read_csv(path+'data/ml_input.csv')
merged = pd.read_csv(path+'data/merged.csv')
parameters = pd.read_csv(path+'parameters/neutralnetworkclassifier.csv')

In [4]:
N = 2022

In [7]:
merged

Unnamed: 0,season,round,circuit_id,country,lat,long,date,driver,date_of_birth,nationality,...,constructor_wins_after_race,constructor_standings_pos_after_race,constructor_points,constructor_wins,constructor_standings_pos,starting_grid,qualifying_pos,qual_time,stage,q_delta


In [5]:
df = data.copy()
df.podium = df.podium.map(lambda x: 1 if x == 1 else 0)

train = df[df.season != N]
X_train = train.drop(['season', 'round','podium', 'qual_time'], axis=1)
y_train = train.podium

scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns = X_train.columns)

ValueError: Found array with 0 sample(s) (shape=(0, 9)) while a minimum of 1 is required by StandardScaler.

In [None]:
data.columns

Index(['season', 'round', 'podium', 'driver_points', 'driver_wins',
       'driver_standings_pos', 'constructor_points', 'constructor_wins',
       'constructor_standings_pos', 'grid', 'qual_time',
       'circuit_id_albert_park', 'circuit_id_americas', 'circuit_id_bahrain',
       'circuit_id_baku', 'circuit_id_catalunya', 'circuit_id_hockenheimring',
       'circuit_id_hungaroring', 'circuit_id_imola', 'circuit_id_interlagos',
       'circuit_id_istanbul', 'circuit_id_jeddah', 'circuit_id_losail',
       'circuit_id_marina_bay', 'circuit_id_miami', 'circuit_id_monaco',
       'circuit_id_monza', 'circuit_id_mugello', 'circuit_id_nurburgring',
       'circuit_id_portimao', 'circuit_id_red_bull_ring', 'circuit_id_ricard',
       'circuit_id_rodriguez', 'circuit_id_sepang', 'circuit_id_shanghai',
       'circuit_id_silverstone', 'circuit_id_sochi', 'circuit_id_spa',
       'circuit_id_suzuka', 'circuit_id_villeneuve', 'circuit_id_yas_marina',
       'circuit_id_zandvoort', 'stage_q1', '

In [None]:
def score_classification(model):
    score = 0
    for circuit in df[df.season == N]['round'].unique():

        test = df[(df.season == N) & (df['round'] == circuit)]
        X_test = test.drop(['season', 'round', 'podium', 'qual_time'], axis = 1)
        y_test = test.podium
        
        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns = ['proba_0', 'proba_1'])
        prediction_df['actual'] = y_test.reset_index(drop=True)
        prediction_df.sort_values('proba_1', ascending = False, inplace = True)
        prediction_df.reset_index(inplace=True, drop=True)
        prediction_df['predicted'] = prediction_df.index
        prediction_df['predicted'] = prediction_df.predicted.map(lambda x: 1 if x == 0 else 0)

        score += precision_score(prediction_df.actual, prediction_df.predicted)

    model_score = score / df[df.season == N]['round'].unique().max()
    return model_score

In [None]:
parameters.head(1)

Unnamed: 0.1,Unnamed: 0,model,hidden_layer_sizes,activation,solver,alpha,score
0,0,neural_network_classifier,"(80, 20, 40, 5)",identity,lbfgs,0.0001,0.727273


In [None]:
def get_predictions(model):
    predictions = []
    for circuit in df[df.season == N]['round'].unique():

        test = df[(df.season == N) & (df['round'] == circuit)]
        X_test = test.drop(['season', 'round','podium', 'qual_time'], axis = 1)
        y_test = test.podium
        
        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)
        
        # make predictions
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        prediction_df['actual'] = y_test.reset_index(drop=True)
        prediction_df['predicted'] = prediction_df.proba_1.map(lambda x: 1 if float(x) == float(prediction_df.proba_1.max()) else 0)
        

        predictions += list(prediction_df.predicted.values)

    return predictions

In [None]:
hidden_layer_sizes= [(80,20,40,5), (75, 25, 50, 10)][0]
activation='identity'
solver='lbfgs'
alpha=0.00001

model = MLPClassifier(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation, 
    solver=solver, alpha=alpha, 
    random_state=1)

model.fit(X_train, y_train)

model_score = score_classification(model)
predictions = get_predictions(model)

print(model_score)

0.47619047619047616


In [None]:
test = merged.query('season ==@N')
test['predicted'] = predictions
test = test[['season', 'round', 'circuit_id', 'driver', 'grid', 'podium', 'predicted']]

In [None]:
test.query('predicted == 1').drop('predicted', axis=1)

Unnamed: 0,season,round,circuit_id,driver,grid,podium
3059,2022,1,bahrain,leclerc,1,1
3080,2022,2,jeddah,leclerc,2,2
3097,2022,3,albert_park,leclerc,1,1
3120,2022,4,imola,leclerc,2,6
3134,2022,5,miami,leclerc,1,2
3171,2022,6,catalunya,leclerc,1,20
3175,2022,7,monaco,leclerc,1,4
3210,2022,8,baku,leclerc,1,19
3212,2022,9,villeneuve,max_verstappen,1,1
3238,2022,10,silverstone,max_verstappen,2,7


In [None]:
merged.query('season == 2022 & round == 14')

Unnamed: 0,season,round,circuit_id,country,lat,long,date,driver,nationality,constructor,...,constructor_wins_after_race,constructor_standings_pos_after_race,constructor_points,constructor_wins,constructor_standings_pos,grid,qual_time,stage,q_delta,driver_age
3309,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,max_verstappen,Dutch,red_bull,...,10,1,431,9,1,1,103.665,q3,0.0,24
3310,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,perez,Mexican,red_bull,...,10,1,431,9,1,3,104.462,q3,0.797,32
3311,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,sainz,Spanish,ferrari,...,4,2,334,4,2,2,104.297,q3,0.632,27
3312,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,russell,British,mercedes,...,0,3,304,0,3,8,105.776,q3,2.111,24
3313,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,alonso,Spanish,alpine,...,0,4,99,0,4,6,105.368,q3,1.703,41
3314,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,leclerc,Monegasque,ferrari,...,4,2,334,4,2,4,104.553,q3,0.888,24
3315,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,ocon,French,alpine,...,0,4,99,0,4,5,105.18,q3,1.515,25
3316,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,vettel,German,aston_martin,...,0,9,20,0,9,16,106.344,q1,2.679,35
3317,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,gasly,French,alphatauri,...,0,8,27,0,8,12,105.827,q2,2.162,26
3318,2022,14,spa,Belgium,50.4372,5.97139,2022-08-28 13:00:00,albon,Thai,williams,...,0,10,3,0,10,9,105.837,q3,2.172,26
