In [16]:
import ast
import warnings
from sys import platform

import numpy as np
import pandas as pd

from sklearn.metrics import precision_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Root folder that every data/parameter file path below is built from.
# The trailing slash matters: file names are appended with plain `+`.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'
else:
    # Without this branch `path` stayed undefined on every other platform
    # (e.g. Linux) and the first read_csv call failed with a NameError.
    # Fall back to the current working directory.
    # NOTE(review): assumes the notebook is launched from the repository root — confirm.
    path = './'

# Suppress every RuntimeWarning for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning) 
# Turn off pandas' chained-assignment warning globally. Be aware that this also
# hides genuine "assignment to a slice" mistakes in later cells.
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [17]:
# Input table for the model: one row per driver entry, with season/round
# identifiers, the `podium` result column and the numeric features.
data = pd.read_csv(path+'data/ml_input.csv')
# Saved hyperparameter candidates for the MLP classifier.
# NOTE(review): the file name says "neutralnetwork" — presumably a typo for
# "neuralnetwork", but it has to match the file on disk; confirm before renaming.
parameters = pd.read_csv(path+'parameters/neutralnetworkclassifier.csv')

In [18]:
# Held-out season: rows with season < N are used for training,
# rows with season == N are the ones predicted and scored.
N = 2021

In [19]:
# Columns excluded from the feature matrix: they are dropped from both the
# training frame and each test frame before scaling (includes the target `podium`).
params_to_drop = ['season', 'round', 'driver', 'constructor', 'circuit_id', 'podium', 'driver_points_from']

In [20]:
# Work on a copy so the frame loaded from disk (`data`) is never modified.
df = data.copy()

# Training rows: every season strictly before the held-out season N.
# The explicit .copy() makes `train` an independent frame, so rewriting its
# target column is a plain assignment instead of a write to a slice of `df`.
train = df[df.season < N].copy()

# Binarise the target for training only: 1 where podium == 1, otherwise 0.
# `df.podium` keeps its original values, which the evaluation cells rely on.
train['podium'] = (train['podium'] == 1).astype('int64')

# The scaler is fitted on the training features only; the same fitted object
# is reused later to transform the held-out season.
scaler = StandardScaler()
X_train = train.drop(params_to_drop, axis=1)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

y_train = train['podium'].to_numpy()

In [21]:
# Inspect the column names of the raw input table.
data.columns

Index(['season', 'round', 'podium', 'driver', 'constructor', 'circuit_id',
       'driver_points_from', 'qualifying_pos', 'starting_grid', 'stage_q3',
       'driver_points_per', 'constructor_points_per',
       'constructor_points_before', 'driver_points_before',
       'driver_wins_before', 'constructor_wins_before',
       'constructor_standings_before', 'driver_standings_before',
       'driver_last_3', 'constructor_last_3'],
      dtype='object')

In [22]:
def get_predictions(model, X_train, y_train):
    """Flag one predicted winner per round of the held-out season ``N``.

    Reads the notebook-level ``df``, ``N``, ``params_to_drop`` and the already
    fitted ``scaler``. For each distinct ``round`` value of season ``N`` the
    feature columns are scaled, ``model.predict_proba`` is called, and the row
    with the highest class-1 probability is marked as the predicted winner.

    Parameters
    ----------
    model
        Fitted classifier exposing ``predict_proba``; column index 1 of its
        output is read as the probability of class 1.
    X_train, y_train
        Currently unused. Kept so existing calls keep working; they were only
        needed by an optional per-round retraining step that is disabled.

    Returns
    -------
    tuple[list, list]
        ``(predictions, probs)``: a 0/1 flag and the class-1 probability for
        every row of ``df[df.season == N]``, in that frame's row order.
        Both lists are empty when the season has no rows.
    """
    season_rows = df[df.season == N]
    round_ids = season_rows['round'].to_numpy()

    # Results are written by row position, so they stay aligned with
    # df[df.season == N] even when a round's rows are not stored contiguously.
    winner_flags = np.zeros(len(season_rows), dtype=np.int64)
    win_proba = np.zeros(len(season_rows), dtype=float)

    # pd.unique keeps order of first appearance, i.e. the order rounds occur in df.
    for round_id in pd.unique(round_ids):
        in_round = round_ids == round_id
        X_test = season_rows[in_round].drop(params_to_drop, axis=1)

        # Scale with the scaler fitted on the training data; keep column names.
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        round_proba = np.asarray(model.predict_proba(X_test))[:, 1]
        win_proba[in_round] = round_proba

        # argmax picks exactly one row per round (the first one on ties), and
        # the maximum is computed once rather than once per row.
        row_positions = np.flatnonzero(in_round)
        winner_flags[row_positions[int(np.argmax(round_proba))]] = 1

    return list(winner_flags), list(win_proba)

In [23]:
# Preview the first rows of the saved hyperparameter table.
parameters.head()

Unnamed: 0,model,hidden_layer_sizes,activation,solver,alpha,score
0,neural_network_classifier,"(80, 20, 40, 5)",relu,sgd,0.000886,0.636364
1,neural_network_classifier,"(80, 20, 40, 5)",relu,sgd,0.033598,0.636364
2,neural_network_classifier,"(75, 25, 50, 10)",identity,sgd,2.636651,0.636364
3,neural_network_classifier,"(75, 25, 50, 10)",identity,adam,1.274275,0.636364
4,neural_network_classifier,"(80, 20, 40, 5)",identity,sgd,2.636651,0.636364


In [24]:
# Take the first row of the saved hyperparameter table.
params = parameters.iloc[0]

# `hidden_layer_sizes` is stored in the CSV as text such as "(80, 20, 40, 5)".
# ast.literal_eval only parses Python literals, so unlike eval() a tampered
# CSV cell cannot execute arbitrary code.
hidden_layer_sizes = ast.literal_eval(params.hidden_layer_sizes)
activation = params.activation
solver = params.solver
alpha = params.alpha

params

model                 neural_network_classifier
hidden_layer_sizes              (80, 20, 40, 5)
activation                                 relu
solver                                      sgd
alpha                                  0.000886
score                                  0.636364
Name: 0, dtype: object

In [25]:
# Show alpha at full precision (the Series display rounds it).
params.alpha

0.00088586679041

In [26]:
# Rows of the held-out season. The explicit .copy() makes `test` an
# independent frame, so the result columns added below are ordinary column
# assignments rather than writes to a slice of `df`.
test = df[df.season == N].copy()

model = MLPClassifier(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    random_state=1,  # fixed seed so repeated runs produce the same fit
)

model.fit(X_train, y_train)

predictions, probs = get_predictions(model, X_train, y_train)

# One entry per held-out row; a length mismatch raises on assignment.
test['predicted_winner'] = predictions
test['probability'] = probs

In [27]:
# Number of distinct rounds in the held-out season; denominator of every rate.
rounds = test['round'].nunique()

# Printed label paired with the query selecting predicted winners that count as a hit.
accuracy_checks = [
    ('Outright Winner Accuracy', 'predicted_winner == 1 & podium == 1'),
    ('Top-Two Accuracy', 'predicted_winner == 1 & podium <= 2'),
    ('Podium Accuracy', 'predicted_winner == 1 & podium <= 3'),
]

for metric_label, hit_query in accuracy_checks:
    hit_count = test.query(hit_query).shape[0]
    print(f'{metric_label}: {round(hit_count / rounds, 2)}')

Outright Winner Accuracy: 0.74
Top-Two Accuracy: 0.89
Podium Accuracy: 0.89


In [28]:
def highlight_row(row):
    """Return one CSS declaration per cell, chosen from ``row['podium']``.

    Intended for ``Styler.apply(..., axis=1)``, which needs a list with one
    entry per cell of the row.

    Colour bands:
      * podium == 1        -> limegreen
      * 1 < podium <= 3    -> yellow with black text
      * 3 < podium <= 10   -> orange
      * podium > 10        -> #E34234
      * anything else      -> no styling (empty strings)

    The last band covers values below 1 and NaN. Without it the function
    returned ``None`` for those rows, which is not a valid result for
    ``Styler.apply``.
    """
    position = row['podium']

    if position == 1:
        css = 'background-color: limegreen'
    elif 1 < position <= 3:
        css = 'background-color: yellow; opacity: .95; color: black'
    elif 3 < position <= 10:
        css = 'background-color: orange'
    elif position > 10:
        css = 'background-color: #E34234'
    else:
        # NaN fails every comparison above and lands here too.
        css = ''

    return [css] * len(row)

In [29]:
# Rows flagged as the predicted winner, limited to the columns shown in the table.
winner_columns = ['circuit_id', 'driver', 'podium', 'starting_grid', 'probability']
q = test.loc[test['predicted_winner'] == 1, winner_columns]

# White text on every cell, then a background colour per row from highlight_row.
q.style.set_properties(color='white').apply(highlight_row, axis=1)

Unnamed: 0,circuit_id,driver,podium,starting_grid,probability
2765,bahrain,max_verstappen,2,1,0.168224
2785,imola,lewis_hamilton,2,1,0.368326
2804,portimao,lewis_hamilton,1,2,0.273766
2824,catalunya,lewis_hamilton,1,1,0.434506
2844,monaco,max_verstappen,1,2,0.285915
2877,baku,lewis_hamilton,15,2,0.384598
2883,ricard,max_verstappen,1,1,0.399878
2902,red_bull_ring,max_verstappen,1,1,0.444381
2922,red_bull_ring,max_verstappen,1,1,0.467346
2950,hungaroring,max_verstappen,9,3,0.41345
