In [138]:
import pandas as pd
import numpy as np
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [139]:
data = pd.read_csv(path+'data/ml_input.csv')
merged = pd.read_csv(path+'data/merged.csv')
parameters = pd.read_csv(path+'parameters/neutralnetworkregressor.csv')

In [140]:
N = 2022

In [141]:
params_to_drop = ['season', 'round', 'podium', 'driver_points_from']

In [142]:
df = data.copy()

train = df[df.season < N]

scaler = StandardScaler()

X_train = train.drop(params_to_drop, axis=1)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

y_train = np.asarray(train.driver_points_from.values)

In [143]:
print(*data.columns)

season round podium q_delta starting_grid driver_points_before constructor_points_before driver_points_from driver_points_per points_percentage driver_last_3 constructor_last_3 driver_adrian_sutil driver_alexander_albon driver_alexander_rossi driver_andre_lotterer driver_antonio_giovinazzi driver_brendon_hartley driver_carlos_sainz driver_charles_leclerc driver_daniel_ricciardo driver_daniil_kvyat driver_esteban_gutierrez driver_esteban_ocon driver_felipe_massa driver_felipe_nasr driver_fernando_alonso driver_george_russell driver_jack_aitken driver_jean-eric_vergne driver_jenson_button driver_jolyon_palmer driver_jules_bianchi driver_kamui_kobayashi driver_kevin_magnussen driver_kimi_raikkonen driver_lance_stroll driver_lando_norris driver_lewis_hamilton driver_marcus_ericsson driver_max_chilton driver_max_verstappen driver_mick_schumacher driver_nicholas_latifi driver_nico_hulkenberg driver_nico_rosberg driver_nikita_mazepin driver_pascal_wehrlein driver_pastor_maldonado driver_pierr

In [144]:
def get_predictions(X_train, y_train, model):
    points = []
    winners = []
    for circuit in df[df.season == N]['round'].unique():

        test = df[(df.season == N) & (df['round'] == circuit)]
        X_test = test.drop(params_to_drop, axis=1)
        y_test = test.driver_points_from

        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict(X_test), columns=['predicted_points'])
        prediction_df['actual_points'] = y_test.reset_index(drop=True)

        prediction_df['predicted_winner'] = prediction_df.predicted_points.map(lambda x: 1 if x == prediction_df.predicted_points.max() else 0)
        prediction_df['actual_winner'] = prediction_df.actual_points.map(lambda x: 1 if x == prediction_df.actual_points.max() else 0)

        winners += list(prediction_df.predicted_winner.values)
        points += list(prediction_df.predicted_points.values)
        
        # Retrain model
        X_train = pd.concat([X_train, X_test])
        y_train = np.append(y_train, y_test)


        model.fit(X_train, y_train)

    return points, winners

In [145]:
params = parameters.iloc[0]
hidden_layer_sizes = eval(params.hidden_layer_sizes)
activation = params.activation
solver = params.solver
alpha = params.alpha

params

Unnamed: 0                        170
model                    nn_regressor
hidden_layer_sizes    (80, 20, 40, 5)
activation                   logistic
solver                           adam
alpha                        0.042813
score                        0.681818
Name: 0, dtype: object

In [146]:
test = df[(df.season == N)]

model = MLPRegressor(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation, 
    solver=solver, 
    alpha=alpha, 
    random_state=1)
    
model.fit(X_train, y_train)

points, winner = get_predictions(X_train, y_train, model)

test['predicted_points'] = points
test['predicted_winner'] = winner



In [147]:
len(test.query('predicted_winner == 1 & podium == 1'))/ test['round'].nunique()

0.5789473684210527

In [148]:
test.query('predicted_winner == 1')

Unnamed: 0,season,round,podium,q_delta,starting_grid,driver_points_before,constructor_points_before,driver_points_from,driver_points_per,points_percentage,...,circuit_id_shanghai,circuit_id_silverstone,circuit_id_sochi,circuit_id_spa,circuit_id_suzuka,circuit_id_villeneuve,circuit_id_yas_marina,circuit_id_zandvoort,predicted_points,predicted_winner
3129,2022,1,19,0.123,2,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,9.231536,1
3134,2022,2,4,0.0,1,0.0,0.0,12.0,0.0,0.0,...,0,0,0,0,0,0,0,0,9.226189,1
3165,2022,3,18,0.286,2,25.0,37.0,0.0,8.33,67.57,...,0,0,0,0,0,0,0,0,9.292008,1
3168,2022,5,1,0.0,3,59.0,113.0,26.0,11.8,52.21,...,0,0,0,0,0,0,0,0,9.274058,1
3187,2022,6,1,0.323,2,85.0,151.0,25.0,14.17,56.29,...,0,0,0,0,0,0,0,0,9.251398,1
3209,2022,7,3,0.29,4,110.0,195.0,15.0,15.71,56.41,...,0,0,0,0,0,0,0,0,9.084873,1
3227,2022,8,1,0.347,3,125.0,235.0,25.0,15.62,53.19,...,0,0,0,0,0,0,0,0,9.472987,1
3247,2022,9,1,0.0,1,150.0,279.0,25.0,16.67,53.76,...,0,0,0,0,0,1,0,0,9.57688,1
3273,2022,10,7,0.072,2,175.0,304.0,6.0,17.5,57.57,...,0,1,0,0,0,0,0,0,9.58019,1
3287,2022,12,1,0.304,2,208.0,359.0,25.0,17.33,57.94,...,0,0,0,0,0,0,0,0,9.561468,1
