In [104]:
import ast
import warnings
from sys import platform

import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Project root used as the prefix for every data file read below.
# NOTE(review): both locations are machine-specific absolute/home paths;
# consider moving them to a single configurable constant.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    # macOS ("darwin") and any other platform use the home-relative checkout,
    # so `path` is always bound instead of raising NameError in the next cell.
    path = '~/Documents/GitHub/f1-analytics/'

# Hide RuntimeWarning messages for the rest of the session.
warnings.filterwarnings(action="ignore", category=RuntimeWarning)
# Switch off pandas' chained-assignment check (pandas' default is 'warn').
pd.set_option("mode.chained_assignment", None)

%matplotlib inline

In [105]:
# Read the three CSV inputs, all located under the project root `path`.
data = pd.read_csv(f'{path}data/ml_input.csv')
merged = pd.read_csv(f'{path}data/merged.csv')
parameters = pd.read_csv(f'{path}parameters/neutralnetworkregressor.csv')

In [106]:
# Target season: rows with season < N are used for training, and the rows
# with season == N are the ones predictions are made for.
N = 2021

In [107]:
# Columns removed from a frame before it is scaled and passed to the model.
params_to_drop = [
    'season',
    'round',
    'podium',
    'driver_points_from',
]

In [108]:
# Work on a copy so the frame loaded from disk stays untouched.
df = data.copy()

# Every season before the target season N is training data.
train = df.loc[df['season'] < N]

# The scaler is fitted on the training features only; it is reused later
# to transform the target-season rows.
scaler = StandardScaler()
X_train = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
)

# Regression target: the driver_points_from column as a plain array.
y_train = train['driver_points_from'].to_numpy()

In [109]:
# Show every column name on one space-separated line.
print(' '.join(map(str, data.columns)))

season round podium q_delta starting_grid driver_points_before constructor_points_before driver_points_from driver_points_per points_percentage driver_last_3 constructor_last_3 driver_adrian_sutil driver_alexander_albon driver_alexander_rossi driver_andre_lotterer driver_antonio_giovinazzi driver_brendon_hartley driver_carlos_sainz driver_charles_leclerc driver_daniel_ricciardo driver_daniil_kvyat driver_esteban_gutierrez driver_esteban_ocon driver_felipe_massa driver_felipe_nasr driver_fernando_alonso driver_george_russell driver_jack_aitken driver_jean-eric_vergne driver_jenson_button driver_jolyon_palmer driver_jules_bianchi driver_kamui_kobayashi driver_kevin_magnussen driver_kimi_raikkonen driver_lance_stroll driver_lando_norris driver_lewis_hamilton driver_marcus_ericsson driver_max_chilton driver_max_verstappen driver_mick_schumacher driver_nicholas_latifi driver_nico_hulkenberg driver_nico_rosberg driver_nikita_mazepin driver_pascal_wehrlein driver_pastor_maldonado driver_pierr

In [110]:
def get_predictions(X_train, y_train, model, data=None, season=None,
                    drop_cols=None, fitted_scaler=None):
    """Predict points for every round of one season, one round at a time.

    For each distinct value of the ``round`` column within the chosen season,
    the rows of that round are stripped of ``drop_cols``, transformed with
    ``fitted_scaler`` and passed to ``model.predict``. A two-column table of
    predicted vs. actual points is printed for every round.

    Parameters
    ----------
    X_train, y_train
        Currently unused. They are kept so existing calls keep working; they
        were only needed by a per-round retraining step that is disabled.
    model
        Fitted estimator exposing ``predict(X)``.
    data : pandas.DataFrame, optional
        Frame holding ``season``, ``round``, ``driver_points_from`` and the
        feature columns. Defaults to the notebook-level ``df``.
    season : optional
        Season to predict. Defaults to the notebook-level ``N``.
    drop_cols : list, optional
        Columns removed before scaling. Defaults to the notebook-level
        ``params_to_drop``.
    fitted_scaler : optional
        Already-fitted transformer exposing ``transform(X)``. Defaults to the
        notebook-level ``scaler``.

    Returns
    -------
    (points, winners) : tuple of lists
        ``points`` holds the predicted points and ``winners`` holds 1/0 flags,
        one entry per season row, ordered round by round (rounds in order of
        first appearance, rows in frame order inside a round). Within a round
        every row whose prediction equals the round maximum is flagged 1, so
        exact ties produce more than one flagged row.
    """
    # Fall back to the notebook-level objects so the original three-argument
    # call behaves exactly as before.
    data = df if data is None else data
    season = N if season is None else season
    drop_cols = params_to_drop if drop_cols is None else drop_cols
    fitted_scaler = scaler if fitted_scaler is None else fitted_scaler

    # Filter the season once instead of re-filtering the whole frame per round.
    season_rows = data[data['season'] == season]

    points = []
    winners = []
    for circuit in season_rows['round'].unique():
        test = season_rows[season_rows['round'] == circuit]
        X_test = test.drop(drop_cols, axis=1)
        y_test = test.driver_points_from

        # Apply the scaling learned on the training data (no re-fitting here).
        X_test = pd.DataFrame(fitted_scaler.transform(X_test), columns=X_test.columns)

        prediction_df = pd.DataFrame(model.predict(X_test), columns=['predicted_points'])
        # Reset the index so actual points line up with the 0..n-1 prediction rows.
        prediction_df['actual_points'] = y_test.reset_index(drop=True)

        # Compute the round maximum once rather than once per row.
        best_prediction = prediction_df.predicted_points.max()
        prediction_df['predicted_winner'] = (
            prediction_df.predicted_points.eq(best_prediction).astype('int64')
        )

        print(prediction_df[['predicted_points', 'actual_points']])

        winners += list(prediction_df.predicted_winner.values)
        points += list(prediction_df.predicted_points.values)

    return points, winners

In [111]:
# Take the row at positional index 1 of the saved hyper-parameter table.
# NOTE(review): why row 1 is the one to use is not visible here — confirm.
params = parameters.iloc[1]

# hidden_layer_sizes is stored in the CSV as text such as "(80, 20, 40, 5)".
# Parse it as a Python literal rather than with eval(), so the contents of
# the file can never be executed as code.
hidden_layer_sizes = ast.literal_eval(params.hidden_layer_sizes)
activation = params.activation
solver = params.solver
alpha = params.alpha

params

Unnamed: 0                        161
model                    nn_regressor
hidden_layer_sizes    (80, 20, 40, 5)
activation                   logistic
solver                           adam
alpha                        0.000183
score                        0.590909
Name: 1, dtype: object

In [112]:
# Rows of the target season. Take an explicit copy: columns are added below,
# and writing to a filtered slice of `df` only works quietly because the
# chained-assignment check is disabled globally.
test = df[df.season == N].copy()

# Build the regressor from the hyper-parameters selected above; the fixed
# random_state keeps the fit repeatable.
model = MLPRegressor(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    random_state=1,
)

model.fit(X_train, y_train)

points, winner = get_predictions(X_train, y_train, model)

# The lists are assigned positionally. get_predictions emits values round by
# round, so this assumes the rows of each round are contiguous in `df` and in
# the same order as in `test` — TODO confirm.
test['predicted_points'] = points
test['predicted_winner'] = winner



    predicted_points  actual_points
0           8.403457           25.0
1           8.403457           18.0
2           8.403457           16.0
3           8.403451           12.0
4           8.403445           10.0
5           8.403457            8.0
6           8.403454            6.0
7           8.403457            4.0
8           0.764799            2.0
9           8.362204            1.0
10          0.763025            0.0
11          0.756911            0.0
12          0.817465            0.0
13          0.756801            0.0
14          0.758559            0.0
15          0.758084            0.0
16          8.403455            0.0
17          0.757352            0.0
18          8.403227            0.0
19          0.757800            0.0
    predicted_points  actual_points
0           8.403457           25.0
1           8.403457           19.0
2           8.403455           15.0
3           8.403457           12.0
4           8.368263           10.0
5           8.403443        

In [113]:
# Rows flagged as predicted winner whose podium value is 1, divided by the
# number of distinct rounds in the target season.
predicted_and_won = test[(test['predicted_winner'] == 1) & (test['podium'] == 1)]
len(predicted_and_won) / test['round'].nunique()

0.631578947368421

In [114]:
# Show only the rows that were flagged as the predicted winner.
test[test['predicted_winner'] == 1]

Unnamed: 0,season,round,podium,q_delta,starting_grid,driver_points_before,constructor_points_before,driver_points_from,driver_points_per,points_percentage,...,circuit_id_shanghai,circuit_id_silverstone,circuit_id_sochi,circuit_id_spa,circuit_id_suzuka,circuit_id_villeneuve,circuit_id_yas_marina,circuit_id_zandvoort,predicted_points,predicted_winner
2743,2021,1,2,0.0,1,0.0,0.0,18.0,0.0,0.0,...,0,0,0,0,0,0,0,0,8.403457,1
2762,2021,2,1,0.0,3,18.0,28.0,25.0,9.0,64.29,...,0,0,0,0,0,0,0,0,8.403457,1
2782,2021,3,2,0.398,3,43.0,53.0,18.0,14.33,81.13,...,0,0,0,0,0,0,0,0,8.403457,1
2802,2021,4,2,0.036,2,61.0,83.0,19.0,15.25,73.49,...,0,0,0,0,0,0,0,0,8.403457,1
2821,2021,5,1,0.23,2,80.0,112.0,25.0,16.0,71.43,...,0,0,0,0,0,0,0,0,8.403457,1
2856,2021,6,18,0.0,3,105.0,149.0,0.0,17.5,70.47,...,0,0,0,0,0,0,0,0,8.403457,1
2858,2021,7,1,0.0,1,105.0,174.0,26.0,15.0,60.34,...,0,0,0,0,0,0,0,0,8.403457,1
2876,2021,8,1,0.0,1,131.0,215.0,25.0,16.38,60.93,...,0,0,0,0,0,0,0,0,8.403457,1
2896,2021,9,1,0.0,1,156.0,252.0,26.0,17.33,61.9,...,0,0,0,0,0,0,0,0,8.403457,1
2924,2021,11,9,0.0,3,185.0,289.0,2.0,16.82,64.01,...,0,0,0,0,0,0,0,0,8.403457,1
