In [37]:
import pandas as pd
import numpy as np
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [38]:
data = pd.read_csv(path+'data/ml_input.csv')
merged = pd.read_csv(path+'data/merged.csv')
parameters = pd.read_csv(path+'parameters/neutralnetworkregressor.csv')

In [39]:
N = 2022

In [40]:
df = data.copy()

train = df[df.season < N]

scaler = StandardScaler()

X_train = train.drop(['season', 'round', 'podium', 'driver_points_from'], axis=1)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

y_train = np.asarray(train.driver_points_from.values)

In [41]:
data.columns

Index(['season', 'round', 'podium', 'q_delta', 'starting_grid',
       'driver_points_before', 'constructor_points_before',
       'driver_points_from', 'driver_points_per', 'points_percentage',
       ...
       'circuit_id_rodriguez', 'circuit_id_sepang', 'circuit_id_shanghai',
       'circuit_id_silverstone', 'circuit_id_sochi', 'circuit_id_spa',
       'circuit_id_suzuka', 'circuit_id_villeneuve', 'circuit_id_yas_marina',
       'circuit_id_zandvoort'],
      dtype='object', length=116)

In [42]:
def get_predictions(X_train, y_train, model):
    points = []
    winners = []
    for circuit in df[df.season == N]['round'].unique():

        test = df[(df.season == N) & (df['round'] == circuit)]
        X_test = test.drop(['season', 'round', 'podium', 'driver_points_from'], axis=1)
        y_test = test.driver_points_from

        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict(X_test), columns=['predicted_points'])
        prediction_df['actual_points'] = y_test.reset_index(drop=True)

        print(prediction_df.predicted_points)
        prediction_df['predicted_winner'] = prediction_df.predicted_points.map(lambda x: 1 if x == prediction_df.predicted_points.max() else 0)
        prediction_df['actual_winner'] = prediction_df.actual_points.map(lambda x: 1 if x == prediction_df.actual_points.max() else 0)

        winners += list(prediction_df.predicted_winner.values)
        points += list(prediction_df.predicted_points.values)
        
        # Retrain model
        X_train = pd.concat([X_train, X_test])
        y_train = np.append(y_train, y_test)


        # model.fit(X_train, y_train)

    return points, winners

In [43]:
params = parameters.iloc[1]
# hidden_layer_sizes = params.hidden_layer_sizes
hidden_layer_sizes = (80,20,40,5)
activation = params.activation
solver = params.solver
alpha = params.alpha

params

Unnamed: 0                        161
model                    nn_regressor
hidden_layer_sizes    (80, 20, 40, 5)
activation                   logistic
solver                           adam
alpha                        0.000183
score                        0.590909
Name: 1, dtype: object

In [44]:
test = df[(df.season == N)]

model = MLPRegressor(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation, 
    solver=solver, 
    alpha=alpha, 
    random_state=1)
    
model.fit(X_train, y_train)

points, winner = get_predictions(X_train, y_train, model)

test['predicted_points'] = points
test['predicted_winner'] = winner

0     8.933681
1     8.933681
2     8.933681
3     8.931417
4     8.801781
5     8.933680
6     0.706330
7     0.707891
8     1.886210
9     0.705243
10    0.705177
11    0.703588
12    1.948650
13    0.704312
14    0.704623
15    0.704095
16    0.703872
17    8.933681
18    8.933681
19    8.933681
Name: predicted_points, dtype: float64
0     8.933681
1     8.933681
2     8.933681
3     8.933681
4     8.933680
5     8.933677
6     0.703846
7     8.933681
8     8.933625
9     8.933680
10    0.704599
11    0.703462
12    0.703678
13    0.713469
14    8.933681
15    8.891764
16    0.704523
17    0.703671
Name: predicted_points, dtype: float64
0     8.933681
1     8.933681
2     8.933680
3     8.933681
4     8.933424
5     8.932497
6     8.933675
7     8.887354
8     8.933652
9     0.704488
10    0.703914
11    0.703652
12    0.704800
13    0.713514
14    0.703387
15    0.703555
16    8.933681
17    0.703715
18    8.933681
Name: predicted_points, dtype: float64
0     8.933681
1     8.93368



In [45]:
test.query('round == 6').sort_values('predicted_points', ascending=False)

Unnamed: 0,season,round,podium,q_delta,starting_grid,driver_points_before,constructor_points_before,driver_points_from,driver_points_per,points_percentage,...,circuit_id_shanghai,circuit_id_silverstone,circuit_id_sochi,circuit_id_spa,circuit_id_suzuka,circuit_id_villeneuve,circuit_id_yas_marina,circuit_id_zandvoort,predicted_points,predicted_winner
3187,2022,6,1,0.323,2,85.0,151.0,25.0,14.17,56.29,...,0,0,0,0,0,0,0,0,8.933681,1
3206,2022,6,20,0.0,1,104.0,157.0,0.0,17.33,66.24,...,0,0,0,0,0,0,0,0,8.933681,0
3190,2022,6,4,0.416,3,53.0,157.0,12.0,8.83,33.76,...,0,0,0,0,0,0,0,0,8.933681,0
3188,2022,6,2,0.67,5,66.0,151.0,19.0,11.0,43.71,...,0,0,0,0,0,0,0,0,8.933681,0
3191,2022,6,5,0.762,6,36.0,95.0,10.0,6.0,37.89,...,0,0,0,0,0,0,0,0,8.933681,0
3192,2022,6,6,0.858,7,30.0,31.0,8.0,5.0,96.77,...,0,0,0,0,0,0,0,0,8.933679,0
3189,2022,6,3,0.643,4,59.0,95.0,15.0,9.83,62.11,...,0,0,0,0,0,0,0,0,8.933678,0
3198,2022,6,12,1.547,9,11.0,46.0,0.0,1.83,23.91,...,0,0,0,0,0,0,0,0,0.749872,0
3199,2022,6,13,2.111,14,6.0,16.0,0.0,1.0,37.5,...,0,0,0,0,0,0,0,0,0.712852,0
3203,2022,6,17,0.932,8,15.0,15.0,0.0,2.5,100.0,...,0,0,0,0,0,0,0,0,0.712431,0
