In [28]:
import ast
import warnings
from sys import platform

import numpy as np
import pandas as pd

from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Locate the local checkout of the repository. Windows uses an absolute
# path; every other platform (macOS, Linux, ...) uses the home-relative
# checkout, so `path` is always defined before the data files are read.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    path = '~/Documents/GitHub/f1-analytics/'

# Hide RuntimeWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# Switch off pandas' chained-assignment warning.
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [29]:
# Read the three input tables, each located relative to the repository
# root held in `path`:
#   data       - the model input table (copied into `df` further down)
#   merged     - a second table; not referenced in the cells visible here
#   parameters - stored hyper-parameter rows for the regressor
data, merged, parameters = (
    pd.read_csv(path + relative_csv)
    for relative_csv in (
        'data/ml_input.csv',
        'data/merged.csv',
        'parameters/neutralnetworkregressor.csv',
    )
)

In [30]:
# Hold-out season: rows with season < N are used to fit the model and rows
# with season == N are the ones predictions are made for.
N = 2022

In [31]:
# Columns removed from the frame before it is handed to the scaler/model.
# `driver_points_from` is the regression target, so it must not appear
# among the features.
params_to_drop = [
    'season',
    'round',
    'driver',
    'constructor',
    'circuit_id',
    'podium',
    'driver_points_from',
]

In [32]:
# Work on a copy so the frame read from disk stays untouched.
df = data.copy()

# Every season before the hold-out year N is used for fitting.
train = df.loc[df['season'] < N]

# Feature matrix: drop the non-feature columns, then standardise. The scaler
# fitted here is reused later to transform the hold-out rows.
scaler = StandardScaler()
train_features = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(
    scaler.fit_transform(train_features),
    columns=train_features.columns,
)

# Regression target as a plain NumPy array.
y_train = np.asarray(train['driver_points_from'].values)

In [33]:
# Show every column name of the loaded table on one space-separated line.
print(' '.join(map(str, data.columns)))

season round podium driver constructor circuit_id driver_points_from qualifying_pos starting_grid stage_q3 driver_points_per constructor_points_per constructor_points_before driver_points_before driver_wins_before constructor_wins_before constructor_standings_before driver_standings_before driver_last_3 constructor_last_3


In [34]:
def get_predictions(X_train, y_train, model, *, data=None, season=None,
                    feature_scaler=None, drop_cols=None):
    """Predict points for every round of one season and flag the predicted winner.

    The rows of the requested season are processed one round at a time, in
    the order in which the rounds first appear in the frame. For each round
    the non-feature columns are dropped, the remaining columns are passed
    through the already-fitted scaler, and ``model.predict`` is called on
    the result.

    Parameters
    ----------
    X_train, y_train
        Currently unused. They are kept so existing calls keep working
        (the per-round retraining that consumed them is disabled).
    model
        Fitted estimator exposing ``predict``.
    data : pandas.DataFrame, optional
        Frame holding ``season`` and ``round`` columns plus the features.
        Defaults to the notebook-level ``df``.
    season : optional
        Season to predict. Defaults to the notebook-level ``N``.
    feature_scaler : optional
        Fitted object exposing ``transform``. Defaults to the notebook-level
        ``scaler``.
    drop_cols : list, optional
        Columns to remove before scaling. Defaults to the notebook-level
        ``params_to_drop``.

    Returns
    -------
    (points, winners) : tuple of two lists
        ``points`` holds the predicted value for each row; ``winners`` holds
        1 for the row(s) with the highest prediction within its round and 0
        otherwise (tied maxima are all flagged). Both lists are ordered round
        by round and, within a round, in the row order of ``data``.
    """
    # Fall back to the notebook globals when no explicit override is given.
    frame = df if data is None else data
    target_season = N if season is None else season
    fitted_scaler = scaler if feature_scaler is None else feature_scaler
    excluded = params_to_drop if drop_cols is None else drop_cols

    # The season filter is identical for every round, so apply it only once.
    season_rows = frame[frame['season'] == target_season]

    points = []
    winners = []
    for round_id in season_rows['round'].unique():
        round_rows = season_rows[season_rows['round'] == round_id]
        features = round_rows.drop(columns=excluded)

        # Apply the scaling learned on the training data (no re-fitting).
        scaled = pd.DataFrame(fitted_scaler.transform(features), columns=features.columns)

        predicted = pd.Series(np.asarray(model.predict(scaled)).ravel())

        # Compute the round maximum once and compare vectorised, instead of
        # recomputing it inside a per-row lambda.
        is_winner = predicted.eq(predicted.max()).astype('int64')

        points += list(predicted.values)
        winners += list(is_winner.values)

    return points, winners

In [35]:
# Use the first row of the stored hyper-parameter table.
# NOTE(review): presumably the file is ordered best score first - confirm.
params = parameters.iloc[0]

# The layer sizes are stored as text such as "(200, 100, 50, 25)".
# ast.literal_eval only accepts Python literals, so content of the CSV can
# never be executed as code the way it could with eval().
hidden_layer_sizes = ast.literal_eval(params.hidden_layer_sizes)
activation = params.activation
solver = params.solver
alpha = params.alpha

params

Unnamed: 0                           959
model                       nn_regressor
hidden_layer_sizes    (200, 100, 50, 25)
activation                          tanh
solver                              adam
alpha                               10.0
score                           0.736842
Name: 0, dtype: object

In [36]:
# Rows of the hold-out season, taken as an independent copy so the new
# prediction columns are not written into a slice of `df`.
test = df[df.season == N].copy()

# get_predictions() returns its results round by round, in the order in
# which the rounds first appear, and the lists are attached positionally
# below. Arrange the rows the same way (stable, so the order inside a round
# is kept); this is a no-op when the rows are already grouped by round.
first_seen = {round_id: rank for rank, round_id in enumerate(test['round'].unique())}
test = test.sort_values('round', key=lambda rounds: rounds.map(first_seen), kind='stable')

# Regressor configured with the hyper-parameters selected above; the fixed
# random_state keeps the fit reproducible.
model = MLPRegressor(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    random_state=1)

model.fit(X_train, y_train)

points, winner = get_predictions(X_train, y_train, model)

test['predicted_points'] = points
test['predicted_winner'] = winner

In [37]:
# Share of rounds in which the flagged row also has podium == 1.
correct_calls = test.query('predicted_winner == 1 & podium == 1')
len(correct_calls) / test['round'].nunique()

0.631578947368421

In [38]:
# Display the rows flagged as predicted winner, with all their columns.
test.query('predicted_winner == 1')

Unnamed: 0,season,round,podium,driver,constructor,circuit_id,driver_points_from,qualifying_pos,starting_grid,stage_q3,...,constructor_points_before,driver_points_before,driver_wins_before,constructor_wins_before,constructor_standings_before,driver_standings_before,driver_last_3,constructor_last_3,predicted_points,predicted_winner
3139,2022,1,1,charles_leclerc,ferrari,bahrain,26.0,1,1,1,...,0.0,0.0,0,0,1,1,0.0,0.0,14.893667,1
3160,2022,2,2,charles_leclerc,ferrari,jeddah,19.0,2,2,1,...,44.0,26.0,1,1,1,1,26.0,44.0,16.634532,1
3178,2022,3,1,charles_leclerc,ferrari,albert_park,26.0,1,1,1,...,78.0,45.0,1,1,1,1,45.0,78.0,17.05477,1
3199,2022,5,2,charles_leclerc,ferrari,miami,18.0,1,1,1,...,124.0,86.0,2,2,1,1,60.0,80.0,17.470916,1
3237,2022,6,20,charles_leclerc,ferrari,catalunya,0.0,1,1,1,...,157.0,104.0,2,2,1,1,59.0,79.0,17.368798,1
3241,2022,7,4,charles_leclerc,ferrari,monaco,12.0,1,1,1,...,169.0,104.0,2,2,2,2,33.0,65.0,16.982397,1
3258,2022,8,1,max_verstappen,red_bull,baku,25.0,3,3,1,...,235.0,125.0,4,5,1,1,66.0,122.0,16.963093,1
3278,2022,9,1,max_verstappen,red_bull,villeneuve,25.0,1,1,1,...,279.0,150.0,5,6,1,1,65.0,128.0,17.901937,1
3304,2022,10,7,max_verstappen,red_bull,silverstone,6.0,2,2,1,...,304.0,175.0,6,7,1,1,65.0,109.0,17.935003,1
3318,2022,12,1,max_verstappen,red_bull,ricard,25.0,2,2,1,...,359.0,208.0,6,7,1,1,58.0,80.0,17.890343,1
