In [18]:
import pandas as pd
import numpy as np
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

from sys import platform

# Resolve the project root for the current operating system.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    # macOS ("darwin") uses this home-relative checkout. Other platforms
    # (e.g. Linux) previously left `path` undefined, so the first read_csv
    # failed with a NameError; they now fall back to the same layout.
    path = '~/Documents/GitHub/f1-analytics/'

# Silence RuntimeWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# Disable pandas' SettingWithCopyWarning (pandas default is 'warn').
pd.options.mode.chained_assignment = None

%matplotlib inline

In [19]:
# Load the three project CSVs, in order, from the resolved project root:
#   data       -> model input table (copied into `df` by the training cell)
#   merged     -> table that still carries the `driver` column used for reporting
#   parameters -> stored hyper-parameters (not referenced by the cells shown here)
data, merged, parameters = (
    pd.read_csv(path + relative_csv)
    for relative_csv in (
        'data/ml_input.csv',
        'data/merged.csv',
        'parameters/neutralnetworkclassifier.csv',
    )
)

In [20]:
# Season held out for evaluation: rows with season < N form the training set,
# and rows with season == N are the ones predicted round by round.
N = 2022

In [21]:
# Work on a copy so the loaded `data` frame is left as read from disk.
df = data.copy()

# Every season strictly before N is training data.
train = df.loc[df['season'] < N]

# Identifier and outcome columns are excluded from the feature matrix.
X_train = train.drop(
    columns=['season', 'round', 'podium', 'driver_points_from_race', 'constructor_points_from_race']
)

# Standardise the features; the fitted scaler is reused when predicting.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

# Regression target is the `driver_points_from_race` column, as a NumPy array.
y_train = np.asarray(train['driver_points_from_race'])

In [22]:
# Display the column index of the loaded model-input table.
data.columns

Index(['season', 'round', 'podium', 'driver_points', 'driver_wins',
       'driver_standings_pos', 'constructor_points', 'constructor_wins',
       'constructor_standings_pos', 'starting_grid', 'driver_points_from_race',
       'constructor_points_from_race', 'stage_q1', 'stage_q2', 'stage_q3',
       'circuit_id_albert_park', 'circuit_id_americas', 'circuit_id_bahrain',
       'circuit_id_baku', 'circuit_id_catalunya', 'circuit_id_hockenheimring',
       'circuit_id_hungaroring', 'circuit_id_imola', 'circuit_id_interlagos',
       'circuit_id_istanbul', 'circuit_id_jeddah', 'circuit_id_losail',
       'circuit_id_marina_bay', 'circuit_id_miami', 'circuit_id_monaco',
       'circuit_id_monza', 'circuit_id_mugello', 'circuit_id_nurburgring',
       'circuit_id_portimao', 'circuit_id_red_bull_ring', 'circuit_id_ricard',
       'circuit_id_rodriguez', 'circuit_id_sepang', 'circuit_id_shanghai',
       'circuit_id_silverstone', 'circuit_id_sochi', 'circuit_id_spa',
       'circuit_id_suzuk

In [23]:
def get_predictions(X_train, y_train, model):
    """Predict driver points for every round of season ``N`` and flag one winner per round.

    Reads the notebook-level globals ``df`` (feature table), ``N`` (season to
    predict) and ``scaler`` (already fitted on the training features).

    Parameters
    ----------
    X_train, y_train
        Currently unused. Kept so existing calls keep working; they would only
        be needed if the model were refitted after each round.
    model
        Fitted estimator exposing ``predict``.

    Returns
    -------
    points : list of float
        Predicted points. Rounds appear in order of first appearance in ``df``;
        rows within a round keep their ``df`` order.
    winners : list of int
        1 for the row with the highest predicted points in its round, else 0.
        Exactly one row per round is flagged; ties go to the first such row.
    """
    non_feature_cols = ['season', 'round', 'podium', 'driver_points_from_race', 'constructor_points_from_race']
    season_rows = df[df.season == N]

    points = []
    winners = []
    for race_round in season_rows['round'].unique():
        race = season_rows[season_rows['round'] == race_round]
        X_test = race.drop(non_feature_cols, axis=1)

        # Reuse the training-time scaling; keep column names for the model.
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        predicted = np.asarray(model.predict(X_test)).ravel()

        # argmax is computed once per round and returns a single index, so
        # tied predictions can no longer yield several "winners".
        flags = np.zeros(len(predicted), dtype=int)
        flags[predicted.argmax()] = 1

        points.extend(predicted.tolist())
        winners.extend(flags.tolist())

    return points, winners

In [24]:
# Season-N rows of the merged table, reduced to the columns used for reporting.
test = merged.copy()[
    ['season', 'round', 'driver', 'starting_grid', 'podium', 'driver_points_from_race']
].query('season ==@N')

# Hyper-parameter grid: each key lists the candidate values to try.
params = dict(
    hidden_layer_sizes=[(75, 30, 50, 10, 3)],
    activation=['logistic'],
    solver=['adam'],
    alpha=[0.000616],
)

# Every combination of the grid, in the same order nested loops would visit them.
candidate_settings = [
    (layers, act, slv, reg)
    for layers in params['hidden_layer_sizes']
    for act in params['activation']
    for slv in params['solver']
    for reg in params['alpha']
]

for model_params in candidate_settings:
    hidden_layer_sizes, activation, solver, alpha = model_params

    model = MLPRegressor(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        solver=solver,
        alpha=alpha,
        random_state=1,
    )
    model.fit(X_train, y_train)

    points, winner = get_predictions(X_train, y_train, model)

    # Assigned positionally and overwritten on every iteration, so `test`
    # ends up holding the predictions of the last setting tried.
    test['predicted_points'] = points
    test['predicted_winner'] = winner



In [25]:
# Show only the rows flagged as the predicted winner of their round.
test.query('predicted_winner == 1')

Unnamed: 0,season,round,driver,starting_grid,podium,driver_points_from_race,predicted_points,predicted_winner
3188,2022,1,charles_leclerc,1,1,26.0,4.679283,1
3209,2022,2,charles_leclerc,2,2,19.0,4.679283,1
3226,2022,3,charles_leclerc,1,1,26.0,4.679283,1
3250,2022,4,charles_leclerc,2,6,15.0,4.679283,1
3264,2022,5,charles_leclerc,1,2,18.0,4.679283,1
3301,2022,6,charles_leclerc,1,20,0.0,4.679283,1
3304,2022,7,max_verstappen,4,3,15.0,4.679283,1
3322,2022,8,max_verstappen,3,1,25.0,4.679283,1
3342,2022,9,max_verstappen,1,1,25.0,4.679283,1
3368,2022,10,max_verstappen,2,7,6.0,4.679284,1
