In [1]:
import pandas as pd
import numpy as np
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [2]:
data = pd.read_csv(path+'data/ml_input.csv')
merged = pd.read_csv(path+'data/merged.csv')
parameters = pd.read_csv(path+'parameters/neutralnetworkclassifier.csv')

In [3]:
N = 2022

In [4]:
df = data.copy()

train = df[df.season < N]

scaler = StandardScaler()

X_train = train.drop(['season', 'round', 'podium', 'driver_points_from_race', 'constructor_points_from_race', 'qualifying_pos'], axis=1)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

y_train = np.asarray(train.driver_points_from_race.values)

In [5]:
data.columns

Index(['season', 'round', 'podium', 'driver_points', 'driver_wins',
       'driver_standings_pos', 'constructor_points', 'constructor_wins',
       'constructor_standings_pos', 'qualifying_pos',
       ...
       'circuit_id_rodriguez', 'circuit_id_sepang', 'circuit_id_shanghai',
       'circuit_id_silverstone', 'circuit_id_sochi', 'circuit_id_spa',
       'circuit_id_suzuka', 'circuit_id_villeneuve', 'circuit_id_yas_marina',
       'circuit_id_zandvoort'],
      dtype='object', length=117)

In [6]:
def get_predictions(X_train, y_train, model):
    points = []
    winners = []
    for circuit in df[df.season == N]['round'].unique():

        test = df[(df.season == N) & (df['round'] == circuit)]
        X_test = test.drop(['season', 'round', 'podium', 'driver_points_from_race', 'constructor_points_from_race', 'qualifying_pos'], axis=1)
        y_test = test.driver_points_from_race

        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict(X_test), columns=['predicted_points'])
        prediction_df['actual_points'] = y_test.reset_index(drop=True)

        prediction_df['predicted_winner'] = prediction_df.predicted_points.map(lambda x: 1 if x == prediction_df.predicted_points.max() else 0)
        prediction_df['actual_winner'] = prediction_df.actual_points.map(lambda x: 1 if x == prediction_df.actual_points.max() else 0)

        winners += list(prediction_df.predicted_winner.values)
        points += list(prediction_df.predicted_points.values)
        
        # Retrain model
        X_train = pd.concat([X_train, X_test])
        y_train = np.append(y_train, y_test)

        model.fit(X_train, y_train)

    return points, winners

In [7]:
test = merged.copy()
test = test[['season', 'round', 'driver', 'starting_grid', 'podium', 'driver_points_from_race']].query('season ==@N')


params={'hidden_layer_sizes': [(75, 30, 50, 10, 3)], 
        'activation': ['logistic'], 
        'solver': ['lbfgs'], 
        'alpha': [0.000100]} 

for hidden_layer_sizes in params['hidden_layer_sizes']:
    for activation in params['activation']:
        for solver in params['solver']:
            for alpha in params['alpha']:
                model_params = (hidden_layer_sizes, activation, solver, alpha)
                model = MLPRegressor(
                    hidden_layer_sizes=hidden_layer_sizes,
                    activation=activation, 
                    solver=solver, 
                    alpha=alpha, 
                    random_state=1,
                    max_iter=500)
                    
                model.fit(X_train, y_train)

                points, winner = get_predictions(X_train, y_train, model)

                test['predicted_points'] = points
                test['predicted_winner'] = winner

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

In [8]:
test.query('predicted_points == 1 & podium <= 3')

Unnamed: 0,season,round,driver,starting_grid,podium,driver_points_from_race,predicted_points,predicted_winner
3190,2022,1,lewis_hamilton,5,3,15.0,1,17.506462
3245,2022,4,max_verstappen,1,1,34.0,1,19.911051
3264,2022,5,charles_leclerc,1,2,18.0,1,20.060065
3282,2022,6,max_verstappen,2,1,25.0,1,18.075079
3304,2022,7,max_verstappen,4,3,15.0,1,17.786239
3342,2022,9,max_verstappen,1,1,25.0,1,24.323706
3362,2022,10,carlos_sainz,1,1,25.0,1,21.289191
3383,2022,11,max_verstappen,1,2,27.0,1,25.997611
3402,2022,12,lewis_hamilton,4,2,18.0,1,21.677655
3442,2022,14,sergio_perez,2,2,18.0,1,17.067289
