In [1]:
import itertools
import os
import warnings
from sys import platform

import numpy as np
import pandas as pd

from sklearn.metrics import precision_score
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Root of the local f1-analytics checkout. Later cells build file names as
# `path + '<relative file>'`, so the value must end with a path separator.
_DEFAULT_ROOTS = {
    "win32": 'C:/Users/olive/GitHub/f1-analytics/',
    "darwin": '~/Documents/GitHub/f1-analytics/',
}

# The optional F1_ANALYTICS_PATH environment variable overrides the per-platform
# default so the notebook can run against a checkout stored elsewhere.
# Platforms without an entry above (e.g. Linux) used to leave `path` undefined,
# which surfaced later as a NameError; they now fall back to the home-relative
# location. NOTE(review): that fallback location is a guess — adjust if needed.
path = os.environ.get("F1_ANALYTICS_PATH") or _DEFAULT_ROOTS.get(
    platform, '~/Documents/GitHub/f1-analytics/')

# Guarantee the trailing separator that the string concatenations below rely on.
if not path.endswith(('/', '\\')):
    path += '/'

# Suppress every warning of category RuntimeWarning for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning) 
# Disable pandas' chained-assignment warning (it would otherwise be emitted as 'warn').
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [2]:
# Table the model is trained and evaluated on (copied into `df` further down).
data = pd.read_csv(path+'data/ml_input.csv')
# NOTE(review): `merged` is not referenced by any other visible cell — confirm it is still needed.
merged = pd.read_csv(path+'data/processed.csv')

In [3]:
# Held-out season: rows from seasons before N form the training set, and every
# round of season N is scored individually as a test set.

N = 2022

In [4]:
# Show the available columns before deciding which ones to exclude from the features.
data.columns

Index(['season', 'round', 'podium', 'driver', 'constructor', 'circuit_id',
       'driver_points_from', 'qualifying_pos', 'starting_grid', 'stage_q3',
       'driver_points_per', 'constructor_points_per',
       'constructor_points_before', 'driver_points_before',
       'driver_wins_before', 'constructor_wins_before',
       'constructor_standings_before', 'driver_standings_before',
       'driver_last_3', 'constructor_last_3'],
      dtype='object')

In [5]:
# Columns removed from the frame before it is used as a feature matrix.
# 'driver_points_from' is the regression target; the other entries are
# presumably identifiers and the 'podium' outcome — TODO confirm.
params_to_drop = ['season', 'round', 'driver', 'constructor', 'circuit_id', 'podium', 'driver_points_from']

In [6]:
# Work on a copy so the frame read from disk stays untouched.
df = data.copy()

# Every season before the held-out season N is training data.
train = df.loc[df['season'] < N]

# Build the feature matrix: drop the non-feature columns, then standardise.
# The fitted scaler stays at module level so the scoring function can apply the
# same transformation to each test round.
scaler = StandardScaler()
train_features = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(
    scaler.fit_transform(train_features),
    columns=train_features.columns,
)

# Regression target as a plain NumPy array.
y_train = np.asarray(train['driver_points_from'].values)

In [7]:
def score_regression(model):
    """Return the mean per-round precision of the predicted winner for season N.

    For each round of season ``N`` present in the module-level ``df``, the
    round's rows are reduced to feature columns (``params_to_drop`` removed),
    scaled with the already-fitted module-level ``scaler`` and passed to
    ``model.predict``. The row(s) with the highest predicted value are flagged
    as predicted winner(s), the row(s) with the highest ``driver_points_from``
    as actual winner(s), and the two flag vectors are compared with
    ``precision_score``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.

    Returns
    -------
    float
        Sum of the per-round precision scores divided by the number of
        distinct rounds in season ``N``.

    Raises
    ------
    ValueError
        If ``df`` holds no rounds for season ``N`` (previously this ended in
        an unexplained ``ZeroDivisionError``).
    """
    # Filter the season once instead of re-filtering `df` on every iteration
    # and again for the final division.
    season_rows = df[df.season == N]
    n_rounds = season_rows['round'].nunique()
    if n_rounds == 0:
        raise ValueError(f"No rounds found for season {N}; cannot score the model.")

    total_precision = 0
    for round_id in season_rows['round'].unique():
        test = season_rows[season_rows['round'] == round_id]
        X_test = test.drop(params_to_drop, axis=1)
        y_test = test.driver_points_from

        # Apply the training-set scaling to this round's features.
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # Predictions come back positionally, so the actual points are
        # re-indexed from 0 to line up with them.
        prediction_df = pd.DataFrame(model.predict(X_test), columns=['predicted_points'])
        prediction_df['actual_points'] = y_test.reset_index(drop=True)

        # Each maximum is computed once; the previous per-row lambda
        # recomputed it for every element. Ties flag several rows.
        top_predicted = prediction_df.predicted_points.max()
        top_actual = prediction_df.actual_points.max()
        predicted_winner = (prediction_df.predicted_points == top_predicted).astype(int)
        actual_winner = (prediction_df.actual_points == top_actual).astype(int)

        total_precision += precision_score(actual_winner, predicted_winner)

    return total_precision / n_rounds

In [8]:
# Column-oriented log of the grid search: one independent list per reported
# field, each receiving one entry per evaluated configuration.
model_scores = {
    field: []
    for field in (
        'model',
        'hidden_layer_sizes',
        'activation',
        'solver',
        'alpha',
        'score',
    )
}

In [9]:
# Neural Network Regressor training

# Exhaustive grid: 4 layer layouts x 4 activations x 3 solvers x 20 alphas = 960 fits.
params = {
    'hidden_layer_sizes': [
        (80, 20, 40, 5),
        (75, 30, 50, 10, 3),
        (100, 50, 25, 10),
        (200, 100, 50, 25),
    ],
    'activation': ['identity', 'relu', 'logistic', 'tanh'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': np.logspace(-4, 1, 20),
}

# itertools.product varies its last argument fastest, so configurations are
# visited in the same order as the equivalent four nested loops and the row
# order of `model_scores` is unchanged.
for hidden_layer_sizes, activation, solver, alpha in itertools.product(
        params['hidden_layer_sizes'],
        params['activation'],
        params['solver'],
        params['alpha']):
    # random_state is pinned so repeated runs start from the same initial weights.
    model = MLPRegressor(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        solver=solver,
        alpha=alpha,
        random_state=1)
    # NOTE(review): the recorded output shows lbfgs fits stopping at the
    # iteration limit. max_iter is left at the library default here so scores
    # stay comparable with previously saved results — consider raising it.
    model.fit(X_train, y_train)

    model_score = score_regression(model)

    # One entry per field keeps the lists in `model_scores` the same length.
    model_scores['model'].append('nn_regressor')
    model_scores['hidden_layer_sizes'].append(hidden_layer_sizes)
    model_scores['activation'].append(activation)
    model_scores['solver'].append(solver)
    model_scores['alpha'].append(alpha)
    model_scores['score'].append(model_score)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

model	hidden_layer_sizes	activation	solver	alpha	score

hidden_layer_sizes=(75, 30, 50, 10, 3), activation=logistic, solver=lbfgs, alpha=0.000100, score=0.636364

hidden_layer_sizes=(75, 30, 50, 10, 3), activation=logistic, solver=adam, alpha=0.000336, score=0.636364

hidden_layer_sizes=(80, 20, 40, 5), activation=logistic, solver=adam, alpha=0.263665, score=0.636364

hidden_layer_sizes=(80, 20, 40, 5), activation=logistic, solver=adam, alpha=0.483293, score=0.590909

hidden_layer_sizes=(75, 30, 50, 10, 3), activation=logistic, solver=adam, alpha=0.002069, score=0.590909

hidden_layer_sizes=(75, 30, 50, 10, 3), activation=logistic, solver=adam, alpha=0.000616, score=0.590909

In [None]:
# Turn the collected grid-search log into a table, best score first.
results_table = pd.DataFrame(model_scores)
comparison_df = results_table.sort_values(by='score', ascending=False)

# Display only the configurations whose score exceeds 0.5.
comparison_df.query('score > .5')

Unnamed: 0,model,hidden_layer_sizes,activation,solver,alpha,score
959,nn_regressor,"(200, 100, 50, 25)",tanh,adam,10.000000,0.736842
630,nn_regressor,"(100, 50, 25, 10)",logistic,sgd,0.042813,0.736842
628,nn_regressor,"(100, 50, 25, 10)",logistic,sgd,0.012743,0.736842
627,nn_regressor,"(100, 50, 25, 10)",logistic,sgd,0.006952,0.736842
626,nn_regressor,"(100, 50, 25, 10)",logistic,sgd,0.003793,0.736842
...,...,...,...,...,...,...
431,nn_regressor,"(75, 30, 50, 10, 3)",tanh,lbfgs,0.078476,0.526316
444,nn_regressor,"(75, 30, 50, 10, 3)",tanh,sgd,0.001129,0.526316
91,nn_regressor,"(80, 20, 40, 5)",relu,sgd,0.078476,0.526316
805,nn_regressor,"(200, 100, 50, 25)",relu,sgd,0.002069,0.526316


In [None]:
# Keep only the configurations whose score exceeds 0.5.
above_half = comparison_df['score'] > .5
chosen_inputs = comparison_df[above_half]

# Persist the selection (index included).
# NOTE(review): the file name says 'neutral' — probably meant 'neural'; left
# unchanged because other code may read this exact path.
output_file = path + 'parameters/neutralnetworkregressor.csv'
chosen_inputs.to_csv(output_file)