In [1]:
import warnings
from itertools import product
from sys import platform

import numpy as np
import pandas as pd
from sklearn.metrics import precision_score
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Root of the local f1-analytics checkout. File paths are built from it by
# plain string concatenation, so the value must end with a slash.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'
else:
    # Any other platform (e.g. Linux) used to leave `path` undefined, which
    # surfaced later as a NameError on the first file read.
    # NOTE(review): assumes the same home-relative checkout location as on
    # macOS -- adjust if the repository lives elsewhere.
    path = '~/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [2]:
# Read the two CSV inputs from the data/ folder under the checkout root.
data = pd.read_csv(f"{path}data/ml_input.csv")
merged = pd.read_csv(f"{path}data/merged.csv")

In [3]:
### Season held out for testing
# Rows with season < N form the training set; rows with season == N are the
# ones the fitted models are scored on, one round at a time.

N = 2021

In [4]:
# Show the column labels of the loaded input table.
data.columns

Index(['season', 'round', 'podium', 'driver_points', 'driver_wins',
       'driver_standings_pos', 'constructor_points', 'constructor_wins',
       'constructor_standings_pos', 'qualifying_pos',
       ...
       'circuit_id_rodriguez', 'circuit_id_sepang', 'circuit_id_shanghai',
       'circuit_id_silverstone', 'circuit_id_sochi', 'circuit_id_spa',
       'circuit_id_suzuka', 'circuit_id_villeneuve', 'circuit_id_yas_marina',
       'circuit_id_zandvoort'],
      dtype='object', length=117)

In [5]:
# Work on a copy so the frame read from disk stays untouched.
df = data.copy()
# Binary target: 1 where `podium` equals 1, 0 for every other value.
df['podium'] = df['podium'].eq(1).astype('int64')

# Everything before season N is training data.
train = df.loc[df['season'] < N]
X_train = train.drop(
    columns=[
        'season',
        'round',
        'podium',
        'driver_points_from_race',
        'constructor_points_from_race',
        'qualifying_pos',
    ]
)
y_train = train['podium']

# Fit the scaler on the training features only; the same fitted `scaler`
# object is reused later to transform the test rounds.
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)

In [12]:
# Display the standardized training features.
X_train

Unnamed: 0,driver_points,driver_wins,driver_standings_pos,constructor_points,constructor_wins,constructor_standings_pos,starting_grid,driver_adrian_sutil,driver_alexander_albon,driver_alexander_rossi,...,circuit_id_rodriguez,circuit_id_sepang,circuit_id_shanghai,circuit_id_silverstone,circuit_id_sochi,circuit_id_spa,circuit_id_suzuka,circuit_id_villeneuve,circuit_id_yas_marina,circuit_id_zandvoort
0,-0.679535,-0.314453,-1.595994,-0.693558,-0.361653,-1.697284,-1.294514,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,-0.230289,0.0
1,-0.679535,-0.314453,-1.595994,-0.693558,-0.361653,-1.697284,-1.122576,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,-0.230289,0.0
2,-0.679535,-0.314453,-1.595994,-0.693558,-0.361653,-1.697284,-0.090952,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,-0.230289,0.0
3,-0.679535,-0.314453,-1.595994,-0.693558,-0.361653,-1.697284,-0.950639,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,-0.230289,0.0
4,-0.679535,-0.314453,-1.595994,-0.693558,-0.361653,-1.697284,0.768734,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,-0.230289,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2755,-0.621236,-0.314453,1.067971,-0.634057,-0.361653,0.856971,0.596797,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,4.342363,0.0
2756,-0.679535,-0.314453,1.694786,-0.693558,-0.361653,1.495535,1.284546,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,4.342363,0.0
2757,-0.664960,-0.314453,1.538082,-0.671245,-0.361653,1.176253,1.628421,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,4.342363,0.0
2758,-0.679535,-0.314453,2.008193,-0.671245,-0.361653,1.176253,1.112609,-0.083257,-0.116567,-0.042601,...,-0.191871,-0.174985,-0.21227,-0.248069,-0.232895,-0.232895,-0.214128,-0.21227,4.342363,0.0


In [39]:
def score_classification(model):
    """Return the mean per-round precision of ``model`` on season ``N``.

    For every distinct ``round`` of season ``N`` in the module-level ``df``
    the feature columns are scaled with the module-level ``scaler``, the
    row(s) with the highest class-1 probability are marked as the predicted
    positive, and ``precision_score`` is computed against the ``podium``
    column. The per-round precisions are then averaged.

    Parameters
    ----------
    model : fitted classifier
        Must expose ``predict_proba`` returning two columns, read here as the
        probabilities of class 0 and class 1.

    Returns
    -------
    float
        Sum of the per-round precisions divided by the number of rounds that
        were actually scored.

    Raises
    ------
    ValueError
        If ``df`` holds no rows for season ``N``.

    Notes
    -----
    Rows tied for the highest probability are all marked as predicted, so a
    round can have more than one predicted positive.
    """
    non_feature_cols = ['season', 'round', 'podium', 'driver_points_from_race',
                        'constructor_points_from_race', 'qualifying_pos']

    # Filter the season once instead of on every loop iteration.
    season_df = df[df.season == N]
    rounds = season_df['round'].unique()
    if len(rounds) == 0:
        raise ValueError(f"no rows found for season {N}; nothing to score")

    total_precision = 0
    for race_round in rounds:
        test = season_df[season_df['round'] == race_round]
        X_test = test.drop(non_feature_cols, axis=1)
        y_test = test.podium

        # Scale with the scaler that was fitted on the training data.
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # Make predictions.
        prediction_df = pd.DataFrame(model.predict_proba(X_test), columns=['proba_0', 'proba_1'])
        # Positional alignment: predictions carry a fresh 0..n-1 index.
        prediction_df['actual'] = y_test.reset_index(drop=True)
        # Compute the maximum once rather than once per row.
        top_proba = prediction_df.proba_1.max()
        prediction_df['predicted'] = (prediction_df.proba_1 == top_proba).astype(int)

        total_precision += precision_score(prediction_df.actual, prediction_df.predicted)

    # Average over the rounds scored. Dividing by the largest round number
    # under-reports the score whenever a round is missing from the data.
    return total_precision / len(rounds)

In [40]:
# Column-oriented store for the results table: one independent, initially
# empty list per column.
comparison_dict = {
    column: []
    for column in ('model', 'hidden_layer_sizes', 'activation', 'solver', 'alpha', 'score')
}

In [41]:
# Neural network: exhaustive search over the hyper-parameter grid below.

params = {'hidden_layer_sizes': [(80, 20, 40, 5), (75, 25, 50, 10)],
          'activation': ['identity', 'logistic', 'tanh', 'relu'],
          'solver': ['lbfgs', 'sgd', 'adam'],
          'alpha': np.logspace(-4, 2, 20)}

# product() varies its right-most argument fastest, so the combinations are
# visited in exactly the order the four hand-nested loops produced.
param_grid = product(params['hidden_layer_sizes'],
                     params['activation'],
                     params['solver'],
                     params['alpha'])

# NOTE(review): the saved output shows lbfgs fits stopping at the iteration
# limit. max_iter is left at its default here so scores stay comparable with
# earlier runs; raise it deliberately if converged fits are wanted.
for model_params in param_grid:
    hidden_layer_sizes, activation, solver, alpha = model_params

    # random_state is fixed so repeated runs fit the same networks.
    model = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        solver=solver,
        alpha=alpha,
        random_state=1)

    model.fit(X_train, y_train)

    model_score = score_classification(model)

    # One entry per column keeps the parallel lists the same length.
    comparison_dict['model'].append('neural_network_classifier')
    comparison_dict['hidden_layer_sizes'].append(hidden_layer_sizes)
    comparison_dict['activation'].append(activation)
    comparison_dict['solver'].append(solver)
    comparison_dict['alpha'].append(alpha)
    comparison_dict['score'].append(model_score)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

In [42]:
# Turn the collected column lists into a table, then show it best score first.
comparison_df = pd.DataFrame.from_dict(comparison_dict)

comparison_df.sort_values(by='score', ascending=False)

Unnamed: 0,model,hidden_layer_sizes,activation,solver,alpha,score
421,neural_network_classifier,"(75, 25, 50, 10)",relu,lbfgs,0.000207,0.5
160,neural_network_classifier,"(80, 20, 40, 5)",tanh,adam,0.000100,0.5
162,neural_network_classifier,"(80, 20, 40, 5)",tanh,adam,0.000428,0.5
163,neural_network_classifier,"(80, 20, 40, 5)",tanh,adam,0.000886,0.5
164,neural_network_classifier,"(80, 20, 40, 5)",tanh,adam,0.001833,0.5
...,...,...,...,...,...,...
308,neural_network_classifier,"(75, 25, 50, 10)",logistic,lbfgs,0.033598,0.0
307,neural_network_classifier,"(75, 25, 50, 10)",logistic,lbfgs,0.016238,0.0
306,neural_network_classifier,"(75, 25, 50, 10)",logistic,lbfgs,0.007848,0.0
305,neural_network_classifier,"(75, 25, 50, 10)",logistic,lbfgs,0.003793,0.0


model	hidden_layer_sizes	activation	solver	alpha	score
421	neural_network_classifier	(75, 25, 50, 10)	relu	lbfgs	0.000207	0.5
160	neural_network_classifier	(80, 20, 40, 5)	tanh	adam	0.000100	0.5
162	neural_network_classifier	(80, 20, 40, 5)	tanh	adam	0.000428	0.5
163	neural_network_classifier	(80, 20, 40, 5)	tanh	adam	0.000886	0.5
164	neural_network_classifier	(80, 20, 40, 5)	tanh	adam	0.001833	0.5

In [43]:
# chosen_inputs = comparison_df.query('score == 0.7272727272727273')

# chosen_inputs.to_csv(path+'parameters/neutralnetworkclassifier.csv')