In [25]:
import pandas as pd
import numpy as np
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import precision_score

from sys import platform

if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
elif platform == "darwin":
    path = '~/Documents/GitHub/f1-analytics/'
    # path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

warnings.filterwarnings("ignore", category=RuntimeWarning) 
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [26]:
data = pd.read_csv(path+'data/ml_input.csv')
merged = pd.read_csv(path+'data/processed.csv')

In [27]:
params_to_drop = ['season', 'round', 'driver', 'constructor', 'circuit_id', 'podium', 'driver_points_from']

In [28]:
### Season to test results

N = 2021

In [29]:
df = data.copy()

train = df[df.season < N]

scaler = StandardScaler()
X_train = train.drop(params_to_drop, axis=1)
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)

y_train = np.asarray(train.driver_points_from.values)

In [30]:
X_train.columns

Index(['qualifying_pos', 'starting_grid', 'stage_q3', 'driver_points_per',
       'constructor_points_per', 'constructor_points_before',
       'driver_points_before', 'driver_wins_before', 'constructor_wins_before',
       'constructor_standings_before', 'driver_standings_before',
       'driver_last_3', 'constructor_last_3'],
      dtype='object')

In [31]:
comparison_dict ={'model':[],
                  'criterion': [],
                  'max_features': [],
                  'max_depth': [],
                  'score': []
                  }

In [32]:
def score_regression(X_train, y_train, model):
    score = 0
    
    for circuit in df[df.season == N]['round'].unique():

        test = df[(df.season == N) & (df['round'] == circuit)]
        X_test = test.drop(params_to_drop, axis=1)
        y_test = test.driver_points_from

        #scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict(X_test), columns=['predicted_points'])
        prediction_df['actual_points'] = y_test.reset_index(drop=True)
        prediction_df.sort_values('predicted_points', ascending=False, inplace=True)

        prediction_df['predicted_winner'] = prediction_df.predicted_points.map(lambda x: 1 if x == prediction_df.predicted_points.max() else 0)
        prediction_df['actual_winner'] = prediction_df.actual_points.map(lambda x: 1 if x == prediction_df.actual_points.max() else 0)

        score += precision_score(prediction_df.actual_winner, prediction_df.predicted_winner)
        
    return score / df[df.season == N]['round'].nunique()

In [33]:
# Random Forest Regressor

params={'criterion': ['squared_error'],
        'max_features': [0.8, 1.0],
        'max_depth': list(map(int, (np.linspace(5, 55, 26))))
        }

for criterion in params['criterion']:
    for max_features in params['max_features']:
        for max_depth in params['max_depth']:

            model_params = (criterion, max_features, max_depth)
            model = RandomForestRegressor(criterion = criterion, max_features = max_features, max_depth = max_depth)
            model.fit(X_train, y_train)
            
            score = score_regression(X_train, y_train, model)

            comparison_dict['model'].append('random_forest_regressor')
            comparison_dict['criterion'].append(criterion)
            comparison_dict['max_features'].append(max_features)
            comparison_dict['max_depth'].append(max_depth)
            comparison_dict['score'].append(score)

0.0
0.0
1.0
2.0
3.0
3.0
4.0
5.0
6.0
6.0
7.0
8.0
8.0
9.0
10.0
10.0
11.0
12.0
13.0
0.0
0.0
0.0
1.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
7.0
8.0
9.0
9.0
10.0
11.0
12.0
0.0
0.0
1.0
2.0
3.0
3.0
4.0
5.0
6.0
6.0
7.0
8.0
8.0
9.0
10.0
10.0
11.0
12.0
13.0
1.0
1.0
1.0
2.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
7.0
8.0
9.0
9.0
10.0
11.0
11.0
0.0
0.0
1.0
2.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
7.0
8.0
9.0
9.0
10.0
11.0
12.0
0.0
0.0
0.0
1.0
1.0
1.0
2.0
3.0
4.0
4.0
5.0
6.0
6.0
7.0
8.0
8.0
9.0
10.0
11.0
0.0
0.0
0.0
1.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
7.0
8.0
9.0
9.0
10.0
11.0
12.0
1.0
1.0
1.0
2.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
7.0
8.0
9.0
9.0
10.0
11.0
11.0
1.0
1.0
1.0
2.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
8.0
9.0
10.0
10.0
11.0
12.0
12.0
1.0
1.0
1.0
2.0
3.0
3.0
4.0
5.0
6.0
6.0
7.0
8.0
8.0
9.0
10.0
10.0
11.0
12.0
13.0
1.0
1.0
1.0
2.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
7.0
8.0
9.0
9.0
10.0
11.0
12.0
1.0
1.0
1.0
2.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.0
7.0
7.0
8.0
8.0
9.0
10.0
11.0
1.0
1.0
1.0
2.0
2.0
2.0
3.0
4.0
5.0
5.0
6.0
7.

KeyboardInterrupt: 

In [None]:
comparison_df = pd.DataFrame(comparison_dict)

chosen_inputs = comparison_df.query('score > 0.5').sort_values('score', ascending=False)


chosen_inputs.to_csv(path+'parameters/rf_regressor.csv', index=False)

chosen_inputs.head()

Unnamed: 0,model,criterion,max_features,max_depth,score
23,random_forest_regressor,squared_error,0.8,51,0.736842
0,random_forest_regressor,squared_error,0.8,5,0.684211
20,random_forest_regressor,squared_error,0.8,45,0.684211
47,random_forest_regressor,squared_error,1.0,47,0.684211
44,random_forest_regressor,squared_error,1.0,41,0.684211
