In [57]:
import pandas as pd
import numpy as np
import warnings

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import precision_score

from sys import platform

# Root of the local f1-analytics checkout; file locations are built by appending to it.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/f1-analytics/'
else:
    # macOS ("darwin") and every other platform use the home-relative checkout, so that
    # `path` is always defined instead of surfacing later as a NameError (e.g. on Linux).
    path = '~/Documents/GitHub/f1-analytics/'
    # path = '/Users/oliverjcarter/Documents/GitHub/f1-analytics/'

# Suppress RuntimeWarning messages for the rest of the session.
warnings.filterwarnings(action="ignore", category=RuntimeWarning)
# Switch off pandas' chained-assignment check (its default setting is 'warn').
pd.set_option('mode.chained_assignment', None)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [58]:
# Model input table and the stored random-forest hyper-parameters, both read
# from CSV files located under the repository root `path`.
data = pd.read_csv(f'{path}data/ml_input.csv')
parameters = pd.read_csv(f'{path}parameters/rf_regressor.csv')

In [59]:
### Season held out for testing: earlier seasons form the training set,
### and predictions are produced for every round of season N.

N = 2021

In [60]:
# Columns removed from the frame before it is handed to the scaler / model.
params_to_drop = [
    'season',
    'round',
    'driver',
    'constructor',
    'circuit_id',
    'podium',
    'driver_points_from',
]

In [61]:
# Work on a copy so the frame loaded from disk stays untouched.
df = data.copy()

# Every season before N is training data.
train = df.loc[df['season'] < N]

# Regression target.
y_train = np.asarray(train['driver_points_from'].values)

# Standardise the feature columns; the fitted scaler is reused on the test rounds.
scaler = StandardScaler()
train_features = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(
    scaler.fit_transform(train_features),
    columns=train_features.columns,
)

In [62]:
def get_predictions(X_train, y_train, model):
    """Predict points and the winner for every round of season ``N``.

    Reads the module-level ``df``, ``N``, ``params_to_drop`` and ``scaler``.
    For each round of season ``N`` the feature columns are scaled with the
    (already fitted) ``scaler`` and passed to ``model.predict``; the row with
    the highest predicted points in that round is flagged as the winner.

    Parameters
    ----------
    X_train, y_train :
        Training features / targets. Currently unused: they are accepted so
        the model could be re-fitted after each round, a step that is
        disabled at the moment.
    model :
        Fitted estimator exposing ``predict``.

    Returns
    -------
    (points, winners) : tuple of two lists
        One entry per row of ``df[df.season == N]``, **in that frame's row
        order**, so the lists can be assigned directly as new columns of it.
        ``points`` holds the predicted points, ``winners`` holds 1 for the
        predicted winner of each round and 0 otherwise. Exactly one row per
        round is flagged; on tied predictions the first tied row wins.
    """
    season = df[df.season == N]
    round_of_row = season['round'].to_numpy()

    # Filled positionally so the output order matches the season frame even
    # when its rows are not grouped by round.
    points = np.full(len(season), np.nan)
    winners = np.zeros(len(season), dtype=int)

    for race_round in pd.unique(round_of_row):
        in_round = round_of_row == race_round

        X_test = season[in_round].drop(params_to_drop, axis=1)

        # Scaling
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # Make predictions
        predicted = np.asarray(model.predict(X_test), dtype=float).ravel()
        points[in_round] = predicted

        # argmax returns the first maximum, so a tie cannot yield two winners.
        winners[np.flatnonzero(in_round)[np.argmax(predicted)]] = 1

    return list(points), list(winners)

In [63]:
# First row of the stored hyper-parameter table.
params = parameters.iloc[0]

# Pull out the settings that are passed to the regressor.
criterion = params['criterion']
max_features = params['max_features']
max_depth = params['max_depth']

params

model           random_forest_regressor
criterion                 squared_error
max_features                        0.8
max_depth                             5
score                          0.684211
Name: 0, dtype: object

In [64]:
# Random Forest Regressor

# Take an explicit copy of the season-N rows: the prediction columns added below
# then belong to an independent frame instead of being set on a slice of `df`.
test = df[(df.season == N)].copy()

model_params = (criterion, max_features, max_depth)
# Fixed seed: the forest is built with randomness, so without it every run of the
# notebook would give different predictions and accuracy figures.
model = RandomForestRegressor(
    criterion=criterion,
    max_features=max_features,
    max_depth=max_depth,
    random_state=42,
)

model.fit(X_train, y_train)

points, winner = get_predictions(X_train, y_train, model)

# One value per row of `test`, assigned positionally.
test['predicted_points'] = points
test['predicted_winner'] = winner

In [65]:
rounds = test['round'].nunique()

# Each metric is the number of predicted-winner rows meeting a condition on the
# `podium` column (presumably the finishing position -- confirm), divided by the
# number of rounds.
accuracy_checks = {
    'Outright Winner Accuracy': 'podium == 1',
    'Top-Two Accuracy': 'podium <= 2',
    'Podium Accuracy': 'podium <= 3',
}

for label, condition in accuracy_checks.items():
    hits = test.query(f"predicted_winner == 1 & {condition}").shape[0]
    print(f'{label}: { round(hits / rounds, 2) }')

Outright Winner Accuracy: 0.68
Top-Two Accuracy: 0.84
Podium Accuracy: 0.84


In [66]:
def highlight_row(row):
    """Return one CSS declaration per cell, chosen from ``row['podium']``.

    Intended for ``Styler.apply(..., axis=1)``, which needs a list with the
    same length as the row.

    Colour bands:
      * ``podium == 1``        -> limegreen
      * ``1 < podium <= 3``    -> yellow (black text)
      * ``3 < podium <= 10``   -> orange
      * ``podium > 10``        -> #E34234
      * anything else (NaN, values below 1) -> no styling

    The last case previously fell through and returned ``None``, which is
    not a valid return value for a styling function.
    """
    position = row['podium']

    if position == 1:
        css = 'background-color: limegreen;'
    elif 1 < position <= 3:
        css = 'background-color: yellow; opacity: .95; color: black'
    elif 3 < position <= 10:
        css = 'background-color: orange'
    elif position > 10:
        css = 'background-color: #E34234'
    else:
        # NaN compares False everywhere above; leave such rows unstyled.
        css = ''

    return [css] * len(row)

In [67]:
# One row per predicted winner, limited to the columns worth showing.
winner_columns = ['circuit_id', 'driver', 'podium', 'starting_grid', 'predicted_points']
q = test.loc[test['predicted_winner'] == 1, winner_columns]

# White text on a background colour chosen per row by highlight_row.
q.style.set_properties(color='white').apply(highlight_row, axis=1)

Unnamed: 0,circuit_id,driver,podium,starting_grid,predicted_points
2765,bahrain,max_verstappen,2,1,17.472539
2785,imola,lewis_hamilton,2,1,18.514648
2804,portimao,lewis_hamilton,1,2,16.793531
2824,catalunya,lewis_hamilton,1,1,19.68635
2844,monaco,max_verstappen,1,2,17.023386
2877,baku,lewis_hamilton,15,2,16.690672
2883,ricard,max_verstappen,1,1,19.399279
2902,red_bull_ring,max_verstappen,1,1,19.581222
2922,red_bull_ring,max_verstappen,1,1,19.628877
2943,hungaroring,lewis_hamilton,2,1,19.06949
