In [36]:
import ast
import warnings
from sys import platform

import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Locate the project checkout. Windows uses the author's fixed checkout
# location; every other platform (macOS "darwin", Linux, ...) uses the
# home-relative layout, so `path` is always defined and the data-loading cell
# cannot fail with a NameError on an unlisted platform.
if platform == "win32":
    path = 'C:/Users/olive/GitHub/F1-Fantasy-Predictor/'
else:
    path = '~/Documents/GitHub/F1-Fantasy-Predictor/'

# Silence RuntimeWarnings for the whole notebook session.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# Disable pandas' chained-assignment warning globally.
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

In [37]:
# Resolve both input files relative to the project root, then load them:
# the model-input table and the table of stored hyper-parameter sets.
ml_input_file = path + 'data/ml_input.csv'
parameters_file = path + 'parameters/neutralnetworkregressor.csv'

data = pd.read_csv(ml_input_file)
parameters = pd.read_csv(parameters_file)

In [38]:
# Season used as the hold-out: rows with season < N train the model and rows
# with season == N are the ones predicted and scored.
N = 2023

In [39]:
# Columns removed from the feature matrix before scaling/prediction.
# 'driver_points_from' is the regression target; 'podium' is compared against
# the predicted winner when scoring, so it is presumably the race result and
# must not be used as a feature.
params_to_drop = ['season', 'round', 'driver', 'constructor', 'circuit_id', 'podium', 'driver_points_from']

In [40]:
# X_train.columns

In [41]:
# Work on a copy so the frame loaded from disk stays untouched.
df = data.copy()

# Everything before the hold-out season is training data.
train = df.loc[df['season'] < N]

# Standardise the features. The fitted scaler is kept at notebook level
# because the prediction step reuses it on the hold-out rows.
scaler = StandardScaler()
feature_frame = train.drop(columns=params_to_drop)
X_train = pd.DataFrame(
    scaler.fit_transform(feature_frame),
    columns=feature_frame.columns,
)

# Regression target: the values of the 'driver_points_from' column.
y_train = train['driver_points_from'].to_numpy()

In [42]:
# List every column of the input table on one line, space-separated.
print(*data.columns)

season round podium driver constructor circuit_id driver_points_from qualifying_pos stage_q3 driver_points_per constructor_points_per constructor_points_before driver_points_before driver_wins_before constructor_wins_before constructor_standings_before driver_standings_before driver_last_3 constructor_last_3


In [43]:
def get_predictions(X_train, y_train, model):
    """Predict points for every season-``N`` row, one round at a time.

    Reads the notebook-level ``df``, ``N``, ``params_to_drop`` and the fitted
    ``scaler``. For each round of season ``N`` the feature columns are scaled
    with ``scaler`` and passed to ``model.predict``. The row(s) with the
    highest predicted points in that round are flagged as predicted winner.

    Results are written back by row position, so both returned lists are
    aligned with ``df[df.season == N]`` row for row, even when the rows of a
    round are not contiguous in ``df``.

    Parameters
    ----------
    X_train, y_train
        Not used. Kept so existing calls keep working; the model is not
        refitted between rounds.
    model
        Fitted estimator exposing ``predict``.

    Returns
    -------
    (points, winners)
        ``points``: predicted points per row.
        ``winners``: 1 for the top prediction(s) of the row's round, else 0.
        Tied maxima are all flagged.
    """
    season_rows = df[df.season == N]
    round_of_row = season_rows['round'].to_numpy()
    features = season_rows.drop(params_to_drop, axis=1)

    points = np.zeros(len(season_rows), dtype=float)
    winners = np.zeros(len(season_rows), dtype=int)

    # pd.unique keeps first-appearance order, matching Series.unique().
    for race_round in pd.unique(round_of_row):
        positions = np.flatnonzero(round_of_row == race_round)

        # Scale with the scaler fitted on the training seasons.
        X_test = features.iloc[positions]
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        predicted = pd.Series(model.predict(X_test))

        # Compute the round maximum once instead of once per row.
        best = predicted.max()
        points[positions] = predicted.to_numpy()
        winners[positions] = (predicted == best).to_numpy()

    return list(points), list(winners)

In [44]:
# Pick the hyper-parameter set stored in the second row of the table.
params = parameters.iloc[1]

# The layer sizes are stored as text (e.g. "(75, 30, 50, 10, 3)").
# literal_eval only parses Python literals, so a malformed or malicious CSV
# cell cannot execute code the way eval() would.
hidden_layer_sizes = ast.literal_eval(params.hidden_layer_sizes)
activation = params.activation
solver = params.solver
alpha = params.alpha

# Display the selected row.
params

Unnamed: 0                            389
model                        nn_regressor
hidden_layer_sizes    (75, 30, 50, 10, 3)
activation                       logistic
solver                                sgd
alpha                            0.023357
score                            0.789474
Name: 1, dtype: object

In [45]:
# Hold-out rows for season N. Take an explicit copy: columns are added below,
# and assigning into a slice of `df` is exactly what pandas'
# chained-assignment warning flags.
test = df[df.season == N].copy()

# Build the network from the selected hyper-parameters; the fixed
# random_state keeps the fit reproducible.
model = MLPRegressor(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    random_state=1)

model.fit(X_train, y_train)

points, winner = get_predictions(X_train, y_train, model)

# NOTE(review): the recorded outputs show every driver predicted at roughly
# 5.0012 points, i.e. the network output is almost constant. Worth checking
# the target/feature scaling and solver settings before trusting the ranking.
test['predicted_points'] = points
test['predicted_winner'] = winner

In [46]:
# Number of distinct rounds in the hold-out season (the denominator below).
rounds = test['round'].nunique()

# Each metric is the count of predicted-winner rows meeting the finishing
# condition, divided by the number of rounds.
accuracy_queries = {
    'Outright Winner Accuracy': 'predicted_winner == 1 & podium == 1',
    'Top-Two Accuracy': 'predicted_winner == 1 & podium <= 2',
    'Podium Accuracy': 'predicted_winner == 1 & podium <= 3',
}

for label, condition in accuracy_queries.items():
    hit_count = len(test.query(condition))
    print(f'{label}: {round(hit_count / rounds, 2)}')

Outright Winner Accuracy: 1.0
Top-Two Accuracy: 1.0
Podium Accuracy: 1.0


In [47]:
def highlight_row(row):
    """Return one CSS style string per cell, chosen from ``row['podium']``.

    Intended for ``Styler.apply(highlight_row, axis=1)``.

    * ``podium == 1``       -> lime green
    * ``1 < podium <= 3``   -> yellow with black text
    * ``3 < podium <= 10``  -> orange
    * ``podium > 10``       -> red (#E34234)
    * anything else (NaN, 0, negative) -> no styling

    The last case previously fell through and returned ``None``, which is not
    a valid per-row style list; an empty style per cell is returned instead.
    """
    position = row['podium']

    if position == 1:
        style = 'background-color: limegreen; opacity: 1'
    elif 1 < position <= 3:
        style = 'background-color: yellow; opacity: 1; color: black'
    elif 3 < position <= 10:
        style = 'background-color: orange'
    elif position > 10:
        style = 'background-color: #E34234'
    else:
        # NaN fails every comparison above and ends up here as well.
        style = ''

    return [style] * len(row)

In [48]:
# Show the hold-out rows flagged as predicted winner.
test.query('predicted_winner == 1')

Unnamed: 0,season,round,podium,driver,constructor,circuit_id,driver_points_from,qualifying_pos,stage_q3,driver_points_per,...,constructor_points_before,driver_points_before,driver_wins_before,constructor_wins_before,constructor_standings_before,driver_standings_before,driver_last_3,constructor_last_3,predicted_points,predicted_winner
3553,2023,1,1,max_verstappen,red_bull,bahrain,25.0,1,1,0.0,...,0.0,0.0,0,0,1,1,0.0,0.0,5.001249,1
3573,2023,2,1,sergio_perez,red_bull,jeddah,25.0,1,1,9.0,...,43.0,18.0,0,1,1,2,18.0,43.0,5.001282,1


In [50]:
# Round 2 only: keep the columns of interest and rank by predicted points.
q = (
    test.loc[test['round'] == 2, ['circuit_id', 'driver', 'podium', 'predicted_points']]
    .sort_values('predicted_points', ascending=False)
)

# White text by default, then colour each row with highlight_row.
q.style.set_properties(color='white').apply(highlight_row, axis=1)

Unnamed: 0,circuit_id,driver,podium,predicted_points
3573,jeddah,sergio_perez,1,5.001282
3592,jeddah,lance_stroll,20,5.001256
3575,jeddah,fernando_alonso,3,5.001255
3576,jeddah,george_russell,4,5.001245
3577,jeddah,lewis_hamilton,5,5.00124
3578,jeddah,carlos_sainz,6,5.001235
3581,jeddah,pierre_gasly,9,5.001212
3579,jeddah,charles_leclerc,7,5.00121
3580,jeddah,esteban_ocon,8,5.001189
3587,jeddah,oscar_piastri,15,5.001176
