In [1]:
import os
import pandas as pd
import numpy as np
from collections import Counter
from itertools import combinations, product
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from datetime import datetime
import itertools
from xgboost import XGBRegressor

In [2]:
os.chdir("..")
os.getcwd()

'm:\\coding\\f1_fantasy_combinator\\src'

In [3]:
from utils import read_json
from config import driver_code_to_driver_mapping as driver_code_to_driver_name_mapping

driver_data_path = "data_collection/data/driver" 
team_data_path = "data_collection/data/constructor" 
driver_list = os.listdir(driver_data_path)


In [4]:
def train(model_dict, model_type, driver_name, X, y):
    X_array, y_array = np.array(X), np.array(y)
    match model_type:
        case "svr":
            reg = SVR(kernel="rbf").fit(X_array, y_array)
        case "rf":
            reg = RandomForestRegressor(max_depth=3).fit(X_array, y_array)
        case "ridge":
            reg = Ridge(alpha=1.0).fit(X_array, y_array)
        case "lg":
            reg = LinearRegression().fit(X_array, y_array)
        case "xgb":
            reg = XGBRegressor().fit(X_array, y_array)
    model_dict[driver_name] = reg
    return model_dict

def predict(model_dict, driver_name, X):
    return round(model_dict[driver_name].predict(X)[0]) 

In [5]:
def calculate_avg_array_from_cumsum(input_cumsum):
    return input_cumsum/np.arange(1,len(input_cumsum)+1)

def get_previous_avg_overtakes(race_before_json):
    overtakes = [x['overtake'] for x in race_before_json.values()]
    overtake_cumsum = np.cumsum(overtakes)
    avg_overtake_points_array = calculate_avg_array_from_cumsum(overtake_cumsum) #overtake_cumsum/np.arange(1,len(overtake_cumsum)+1)
    return avg_overtake_points_array

def get_dotd_probability(race_before_json):
    dotd = [x['dotd'] for x in race_before_json.values()]
    dotd_cumsum = np.cumsum(dotd)
    avg_overtake_points_array = calculate_avg_array_from_cumsum(dotd_cumsum) #dotd_cumsum/np.arange(1,len(dotd_cumsum)+1)
    return avg_overtake_points_array

def get_previous_fantasy_scores(race_before_json):
    fantasy_scores = [x['fantasy_score'] for x in race_before_json.values()]
    fantasy_cumsum = np.cumsum(fantasy_scores)
    avg_fantasy_points_array = calculate_avg_array_from_cumsum(fantasy_cumsum) #fantasy_cumsum/np.arange(1,len(fantasy_cumsum)+1)
    return avg_fantasy_points_array

def get_previous_dnf_prob(race_before_json):
    is_dnf = [x['dnf'] for x in race_before_json.values()]
    dnf_cumsum = np.cumsum(is_dnf)
    dnf_prob_array = calculate_avg_array_from_cumsum(dnf_cumsum) #dnf_cumsum/np.arange(1,len(dnf_cumsum)+1)
    return dnf_prob_array

In [6]:
def form_x_y(data_json, feature_set, is_training):
    fp1_array, fp2_array, fp3_array, fp_weighted_mean = get_free_practice_info(data_json, is_training) 
    #transformations
    if is_training:
        fantasy_scores = [x['fantasy_score'] for x in data_json.values()]
        dnf_prob_array = get_previous_dnf_prob(data_json)
        avg_overtake_array = get_previous_avg_overtakes(data_json)
        dotd_prob = get_dotd_probability(data_json)
        previous_score_array = get_previous_fantasy_scores(data_json)
        # weight_fp_mean_array = get_weighted_fp_mean_array(data_json)
    else:
        fantasy_scores = 0
        dnf_prob_array = get_previous_dnf_prob(data_json)[-1]
        avg_overtake_array = get_previous_avg_overtakes(data_json)[-1]
        dotd_prob = get_dotd_probability(data_json)[-1]
        previous_score_array = get_previous_fantasy_scores(data_json)[-1]
        # weight_fp_mean_array = get_weighted_fp_mean_array(data_json)[-1]

    temp_df = pd.DataFrame({
        "fp1" : fp1_array,
        "fp2": fp2_array,
        "fp3": fp3_array,
        "avg_score" : previous_score_array,
        "avg_overtake": avg_overtake_array,
        "dnf_prob": dnf_prob_array,
        "dotd_prob": dotd_prob,
        "weighted_fp_mean": fp_weighted_mean,
        "y": fantasy_scores
    })
    
    temp_df = temp_df.dropna()
    
    return temp_df[feature_set], temp_df["y"]

def get_free_practice_info(data_json, is_training):
    fp1_array = []
    fp2_array = []
    fp3_array = []
    fp_mean_array = []
    raw_info = [(x['fp1'], x.get('fp2', np.nan), x.get('fp3', np.nan), x.get("sprint_quali", np.nan), x["dnf"]) for x in data_json.values()]
    for fp1, fp2, fp3, sprint_quali, is_dnf in raw_info:
        if not (is_dnf and is_training):
            mean_val = np.nanmean([int(x) for x in [fp1, fp2, fp3, sprint_quali] if not x is np.nan])
            if not sprint_quali is np.nan:
                fp3_array.append(int(np.nan_to_num(sprint_quali, nan= mean_val)))
                fp2_array.append(int(np.nan_to_num(fp1, nan= mean_val)))
                fp1_array.append(int(np.nan_to_num(fp1, nan= mean_val)))
            else:
                fp3_array.append(int(np.nan_to_num(fp3, nan= mean_val)))
                fp2_array.append(int(np.nan_to_num(fp2, nan= mean_val)))
                fp1_array.append(int(np.nan_to_num(fp1, nan= mean_val)))

            fp_mean_array.append(round(np.average([fp1_array[-1], fp2_array[-1], fp3_array[-1]], weights=[2,5,10]),3))
        else:
            fp3_array.append(None)
            fp2_array.append(None)
            fp1_array.append(None)
            fp_mean_array.append(0)
    
    return fp1_array, fp2_array, fp3_array, fp_mean_array

In [7]:
def train_fantasy_score_from_practice_model(model_dict, model_type, feature_set, driver_name, data_json, till_race_no):
    race_before_json = {}
    for i in range(1,till_race_no):
        race_before_json[f"race_{i}"] = data_json[f"race_{i}"]
    
    X, y = form_x_y(race_before_json, feature_set, is_training=True)

    model_dict = train(model_dict, model_type, driver_name, X, y)
    return model_dict

def predict_fantasy_score_from_practice_model(model_dict, feature_set, driver_name, race_no):
    driver_json = read_json(os.path.join(driver_data_path, f"{driver_name}.json"))
    ##filter driver_json for specific race
    input_json = {}
    race_before_json = {}
    input_json[f"race_{race_no}"] = driver_json[f"race_{race_no}"]
    
    for i in range(1,race_no):
        race_before_json[f"race_{i}"] = driver_json[f"race_{i}"]
    
    X, _ = form_x_y(input_json, feature_set, is_training=False)
    # X = np.array(get_free_practice_info(input_json))
    return predict(model_dict, driver_name, np.array(X))

### Approach 3 - using fp1, fp2, fp3 to predict fantasy scores


### Validating current approach using simulation

In [8]:
def calculate_mse_for_df(input_df):
    return mean_squared_error(input_df.actual_fantasy_points, input_df.predicted_fantasy_points)

In [9]:
def calculate_cost_change(team_array, master_dict):
    cost_change = 0
    for team_member in team_array:
        # driver_change
        if team_member in master_dict["driver"]:
            cost_change+=np.nan_to_num(master_dict["driver"][team_member]["cost_change"], 0)
        else:
            # team_change
            cost_change+=master_dict["team"][team_member]["cost_change"] or 0
    return round(cost_change,2)

In [10]:
def update_constructor_with_predicted_fantasy_score(input_dict, input_df):
    team_sum = input_df.groupby("team", as_index=False).sum()[["team","predicted_fantasy_points"]]
    for team_name, team_predicted_score in zip(team_sum.team, team_sum.predicted_fantasy_points):
        input_dict[team_name]["predicted_score"] = team_predicted_score
    return input_dict

In [23]:
dominant_drivers = ["red_driver_1", "red_driver_2", "mcl_driver_1", "mcl_driver_2", "fer_driver_1", "fer_driver_2", "mer_driver_1", "mer_driver_2"]

def find_team_cost(team_combo, master_dict):
    team_cost = 0
    for team_member in team_combo:
        if team_member in master_dict["driver"]:
            team_cost+=master_dict["driver"][team_member]["cost"]
        else:
            team_cost+=master_dict["team"][team_member]["cost"]
    return team_cost

def find_team_score_in_reality(team_combo, master_dict, drive_2x = None):
    team_score = 0
    hightest_score = 0
    for team_member in team_combo:
        if team_member in master_dict["driver"]:
            driver_score = master_dict["driver"][team_member]["score"]
            if driver_score > hightest_score:
                hightest_score = driver_score
            team_score+=driver_score
        else:
            team_score+=master_dict["team"][team_member]["score"]
    
    if drive_2x is None:
        return team_score + hightest_score
    else:
        return team_score + master_dict["driver"][drive_2x]["score"]

def find_team_score_based_on_predicted_scores(team_combo, master_dict, driver_2x):
    team_score = 0
    for team_member in team_combo:
        if team_member in master_dict["driver"]:
            driver_score = master_dict["driver"][team_member]["predicted_score"]
            team_score+=driver_score
        else:
            team_score+=master_dict["team"][team_member]["predicted_score"]

    return team_score + master_dict["driver"][driver_2x]["predicted_score"]

def recalibrate_2x_driver(predicted_team_df):
    for row in range(predicted_team_df.shape[0]):
        if predicted_team_df.iloc[row]["2x_driver"] not in dominant_drivers:
            original_2x_driver = predicted_team_df.iloc[row]["2x_driver"]
            print("Original 2x driver ", original_2x_driver)
            print("Re-calibrate 2x driver")
            new_2x_driver = [x for x in predicted_team_df.iloc[row]["team"] if x in dominant_drivers][0]
            print("New 2x driver ", new_2x_driver)
            predicted_team_df.loc[predicted_team_df['2x_driver']==original_2x_driver, '2x_driver'] = new_2x_driver
    return predicted_team_df

def find_top_teams(master_dict, budget, top_k, previous_team):
    team_cost_array = []
    team_real_score_array = []
    team_predicted_score_array = []
    selected_team_array = []
    driver_2x_array = []
    penalty_for_additional_team_change_array = []

    for combo_driver,combo_team  in product(combinations(master_dict["driver"].keys(), 5), combinations(master_dict["team"].keys(), 2)):
        penalty_for_additional_team_change = 0
        combo = combo_driver + combo_team
        team_cost = find_team_cost(combo, master_dict)
        if team_cost > budget:
            continue
        
        if len(previous_team)!=0:
            additional_changes = max((how_many_drivers_differ(previous_team, combo) - 2), 0)
            penalty_for_additional_team_change = 10*additional_changes
        
        #finding 2x driver
        driver_order_based_on_predicted_score = [(master_dict["driver"][x]["predicted_score"], x) for x in combo_driver]
        predicted_driver_2x = max(driver_order_based_on_predicted_score)[1]

        ## optional
        for driver_order in driver_order_based_on_predicted_score:
            if driver_order[1] in dominant_drivers:
                predicted_driver_2x = driver_order[1]
                break


            #  predicted_driver_2x = [x for x in combo_driver if x in dominant_drivers][0]

        # real_driver_2x = max([(master_dict["driver"][x]["score"], x) for x in combo_driver])[1]
        ## predicted fantasy score team
        real_team_score = find_team_score_in_reality(combo, master_dict, predicted_driver_2x) - penalty_for_additional_team_change
        predicted_team_score = find_team_score_based_on_predicted_scores(combo, master_dict, predicted_driver_2x) - penalty_for_additional_team_change
        
        # print(combo, team_cost, team_score)
        driver_2x_array.append(predicted_driver_2x)
        team_cost_array.append(team_cost)
        selected_team_array.append(list(combo))
        team_real_score_array.append(real_team_score)
        team_predicted_score_array.append(predicted_team_score)
        penalty_for_additional_team_change_array.append(penalty_for_additional_team_change)

    top_team_df = pd.DataFrame({
        "team" : selected_team_array,
        "cost": team_cost_array,
        "predicted_score": team_predicted_score_array,
        "real_score": team_real_score_array,
        "2x_driver": driver_2x_array,
        "additional_change_penalty": penalty_for_additional_team_change_array
    })

    theoretical_best_team = top_team_df.sort_values(by="real_score", ascending=False)[:top_k]
    top_predicted_team = top_team_df.sort_values(by="predicted_score", ascending=False)[:top_k]
    # top_predicted_team = recalibrate_2x_driver(top_predicted_team)

    # top_predicted_team["real_score"] = find_team_score_in_reality(top_predicted_team["team"].values[0], 
    #                                                                              master_dict, 
    #                                                                              top_predicted_team["2x_driver"].values[0]) - top_predicted_team["additional_change_penalty"].values[0]

    return top_predicted_team,  theoretical_best_team

def how_many_drivers_differ(driver_list_1, driver_list_2):
    differing_number = 0
    for driver in driver_list_1:
        if driver not in driver_list_2:
            differing_number+=1
    return differing_number

def form_driver_cost_score_dict(race_df):
    output_dict = {}
    for driver, score, cost, predicted_score, cost_change in zip(race_df.driver, race_df.actual_fantasy_points, race_df.fantasy_cost, 
                                                                 race_df.predicted_fantasy_points, race_df.cost_change):
        output_dict[driver] = {"cost": cost, "score":score, "predicted_score":predicted_score, "cost_change":cost_change}
    return output_dict

def form_team_cost_score_dict(team_array, score_array, cost_array):
    output_dict = {}
    for team, score, cost in zip(team_array, score_array, cost_array):
        output_dict[team] = {"cost": cost, "score":score}
    return output_dict

def driver_code_from_driver_name(input_driver_name):
    for driver_code, driver_name in driver_code_to_driver_name_mapping.items():
        if driver_name == input_driver_name:
            return driver_code

#### try brute force: calculate mse for all model, all feature sets

In [24]:
# from itertools import chain, product
# feature_available = ["avg_score","avg_overtake","dnf_prob","dotd_prob", "weighted_fp_mean"]
# all_possible_feature_combination = []
# for L in range(len(feature_available) + 1):
#     for subset in itertools.combinations(feature_available, L):
#         combo = ["fp1", "fp2", "fp3"] + list(subset)
#         all_possible_feature_combination.append(combo)


In [25]:
# from tqdm import tqdm
# fantasy_score_model_from_fp_dict = {}

# mse_array = []
# model_type_array = []
# feature_array = []

# for model_type in tqdm(["rf", "svr", "xgb"]):
#     for feature_set in tqdm(all_possible_feature_combination):
#         predicted_fantasy_points_array = []
#         actual_fantasy_points_array = []
#         for race_no in range(2,25):
#             for driver_data in driver_list:
#                 driver_name = driver_data.split(".")[0]
#                 driver_json = read_json(os.path.join(driver_data_path, driver_data))
#                 fantasy_score_model_from_fp_dict = train_fantasy_score_from_practice_model(fantasy_score_model_from_fp_dict, model_type, list(feature_set), driver_name, 
#                                                                                     driver_json, till_race_no=race_no)
#                 predicted_fantasy_points = predict_fantasy_score_from_practice_model(fantasy_score_model_from_fp_dict, list(feature_set), driver_name, race_no)
#                 actual_fantasy_points = driver_json[f"race_{race_no}"]["fantasy_score"]
#                 predicted_fantasy_points_array.append(predicted_fantasy_points)
#                 actual_fantasy_points_array.append(actual_fantasy_points)

#         prediction_df = pd.DataFrame({
#                 "predicted_fantasy_points": predicted_fantasy_points_array,
#                 "actual_fantasy_points" : actual_fantasy_points_array,
#             })
#         mse_array.append(calculate_mse_for_df(prediction_df))
#         model_type_array.append(model_type)
#         feature_array.append(feature_set)

# df = pd.DataFrame({
#     "model": model_type_array,
#     "feature_set": feature_array,
#     "error" : mse_array,
# })

# df.to_csv("notebooks/exploration_3_df.csv", index=False)

In [26]:
heuristic_scores = [40, 30, 25, 20, 20] + list(range(15,0,-1))
fp1_array = []
fp2_array = []
fp3_array = []
fantasy_score_array = []
for driver in driver_list:
    driver_json = read_json(os.path.join(driver_data_path, driver))
    race1_json = driver_json["race_1"]
    fp1_array.append(int(race1_json["fp1"]))
    fp2_array.append(int(race1_json["fp2"]))
    fp3_array.append(int(race1_json["fp3"]))
    fantasy_score_array.append(int(race1_json["fantasy_score"]))
race_1_df = pd.DataFrame({
    "driver" : [x.split(".")[0] for x in driver_list],
    "fp1" : fp1_array,
    "fp2" : fp2_array,
    "fp3": fp3_array,
    "fantasy_score": fantasy_score_array
})
race_1_df["weighted_fp_mean"] = race_1_df.apply(lambda x : round(np.average([x.fp1, x.fp2, x.fp3], weights=[2,5,10]),2), axis=1)
race_1_prediction_dict = dict(zip(race_1_df.sort_values(by="weighted_fp_mean")["driver"].to_list(), heuristic_scores))
    

In [27]:
# race_1_df["mean_fp"] = race_1_df.apply(lambda x : np.mean([x.fp1, x.fp2, x.fp3]), axis=1)
# race_1_df["weighted_fp_mean"] = race_1_df.apply(lambda x : round(np.average([x.fp1, x.fp2, x.fp3], weights=[2,5,10]),2), axis=1)
# race_1_df[["fp1", "fp2", "fp3","mean_fp", "fantasy_score"]].corr()
race_1_df

Unnamed: 0,driver,fp1,fp2,fp3,fantasy_score,weighted_fp_mean
0,alp_driver_1,18,16,20,6,18.59
1,alp_driver_2,17,18,18,7,17.88
2,ast_driver_1,5,3,2,7,2.65
3,ast_driver_2,14,8,10,8,9.88
4,fer_driver_1,8,9,4,22,5.94
5,fer_driver_2,11,4,1,36,3.06
6,haa_driver_1,20,7,9,-3,9.71
7,haa_driver_2,19,14,14,7,14.59
8,kck_driver_1,10,17,17,0,16.18
9,kck_driver_2,15,19,16,11,16.76


In [16]:
fantasy_score_model_from_fp_dict = {}
race_no_array = []
driver_array = []
predicted_fantasy_points_array = []
actual_fantasy_points_array = []
fantasy_cost_array = []
dnf_array = []
race_name_array = []
cost_change_array = []
model_type = "rf"
feature_set = ['fp1', 'fp2', 'fp3', 'avg_score', 'dnf_prob', 'dotd_prob']
## for race-1
for race_no in range(1,25):
    # train for n-1 races
    for driver_data in driver_list:
        driver_name = driver_data.split(".")[0]
        driver_json = read_json(os.path.join(driver_data_path, driver_data))
        
        # if this is first race, dont train anything directly predict using free practice
        if race_no == 1:
            predicted_fantasy_points_array.append(race_1_prediction_dict[driver_name])
        else:
            fantasy_score_model_from_fp_dict = train_fantasy_score_from_practice_model(fantasy_score_model_from_fp_dict, model_type, feature_set, driver_name, 
                                                                                    driver_json, till_race_no=race_no)
            # predict for nth race
            predicted_fantasy_points = predict_fantasy_score_from_practice_model(fantasy_score_model_from_fp_dict, feature_set, driver_name, race_no)
            predicted_fantasy_points_array.append(predicted_fantasy_points)
        
        actual_fantasy_points = driver_json[f"race_{race_no}"]["fantasy_score"]
        race_no_array.append(race_no)
        race_name_array.append(driver_json[f"race_{race_no}"]["race_name"])
        driver_array.append(driver_name)
        actual_fantasy_points_array.append(actual_fantasy_points)
        fantasy_cost_array.append(driver_json[f"race_{race_no}"]["fantasy_cost"])
        dnf_array.append(driver_json[f"race_{race_no}"]["dnf"])
        cost_change_array.append(driver_json[f"race_{race_no}"]["cost_change"])

prediction_df = pd.DataFrame({
    "race_no" : race_no_array,
    "race_name": race_name_array,
    "driver": driver_array,
    "dnf": dnf_array,
    "predicted_fantasy_points": predicted_fantasy_points_array,
    "actual_fantasy_points" : actual_fantasy_points_array,
    "fantasy_cost" : fantasy_cost_array,
    "cost_change": cost_change_array,
})
prediction_df["team"] = prediction_df.driver.apply(lambda x: x.split("_")[0])

In [17]:
prediction_df.head()

Unnamed: 0,race_no,race_name,driver,dnf,predicted_fantasy_points,actual_fantasy_points,fantasy_cost,cost_change,team
0,1,bahrain,alp_driver_1,False,1,6,7.8,-0.1,alp
1,1,bahrain,alp_driver_2,False,2,7,7.8,0.5,alp
2,1,bahrain,ast_driver_1,False,40,7,15.8,0.1,ast
3,1,bahrain,ast_driver_2,False,10,8,10.7,0.5,ast
4,1,bahrain,fer_driver_1,False,20,22,19.1,0.3,fer


In [28]:
calculate_mse_for_df(prediction_df)

137.01041666666666

In [29]:
## generic prediction for first race

In [41]:
# %%capture cap

change_message = "encouraging_additional_change"
budget = 100
driver_name_to_driver_code_mapping = {v:k for k,v in driver_code_to_driver_name_mapping.items()}


actual_best_team_score_array = []
predicted_team_score_array = []
previous_actual_best_team_code = []
previous_predicted_best_team = []
previous_predicted_best_team_code = None

## visualization things
mse_error_array = []
race_name_array = []
score_of_predicted_team_array = []
theoretical_best_score_array = []
differing_team_members = []
# Run simulation
for i in range(1,25):
    penalty_for_additional_team_change = 0
    constructor_dict = read_json(os.path.join(team_data_path, "constructor_info.json"))
    race_df = prediction_df[prediction_df["race_no"] == i]
    # keeping cost and scores in dict for fast reading
    master_dict = {}
    master_dict["driver"] = form_driver_cost_score_dict(race_df)
    master_dict["team"] = update_constructor_with_predicted_fantasy_score(constructor_dict[f"race_{i}"], race_df)
    print(f"{i}. {race_df.race_name.values[0]} gp, with budget {round(budget,2)} mse error = {round(calculate_mse_for_df(race_df),3)}")
    mse_error_array.append(round(calculate_mse_for_df(race_df),3))
    # finding best predicted and actual top team
    predicted_best_team_df, actual_best_team_df = find_top_teams(master_dict, budget, 1, previous_predicted_best_team)
    # predicted_best_team, actual_best_team = find_top_teams(master_dict, budget, 1, previous_predicted_best_team)
    
    ## extract relevant info from top teams
    predicted_best_team = predicted_best_team_df["team"].values[0]
    predicted_best_team_actual_score = predicted_best_team_df["real_score"].values[0]
    driver_2x = predicted_best_team_df["2x_driver"].values[0]
    predicted_fantasy_score = predicted_best_team_df["predicted_score"].values[0]

    actual_best_team = actual_best_team_df["team"].values[0]
    actual_best_team_actual_score = actual_best_team_df["real_score"].values[0]

    penalty_for_additional_team_change = predicted_best_team_df["additional_change_penalty"].values[0]
    predicted_best_team_code = [driver_name_to_driver_code_mapping[x] for x in predicted_best_team_df["team"].values[0][:5]] + predicted_best_team_df["team"].values[0][-2:]
    actual_best_team_code = [driver_name_to_driver_code_mapping[x] for x in actual_best_team_df["team"].values[0][:5]] + actual_best_team_df["team"].values[0][-2:]
    
    ## new budget will be old team's cost
    budget += calculate_cost_change(predicted_best_team_df["team"].values[0], master_dict) #predicted_best_team.cost.values[0]
    
    # calculate additional team change penalty
    previous_predicted_best_team_code = previous_predicted_best_team_code if previous_predicted_best_team_code is not None else predicted_best_team_code
    additional_team_change = how_many_drivers_differ(predicted_best_team_code, previous_predicted_best_team_code)
    
    # set current predicted best team as previous team for next race
    previous_predicted_best_team = predicted_best_team
    previous_predicted_best_team_code = predicted_best_team_code
    
    # penalty adjusted fantasy scores
    predicted_teams_penalty_adjusted_actual_score = predicted_best_team_actual_score #- penalty_for_additional_team_change
    actual_teams_penalty_adjusted_actual_score = actual_best_team_actual_score #- penalty_for_additional_team_change
    
    predicted_team_score_array.append(predicted_teams_penalty_adjusted_actual_score)
    actual_best_team_score_array.append(actual_teams_penalty_adjusted_actual_score)
    differing_team_members.append(how_many_drivers_differ(actual_best_team_code,predicted_best_team_code))
    
    race_name_array.append(race_df.race_name.values[0])
    score_of_predicted_team_array.append(predicted_teams_penalty_adjusted_actual_score)
    theoretical_best_score_array.append(actual_teams_penalty_adjusted_actual_score)

    print(f"penalty for best team {penalty_for_additional_team_change}")
    print(f"predicted best team {predicted_best_team_code}, 2x driver {driver_name_to_driver_code_mapping[driver_2x]} with actual score {predicted_teams_penalty_adjusted_actual_score} with predicted score {predicted_fantasy_score}")
    print(f"while actual best team {actual_best_team_code} with best possible score of {actual_teams_penalty_adjusted_actual_score}")
    print(f"Number of drivers differed in predicted and actual best teams {differing_team_members[-1]}")
    print(f"Number of drivers changed from previous team selection {additional_team_change}")
    print("--- - -- - - - - -  - -")

differing_team_members_avg = round(np.mean(differing_team_members),2)
print(f"Season score of predicted teams {np.sum(predicted_team_score_array)}, while actual teams {np.sum(actual_best_team_score_array)}")
print(f"Avg differing suggestions of {differing_team_members_avg}")
current_time = str(datetime.isoformat(datetime.now()).split(".")[0]).replace(":","_")

# with open(f"notebooks/score_{np.sum(predicted_team_score_array)}_differing_{differing_team_members_avg}_{change_message}_{current_time}.txt", 'w') as f:
#     f.write(cap.stdout)



1. bahrain gp, with budget 100 mse error = 118.45
penalty for best team 0
predicted best team ['ALO', 'SAI', 'HUL', 'RUS', 'ALB', 'fer', 'ast'], 2x driver SAI with actual score 189 with predicted score 238
while actual best team ['OCO', 'STR', 'SAI', 'MAG', 'ZHO', 'red', 'fer'] with best possible score of 267
Number of drivers differed in predicted and actual best teams 5
Number of drivers changed from previous team selection 0
--- - -- - - - - -  - -
2. saudi-arabia gp, with budget 101.1 mse error = 123.7
penalty for best team 30
predicted best team ['OCO', 'STR', 'SAI', 'MAG', 'ZHO', 'red', 'fer'], 2x driver SAI with actual score 196 with predicted score 239
while actual best team ['ALO', 'LEC', 'HUL', 'BOT', 'COL', 'red', 'fer'] with best possible score of 253
Number of drivers differed in predicted and actual best teams 5
Number of drivers changed from previous team selection 5
--- - -- - - - - -  - -
3. australia gp, with budget 102.2 mse error = 210.25
penalty for best team 0
pre

In [32]:
with open(f"notebooks/score_{np.sum(predicted_team_score_array)}_differing_{differing_team_members_avg}_{change_message}_{current_time}.txt", 'w') as f:
    f.write(cap.stdout)

Visualizations

In [42]:
mse_error_array , race_name_array , score_of_predicted_team_array, theoretical_best_score_array, differing_team_members

([118.45,
  123.7,
  210.25,
  85.7,
  117.8,
  113.35,
  103.35,
  355.95,
  347.1,
  55.95,
  88.15,
  125.7,
  61.45,
  110.9,
  33.35,
  64.65,
  106.85,
  78.6,
  81.9,
  108.35,
  339.5,
  109.95,
  181.7,
  165.6],
 ['bahrain',
  'saudi-arabia',
  'australia',
  'japan',
  'china',
  'miami',
  'emilia-romagna',
  'monaco',
  'canada',
  'spain',
  'austria',
  'great-britain',
  'hungary',
  'belgium',
  'netherlands',
  'italy',
  'azerbaijan',
  'singapore',
  'united-states',
  'mexico',
  'brazil',
  'las-vegas',
  'qatar',
  'abu-dhabi'],
 [189,
  196,
  224,
  267,
  354,
  229,
  187,
  81,
  105,
  220,
  217,
  194,
  236,
  196,
  271,
  225,
  274,
  227,
  351,
  274,
  207,
  276,
  262,
  184],
 [267,
  253,
  287,
  293,
  361,
  304,
  231,
  255,
  274,
  274,
  320,
  249,
  264,
  255,
  279,
  297,
  301,
  254,
  369,
  303,
  422,
  320,
  318,
  327],
 [5, 5, 4, 3, 1, 4, 5, 4, 6, 4, 4, 3, 3, 4, 4, 3, 2, 2, 1, 2, 5, 3, 3, 4])

In [45]:
import plotly.express as px 
df = pd.DataFrame({
    "race": race_name_array,
    "error" : mse_error_array,
    "score_of_predicted_team": score_of_predicted_team_array,
    "score_of_theoretical_best_team": theoretical_best_score_array
})


In [81]:
fig = px.line(df, x='race', y=df.columns[1:4], title="Simulation of 2024 F1 season", height = 500, width=1200, 
              labels={"race": "Race", "value": "Score"}, line_shape = "spline", markers=True,
              color_discrete_sequence=["red", "blue", "green"],
            #   line_dash_map={'error': 'dash'},
              line_dash_sequence=["solid", "dot", "dot"],
              )
fig.update_xaxes(tickangle=90)
fig.update_traces(
    selector={"name": "error"}, 
    line={"dash": "dash"}
)
fig.show()