In [None]:
#Imports
import fastf1 as ff1
from fastf1.ergast import Ergast
import fastf1.plotting
fastf1.plotting.setup_mpl(misc_mpl_mods=False) #Sets up Plot Formats
import logging
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from scipy.stats import mannwhitneyu

import math
import numpy as np
import random 
import pandas as pd

In [None]:
# Enable the on-disk FastF1 cache and quiet down library logging.
import os

# FastF1 refuses to enable a cache directory that does not exist yet, so make
# sure it is there before turning the cache on (fresh checkout / fresh kernel).
os.makedirs('./cache', exist_ok=True)
ff1.Cache.enable_cache('./cache')
# Minimize Logging
ff1.set_log_level(logging.ERROR)
logging.getLogger('pandas').setLevel(logging.CRITICAL)
logging.getLogger('matplotlib').setLevel(logging.ERROR)

In [None]:
# Fallback colour generator for drivers without a known plotting colour.
def generate_random_color():
    """Return a random colour as a ``#rrggbb`` hex string."""
    rgb_value = random.randint(0, 0xFFFFFF)
    return f"#{rgb_value:06x}"

#### Code To Retrieve Ergast Data from 2011 to 2024

In [None]:
def _first_row_positions(response):
    """Return the integer ``position`` of the first row of every result frame in ``response.content``."""
    # ``.iloc[0]`` picks the value explicitly; calling int() directly on a
    # one-element Series is deprecated in recent pandas versions.
    return [int(frame["position"].iloc[0]) for frame in response.content]


def ergast_no_sessions():
    """Collect race and qualifying positions for Ricciardo and his teammates.

    For every season returned by Ergast for driver ``"ricciardo"``, the
    constructor he drove for is looked up, and the race / qualifying results of
    every driver of that constructor in that season are fetched.

    Returns:
        dict: ``{season: info}`` where ``info`` has the keys
            ``'ricpositions'`` / ``'ric_qual_positions'`` (lists of int),
            ``'teammate_positions_array'`` / ``'teammate_qual_positions'``
            (lists of ``(driverId, positions)`` tuples),
            ``'drivers'`` (list of driver ids) and
            ``'driver_color_dict'`` (driver id -> colour; one dict shared by
            all seasons so a driver keeps the same colour everywhere).
    """
    # Initialize Ergast API
    ergast = Ergast()
    # Get all seasons for driver Ricciardo
    ricciardo_seasons = ergast.get_seasons(driver="ricciardo")
    sessions = {}
    # Colour lookup shared across seasons. It must be created here: the
    # function previously relied on a name that was never defined.
    colorDict = {}

    for season in ricciardo_seasons['season']:
        # Constructor Ricciardo drove for in this season (first entry is used)
        constructor = ergast.get_constructor_info(season=season, driver="ricciardo").constructorId[0]
        # Every driver of that constructor in this season (includes Ricciardo)
        teammates = ergast.get_driver_info(season=season, constructor=constructor)
        driver_ids = list(teammates.driverId)

        # Three-letter driver codes, when the response provides them. They are a
        # better colour-lookup key than the first three letters of the driver id
        # (e.g. "max_verstappen"[:3] is "max").
        if "driverCode" in teammates.columns:
            code_by_id = dict(zip(teammates["driverId"], teammates["driverCode"]))
        else:
            code_by_id = {}

        ricpositions = []
        ric_qual_positions = []
        teammatepositions_array = []
        teammate_qual_positions = []

        for driver in driver_ids:
            results = ergast.get_race_results(season=season, driver=driver)
            qual_results = ergast.get_qualifying_results(season=season, driver=driver)
            positions = _first_row_positions(results)
            qual_positions = _first_row_positions(qual_results)

            if driver == "ricciardo":
                ricpositions = positions
                ric_qual_positions = qual_positions
            else:
                teammatepositions_array.append((driver, positions))
                teammate_qual_positions.append((driver, qual_positions))

            if driver not in colorDict:
                code = code_by_id.get(driver)
                # Fall back to the original heuristic when no usable code exists.
                identifier = code if isinstance(code, str) and code else driver[:3].upper()
                try:
                    colorDict[driver] = ff1.plotting.driver_color(identifier)
                except Exception:
                    # Unknown driver for the plotting helper: use a random colour.
                    colorDict[driver] = generate_random_color()

        sessions[season] = {
            'ricpositions': ricpositions,
            'drivers': driver_ids,
            'driver_color_dict': colorDict,
            'ric_qual_positions': ric_qual_positions,
            'teammate_positions_array': teammatepositions_array,
            'teammate_qual_positions': teammate_qual_positions
        }
    return sessions


In [None]:
def _print_mannwhitney(ric_sample, teammate_sample, teammate, heading, session_label,
                       p_format, alpha=0.1):
    """Print a one-sided Mann-Whitney U comparison of Ricciardo against one teammate.

    ``alternative='greater'`` means the alternative hypothesis is that
    Ricciardo's position numbers tend to be larger than the teammate's.
    """
    print(heading, teammate)
    if len(ric_sample) == 0 or len(teammate_sample) == 0:
        # The test is undefined for an empty sample; report instead of failing.
        print("Not enough data to run the test.\n")
        return
    stat, p_value = mannwhitneyu(ric_sample, teammate_sample, alternative='greater')
    print(('Statistics=%.3f, p=' + p_format) % (stat, p_value))
    if p_value < alpha:
        print("There is a significant difference between the performance of the drivers in %s on average.\n" % session_label)
    else:
        print("There is NO significant difference between the performance of the drivers in %s on average.\n" % session_label)


def ergast_with_sessions(sessions):
    """Plot and statistically compare Ricciardo with his teammates, season by season.

    Args:
        sessions: mapping ``season -> info`` where ``info`` provides
            ``'ricpositions'``, ``'ric_qual_positions'``,
            ``'teammate_positions_array'``, ``'teammate_qual_positions'``
            (sequences of ``(driver, positions)`` pairs), ``'drivers'`` and
            ``'driver_color_dict'``.

    Side effects:
        Prints a Mann-Whitney U report per season and teammate (race and
        qualifying) and shows two figures, one subplot per season each.
    """
    n_cols = 5
    # Keep the 3x5 layout for up to 15 seasons, but grow instead of failing
    # with an IndexError when more seasons are passed in.
    n_rows = max(3, -(-len(sessions) // n_cols))
    figsize = (15, 8 * n_rows / 3)
    positions_fig, positions_axs = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)
    quals_fig, quals_axs = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)
    quals_fig.suptitle("Ricciardo's Qualifications Compared to Teammates")
    positions_fig.suptitle("Ricciardo's Race Results Compared to Teammates")
    # Row-major flattening: index i is the same subplot as [i // 5, i % 5].
    pos_axes = positions_axs.ravel()
    qual_axes = quals_axs.ravel()

    for ind, (season, data) in enumerate(sessions.items()):
        print("Season:", season)
        print("Race:")
        ricpositions = data['ricpositions']
        teammate_positions_array = data['teammate_positions_array']
        ric_qual_positions = data['ric_qual_positions']
        teammate_qual_positions = data['teammate_qual_positions']
        colorDict = data['driver_color_dict']
        pos_ax = pos_axes[ind]
        qual_ax = qual_axes[ind]

        # Look teammates up by name rather than by list position.
        race_by_driver = {entry[0]: entry[1] for entry in teammate_positions_array}
        qual_by_driver = {entry[0]: entry[1] for entry in teammate_qual_positions}

        for driver in data['drivers']:
            if driver == "ricciardo":
                positions = ricpositions
                qual_positions = ric_qual_positions
            else:
                positions = race_by_driver.get(driver)
                qual_positions = qual_by_driver.get(driver)
                if positions is None or qual_positions is None:
                    # No data recorded for this driver: skip it rather than
                    # re-plotting the previous driver's series under this name.
                    continue
            color = colorDict.get(driver)  # None -> matplotlib's default colour cycle
            pos_ax.plot(range(len(positions)), positions, color=color, label=driver)
            qual_ax.plot(range(len(qual_positions)), qual_positions, color=color, label=driver)

        #Mann Whitney U-test Code
        for teammate, teammate_positions in race_by_driver.items():
            _print_mannwhitney(ricpositions, teammate_positions, teammate,
                               "Ricciardo's Race Positions compared to", "Races", '%.3f')
        print("-------------------------\nQualifications:")
        for teammate, teammate_quals in qual_by_driver.items():
            # The heading used to say "Race Positions" in this qualifying section.
            _print_mannwhitney(ric_qual_positions, teammate_quals, teammate,
                               "Ricciardo's Qualifying Positions compared to", "Qualifications", '%.5f')
        print("--------------------------------------------------------")

        #Plotting
        pos_ax.set_title(season)
        pos_ax.legend(loc="upper right")
        pos_ax.set_xlabel("Races")
        pos_ax.set_ylabel("Finishing Position")
        qual_ax.set_title(season)
        qual_ax.legend(loc="upper right")
        qual_ax.set_xlabel("Races")
        qual_ax.set_ylabel("Qualifying Position")

    # Hide the grid cells that did not receive a season.
    for unused_ax in list(pos_axes[len(sessions):]) + list(qual_axes[len(sessions):]):
        unused_ax.set_visible(False)

    #Show Plots
    # Lay out via the figure objects: plt.figure(1)/plt.figure(2) only address
    # these figures when no other figure exists in the kernel.
    positions_fig.tight_layout()
    quals_fig.tight_layout()
    plt.show()

In [None]:
import json
# Read the pre-computed season comparison; JSON is UTF-8, so do not depend on
# the platform's default encoding.
with open('comparison.json', 'r', encoding='utf-8') as f:
    sessions = json.load(f)

# Plot after the file is closed; the handle is not needed while the
# (potentially blocking) figures are shown.
ergast_with_sessions(sessions)

In [None]:
def recentMetrics(): #Retrieve recent metrics (2018-Present) from FastF1 API
    """Collect normalised per-race metrics for Ricciardo for seasons 2018-2024.

    For every event of every season the race session is loaded and, when
    Ricciardo and exactly one teammate appear in the results, one value is
    appended to each metric list. Events that cannot be processed (no race
    session, Ricciardo absent, load failure, ...) are reported and skipped.

    Returns:
        dict: lists keyed by ``"race_position"``, ``"qualifying_position"``,
            ``"fastest_lap"``, ``"positions_gained"``, ``"drpt"`` (teammate
            race position minus Ricciardo's) and ``"dqpt"`` (same for grid
            position). ``"aggregate_stat"`` is present but left empty.
    """
    seasonRange = range(2018, 2025)
    metrics = {"race_position":[], "qualifying_position": [], "fastest_lap":[], "positions_gained":[], "drpt":[], "dqpt":[], "aggregate_stat":[]}

    missing_position = 19.0   # value substituted for missing (grid) positions
    position_scale = 20       # divisor for the absolute position metrics
    delta_scale = 19          # divisor for position differences

    for season in seasonRange:
        seasonEvents = ff1.get_event_schedule(season)
        event_names = list(seasonEvents['EventName'])
        for ind, event in enumerate(event_names):
            try:
                race = ff1.get_event(season, event).get_race()
                race.load()
                # Work on a copy so the session's own results stay untouched.
                results = race.results.copy()
                results['Position'] = results['Position'].fillna(missing_position)
                results['GridPosition'] = results['GridPosition'].fillna(missing_position)

                ric_rows = results.loc[results["Abbreviation"] == "RIC"]
                if len(ric_rows) != 1:
                    raise LookupError("expected exactly one result row for RIC")
                ric = ric_rows.iloc[0]
                mate_rows = results.loc[(results["TeamName"] == ric["TeamName"]) & (results["Abbreviation"] != "RIC")]
                if len(mate_rows) != 1:
                    raise LookupError("expected exactly one teammate result row")
                mate = mate_rows.iloc[0]

                # NOTE(review): a GridPosition of 0 may denote a pit-lane start
                # and would then score better than pole here - confirm.
                ric_position = int(ric['Position'])
                ric_grid = int(ric['GridPosition'])

                norm_position = 1-(ric_position/position_scale)
                norm_qualy = 1-(ric_grid/position_scale)
                norm_fastest_lap = 1 if (race.laps.pick_fastest()['Driver']=="RIC") else 0
                norm_pos_gained = (ric_grid-ric_position)/delta_scale
                norm_team_race_diff = (int(mate["Position"])-ric_position)/delta_scale
                norm_team_qualy_diff = (int(mate['GridPosition'])-ric_grid)/delta_scale
            except Exception as exc:
                # Best-effort collection: skip this event, but say why, and let
                # KeyboardInterrupt through so the long loop can be stopped.
                print(f"{season}: {event} - skipped ({type(exc).__name__}: {exc})")
                continue
            metrics['race_position'].append(norm_position)
            metrics['qualifying_position'].append(norm_qualy)
            metrics['fastest_lap'].append(norm_fastest_lap)
            metrics["positions_gained"].append(norm_pos_gained)
            metrics["drpt"].append(norm_team_race_diff)
            metrics["dqpt"].append(norm_team_qualy_diff)
            print(f"{season}:", event, "-", f"{ind+1}/{len(event_names)}")
    return metrics



In [None]:
def toDf(metrics):
    """Write the six metric columns of ``metrics`` to ``metrics.csv``.

    Args:
        metrics: mapping with equally long sequences under the keys
            ``"race_position"``, ``"qualifying_position"``, ``"fastest_lap"``,
            ``"positions_gained"``, ``"drpt"`` and ``"dqpt"``. Other keys are
            ignored.

    The file is written to the current working directory with one float
    column per metric and no index column.
    """
    labels = ["race_position", "qualifying_position", "fastest_lap", "positions_gained", "drpt", "dqpt"]
    inputsDF = pd.DataFrame({label: metrics[label] for label in labels}, columns=labels, dtype=float)
    # index=False: otherwise the row index is saved as an unnamed first column
    # and comes back from a plain pd.read_csv as an extra "Unnamed: 0" column.
    inputsDF.to_csv("metrics.csv", index=False)

# Collect the per-race metrics (recentMetrics loads every race session of the
# configured seasons, so this is the slow step) and persist them through toDf,
# which writes metrics.csv.
metricsVal = recentMetrics()
toDf(metricsVal)

In [None]:
#Imports for analytics.
import logging
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression

import math
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# One-dimensional reducers. t-SNE is stochastic, so its seed is fixed to make
# Restart & Run All reproduce the same embedding (and the same fitted weights).
tsne = TSNE(n_components=1, random_state=42)
pca = PCA(n_components=1)


In [None]:
def parametrize(metrics):
    """Fit a grid-searched linear regression of ``metrics`` onto their 1-D t-SNE embedding.

    Every column of ``metrics`` is used as a feature, so pass only feature
    columns. The module-level ``tsne`` object produces the regression target.

    Args:
        metrics: feature table accepted by both ``tsne.fit_transform`` and
            ``GridSearchCV.fit``.

    Returns:
        tuple: ``(grid_search, weights, parameters)`` - the fitted
            ``GridSearchCV``, the ``coef_`` of its best estimator, and the
            parameter grid that was searched.
    """
    parameters = dict(fit_intercept=[True, False], copy_X=[True, False])

    # The 1-D t-SNE embedding of the features serves as the regression target.
    embedding = tsne.fit_transform(metrics)

    # Search the grid and keep the fitted search object.
    grid_search = GridSearchCV(LinearRegression(), parameters).fit(metrics, embedding)

    # Coefficients of the best model act as the per-feature weights.
    weights = grid_search.best_estimator_.coef_

    print('Best parameters:', grid_search.best_params_)
    print('Weights:', weights)

    return grid_search, weights, parameters

In [None]:
# Data retrieval for use in testing. Only the metric columns are kept: a CSV
# saved together with its row index comes back with an extra "Unnamed: 0"
# column, which must not be fed to t-SNE / the regression as a feature.
FEATURE_COLUMNS = ["race_position", "qualifying_position", "fastest_lap", "positions_gained", "drpt", "dqpt"]
metrics = pd.read_csv("metrics.csv")[FEATURE_COLUMNS]
grid_search, weights, parameters = parametrize(metrics) # Call the trainer.
# One predicted value per race; ravel() flattens the (n, 1) prediction array.
normalized_metrics = grid_search.predict(metrics).ravel().tolist()

In [None]:
from scipy import stats

# Linear trend of the per-race index over time (x = race number since 2018).
slope, intercept, r_value, p_value, std_err = stats.linregress(range(len(normalized_metrics)), normalized_metrics)

# Points along the fitted line, for plotting.
x = np.linspace(0, len(normalized_metrics))
y = slope*x+intercept

print(f"Slope: {slope}")
print(f"p-value: {p_value}")
plt.plot(range(len(normalized_metrics)), normalized_metrics, color=ff1.plotting.driver_color("RIC"), label="DPI per race")
plt.plot(x, y, label="Linearized")
plt.title("Ricciardo's Driver Performance Index Over Time")
plt.xlabel('Races Since The Start of The 2018 Season')
plt.ylabel('Driver Performance Index (DPI)')
# The line labels only show up once a legend is drawn.
plt.legend()
plt.show()