# 6. FORMULA ONE RACING SEASONS

In [1]:
import fastf1
import pickle
import os
import copy
import calendar
import numpy as np
import pandas as pd

#Visualization
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import dendrogram, linkage
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as ssd


# Locate the notebook on disk (a relative name is resolved against the current
# working directory) and point at the 'Data Objects' folder that sits next to it.
notebook_path = os.path.realpath("6. Formula 1 Racing Seasons.ipynb")
parent_dir = os.path.dirname(notebook_path)
data_path = os.path.join(parent_dir, 'Data Objects')

In [2]:
def datetime2utc(df):
    """
    Replace the 'Date' column with integer Unix timestamps in 'Date_UTC'.

    Every value of df['Date'] is converted to whole seconds since the Unix
    epoch (calendar.timegm works on a time tuple, so sub-second precision is
    discarded). Timezone-aware values are converted to UTC first; naive
    values are interpreted as already being in UTC.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date' column of datetime values (objects exposing
        ``utctimetuple()``, e.g. pandas Timestamps).

    Returns
    -------
    pandas.DataFrame
        A new frame without 'Date' and with 'Date_UTC' added as the last
        column. The frame passed in is left unmodified.
    """
    # utctimetuple() normalises timezone-aware timestamps to UTC before
    # timegm() reads them; timetuple() would pass the local wall-clock time
    # through and shift the result by the UTC offset.
    epoch_seconds = [calendar.timegm(stamp.utctimetuple()) for stamp in df['Date']]
    df = df.drop('Date', axis=1)  # If you wish to keep the old date format, comment this line
    df.loc[:, "Date_UTC"] = epoch_seconds
    return df

In [3]:
# Unpickle the stored 2022 season object (a dict keyed by Grand Prix name).
# NOTE: pickle.load executes arbitrary code; only open files you created yourself.
f1_22_file = os.path.join(data_path, 'f1_22.pkl')
with open(f1_22_file, 'rb') as season_handle:
    f1_22 = pickle.load(season_handle)

In [4]:
# Unpickle the stored 2023 season object (a dict keyed by Grand Prix name).
# NOTE: pickle.load executes arbitrary code; only open files you created yourself.
f1_23_file = os.path.join(data_path, 'f1_23.pkl')
with open(f1_23_file, 'rb') as season_handle:
    f1_23 = pickle.load(season_handle)

In [5]:
# Show the Grand Prix names stored for each season, with one blank line between
# the two listings.
print(f1_22.keys(), f1_23.keys(), sep=" \n\n")

dict_keys(['Bahrain_Grand_Prix', 'Australian_Grand_Prix', 'Emilia_Romagna_Grand_Prix', 'Spanish_Grand_Prix', 'Monaco_Grand_Prix', 'Azerbaijan_Grand_Prix', 'Canadian_Grand_Prix', 'Austrian_Grand_Prix', 'French_Grand_Prix', 'Hungarian_Grand_Prix', 'Belgian_Grand_Prix', 'Dutch_Grand_Prix', 'Italian_Grand_Prix', 'Japanese_Grand_Prix', 'United_States_Grand_Prix', 'Mexico_City_Grand_Prix', 'Abu_Dhabi_Grand_Prix']) 

dict_keys(['Bahrain_Grand_Prix', 'Saudi_Arabian_Grand_Prix', 'Australian_Grand_Prix', 'Azerbaijan_Grand_Prix', 'Miami_Grand_Prix', 'Monaco_Grand_Prix', 'Spanish_Grand_Prix', 'Austrian_Grand_Prix', 'British_Grand_Prix', 'Belgian_Grand_Prix', 'Italian_Grand_Prix', 'Japanese_Grand_Prix', 'Mexico_City_Grand_Prix'])


In [6]:
# Grand Prix that are analysed for both the 2022 and the 2023 season.
# NOTE(review): 'Spanish_Grand_Prix' also appears in both key listings printed
# above but is not included here -- confirm the omission is intentional.
common_gps = [
    'Bahrain_Grand_Prix',
    'Australian_Grand_Prix',
    'Azerbaijan_Grand_Prix',
    'Monaco_Grand_Prix',
    'Austrian_Grand_Prix',
    'Belgian_Grand_Prix',
    'Italian_Grand_Prix',
    'Japanese_Grand_Prix',
    'Mexico_City_Grand_Prix',
]

In [7]:
def normalize_driver_data(dictt, gps=None):
    """
    Standardise each driver's data for the selected Grand Prix.

    Parameters
    ----------
    dictt : dict
        Maps a Grand Prix name to a DataFrame whose index level 0 holds the
        driver identifier and which contains a 'Date' column.
    gps : iterable of str, optional
        Grand Prix names to process. When omitted, the module-level
        ``common_gps`` list is used.

    Returns
    -------
    dict
        ``{gp: {driver: DataFrame}}`` where every DataFrame holds the
        driver's rows with 'Date' replaced by 'Date_UTC' (see datetime2utc),
        a fresh 0..n-1 index, and every column scaled with StandardScaler
        fitted on that driver's rows only.
    """
    selected_gps = set(common_gps if gps is None else gps)

    drv = {}
    for gp, df in dictt.items():
        if gp not in selected_gps:
            continue

        drv[gp] = {}
        # Index.unique() keeps first-appearance order, so the driver order is
        # reproducible; iterating a set of strings varies between runs.
        for driver in df.index.get_level_values(0).unique():
            drv_df = df.loc[(driver, slice(None))]
            # Make the date an integer and drop the remaining index (noted by
            # the original author as the XYZ coordinate index).
            drv_df = datetime2utc(drv_df).reset_index(drop=True)

            # A new scaler per driver: each driver is standardised independently.
            scaler = StandardScaler()
            drv_df_n = pd.DataFrame(scaler.fit_transform(drv_df), columns=drv_df.columns)

            drv[gp][driver] = drv_df_n

    return drv

In [8]:
# Build the per-driver standardised data for the 2022 season first, then 2023.
drv22, drv23 = (normalize_driver_data(season) for season in (f1_22, f1_23))

In [12]:
# Show only the driver identifiers; the value itself is a dict holding one
# standardised DataFrame per driver.
drv22['Bahrain_Grand_Prix'].keys()

dict_keys(['3', '11', '24', '44', '10', '63', '55', '27', '47', '77', '6', '18', '1', '22', '4', '31', '23', '14', '20', '16'])

In [13]:
# Show only the driver identifiers; the value itself is a dict holding one
# standardised DataFrame per driver.
drv23['Bahrain_Grand_Prix'].keys()

dict_keys(['21', '11', '24', '44', '10', '63', '55', '27', '77', '18', '1', '22', '4', '2', '31', '23', '14', '20', '16', '81'])

In [11]:
# Persist the standardised per-driver data of both seasons, 2022 first.
for pickle_name, season_drivers in (('pilots22.pkl', drv22), ('pilots23.pkl', drv23)):
    with open(os.path.join(data_path, pickle_name), 'wb') as out_handle:
        pickle.dump(season_drivers, out_handle)