In [1]:
import fastf1
import pandas as pd
import os

#dim reduction preprocessing
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import StandardScaler
import numpy as np

# Definitive Version

In [8]:
year = 2023
# Get the schedule for the year
year_schedule = fastf1.get_event_schedule(year)
print(year_schedule['RoundNumber'])

# The first schedule entry is the pre-season testing event, not an actual race;
# it is listed with RoundNumber 0 (see the printed schedule), so select the
# races by round number instead of by position in the schedule.
race_events = year_schedule[year_schedule['RoundNumber'] > 0]

# Export every race of the season to its own directory: ../data/<round>_<location>/
# Round number and location are read from the same schedule row, so the pairing
# does not depend on the schedule's index labels matching the round numbers.
for round_number, location in zip(race_events['RoundNumber'], race_events['Location']):

    # Create the directory (and its telemetry subdirectory) where all the files
    # are stored. makedirs with exist_ok=True also creates a missing '../data'
    # parent and lets the cell be re-run without failing on existing directories.
    dirname = '../data/' + str(round_number) + '_' + location
    telemetry_dir = dirname + '/telemetry'
    os.makedirs(telemetry_dir, exist_ok=True)

    # Get the race by its exact round number (no fuzzy name matching) and load the data.
    # `race` is left bound to the last loaded race once the loop finishes.
    race = fastf1.get_session(year, int(round_number), 'R')
    race.load()

    # Laps
    race.laps.to_csv(dirname + '/laps.csv')
    # Results
    race.results.to_csv(dirname + '/results.csv')
    # Race info (unused)
    race.event.to_csv(dirname + '/race_info.csv')
    # Weather data (unused)
    race.weather_data.to_csv(dirname + '/weather_data.csv')
    # Track status
    race.track_status.to_csv(dirname + '/track_status.csv')

    # One telemetry file per key of race.car_data
    for car_key, car_telemetry in race.car_data.items():
        car_telemetry.to_csv(telemetry_dir + '/' + str(car_key) + '_telemetry.csv')


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.2.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
Name: RoundNumber, dtype: int32


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.2.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req

In [65]:
# Build the lap dataset from the session currently bound to `race`.
# NOTE(review): `race` is not assigned in this cell; it is whatever session an
# earlier cell left behind (the export loop leaves its last loaded race) — confirm
# this is the race you intend to analyse.
dataset = race.laps

# Columns that are not used by the analysis
cols_to_remove = ['DriverNumber', 'PitOutTime', 'PitInTime', 'IsPersonalBest', 'FreshTyre',
                  'LapStartTime', 'Position', 'Deleted', 'DeletedReason', 'FastF1Generated',
                  'IsAccurate']
dataset = dataset.drop(columns=cols_to_remove)

# TrackStatus is cast to float so it can be compared with the number 1;
# every lap whose TrackStatus is not exactly 1 is discarded.
dataset['TrackStatus'] = dataset['TrackStatus'].astype('float')
rows_to_remove = dataset.index[dataset['TrackStatus'] != 1].tolist()
dataset = dataset.drop(index=rows_to_remove)

# Drop rows with any missing value, then drop TrackStatus itself: it was only
# needed to select the rows above.
dataset = dataset.dropna(axis=0).drop(columns='TrackStatus')

# Convert the time columns to milliseconds: the int64 view is divided by 1e6.
# For the timedelta columns this gives a duration; LapStartDate is a timestamp,
# so (assuming nanosecond resolution — TODO confirm) it becomes ms since the epoch.
cols_to_change = ['Time', 'LapTime', 'Sector1Time',
                  'Sector2Time', 'Sector3Time', 'Sector1SessionTime',
                  'Sector2SessionTime', 'Sector3SessionTime', 'LapStartDate']
dataset[cols_to_change] = dataset[cols_to_change].astype(np.int64) / int(1e6)

# reset_index() keeps the previous index as an 'index' column
dataset = dataset.reset_index()

# info() prints its report itself and returns None, so it must not be wrapped
# in print() (that printed a stray "None" after the report).
dataset.info()
print(dataset)

<class 'fastf1.core.Laps'>
RangeIndex: 563 entries, 0 to 562
Data columns (total 20 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   index               563 non-null    int64  
 1   Time                563 non-null    float64
 2   Driver              563 non-null    object 
 3   LapTime             563 non-null    float64
 4   LapNumber           563 non-null    float64
 5   Stint               563 non-null    float64
 6   Sector1Time         563 non-null    float64
 7   Sector2Time         563 non-null    float64
 8   Sector3Time         563 non-null    float64
 9   Sector1SessionTime  563 non-null    float64
 10  Sector2SessionTime  563 non-null    float64
 11  Sector3SessionTime  563 non-null    float64
 12  SpeedI1             563 non-null    float64
 13  SpeedI2             563 non-null    float64
 14  SpeedFL             563 non-null    float64
 15  SpeedST             563 non-null    float64
 16  Compound           

In [66]:

# Numeric lap features that are standardised and projected with PCA
cols = ['Time', 'LapTime', 'LapNumber', 'Stint', 'Sector1Time',
        'Sector2Time', 'Sector3Time', 'Sector1SessionTime',
        'Sector2SessionTime', 'Sector3SessionTime', 'SpeedI1', 'SpeedI2',
        'SpeedFL', 'SpeedST', 'TyreLife', 'LapStartDate']

# Standardise the features before PCA.
# NOTE(review): the import cell takes StandardScaler from
# sklearn.discriminant_analysis; its documented home is sklearn.preprocessing —
# consider importing it from there.
scaler = StandardScaler()
scaled = scaler.fit_transform(dataset[cols])

# Project onto the first two principal components.
# random_state is fixed because svd_solver='auto' may pick a randomized solver
# (depending on the data shape and the scikit-learn version); without a seed the
# components written to PCA.csv are not guaranteed to be identical between runs.
pca = PCA(n_components=2, random_state=0)
components = pca.fit_transform(scaled)
principal_components = pca.components_  # component loadings, kept for later inspection

# One row per lap: the two component scores, then the identifying columns and
# the original (unscaled) features. Assignment aligns on the row index.
df_components = pd.DataFrame(data=components, columns=['PC1', 'PC2'])
df_components['Driver'] = dataset['Driver']
df_components['Team'] = dataset['Team']
df_components[cols] = dataset[cols]
print(df_components)

# Written to the current working directory
df_components.to_csv('PCA.csv')

          PC1       PC2 Driver             Team       Time  LapTime  \
0    4.718307 -1.963845    VER  Red Bull Racing  4184870.0  83391.0   
1    4.833223 -1.051094    VER  Red Bull Racing  4267974.0  83104.0   
2    4.297346 -3.429514    VER  Red Bull Racing  4350817.0  82843.0   
3    3.882889  2.921290    VER  Red Bull Racing  5836049.0  87827.0   
4    2.891328  0.190619    VER  Red Bull Racing  5919509.0  83460.0   
..        ...       ...    ...              ...        ...      ...   
558 -3.432884  1.876682    PIA          McLaren  9032094.0  81686.0   
559 -3.615807  1.848049    PIA          McLaren  9113619.0  81525.0   
560 -3.746293  1.818235    PIA          McLaren  9195371.0  81752.0   
561 -3.898716  1.843439    PIA          McLaren  9277108.0  81737.0   
562 -4.039168  1.873163    PIA          McLaren  9358791.0  81683.0   

     LapNumber  Stint  Sector1Time  Sector2Time  Sector3Time  \
0          4.0    1.0      28900.0      18326.0      36165.0   
1          5.0    1