In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# import f1 package
import fastf1 as ff1
from fastf1 import plotting
# Configure matplotlib through fastf1's plotting helper, using its default arguments.
plotting.setup_mpl()

In [2]:
# fastf1 refuses to enable its cache when the target directory is missing,
# so create it first; this keeps "Restart Kernel -> Run All" working on a fresh checkout.
CACHE_DIR = Path('../F1/cache')
CACHE_DIR.mkdir(parents=True, exist_ok=True)
ff1.Cache.enable_cache(str(CACHE_DIR))

In [3]:
# Race weeks to download, in calendar order. Each entry is passed to
# ff1.get_session() as the event name and is also stored verbatim in the
# 'Raceweek' column, so the spelling here ends up in the exported data.
f1_cal = [
    'BAHRAIN GP',
    'SAUDI ARABIAN GRAND PRIX',
    'AUSTRALIAN GP',
    'EMILIA ROMAGNA GRAND PRIX',
    'MIAMI GRAND PRIX',
    'SPANISH GP',
    'MONACO GP',
    'AZERBAIJAN GP',
    'CANADIAN GP',
    'BRITISH GP',
    'AUSTRIAN GP',
    'FRENCH GRAND PRIX',
    'HUNGARIAN GP',
    'BELGIAN GP',
    'DUTCH GRAND PRIX',
    'ITALIAN GP',
    'SINGAPORE GP',
    'JAPANESE GP',
    'UNITED STATES GP',
    'MEXICAN GP',
    'BRAZILIAN GP',
    # 'ABU DHABI GP',  # NOTE(review): excluded in the original; presumably its data was not yet available - confirm before re-enabling
]

In [4]:
def get_race(year, race, weekend, LIMIT_OUTLIER = 1000, drivers = None):
    """Load one session and return its cleaned laps for the selected drivers.

    Parameters
    ----------
    year : passed through to ``ff1.get_session`` as the season.
    race : passed through to ``ff1.get_session`` as the event; also written
        unchanged into the ``Raceweek`` column of the result.
    weekend : passed through to ``ff1.get_session`` as the session identifier
        (the notebook calls this with ``'R'`` and ``'Q'``).
    LIMIT_OUTLIER : laps whose ``LapTime_seconds`` exceeds this value are dropped.
    drivers : optional iterable of driver codes to keep, in output order.
        Defaults to the 22 codes the notebook originally hard-coded.

    Returns
    -------
    The concatenated laps of the selected drivers with two extra columns,
    ``LapTime_seconds`` (float seconds) and ``Raceweek``. Rows with
    ``IsAccurate == False`` or a lap time above ``LIMIT_OUTLIER`` are removed.
    The index is the position in the concatenated frame *before* filtering,
    so it can contain gaps.

    Raises
    ------
    ValueError
        From ``pd.concat`` if ``drivers`` is empty.
    """
    if drivers is None:
        # Default selection: the driver codes this notebook was written for.
        drivers = ('SAI', 'PER', 'HAM', 'LEC', 'ALO', 'NOR', 'VER', 'MSC', 'VET', 'MAG',
                   'STR', 'LAT', 'RIC', 'TSU', 'OCO', 'GAS', 'BOT', 'RUS', 'ZHO', 'ALB',
                   'DEV', 'HUL')

    session = ff1.get_session(year, race, weekend)
    session.load()

    # One frame per driver, stacked in the order given by `drivers`.
    laps = session.laps
    df_local = pd.concat([laps.pick_driver(code) for code in drivers])
    df_local = df_local.reset_index(drop=True)

    # Convert LapTime (timedelta64) to float seconds.
    df_local['LapTime_seconds'] = df_local['LapTime'].dt.total_seconds()

    # Drop laps flagged as inaccurate and implausibly slow outliers.
    # Missing values compare False on both tests, so such rows are kept.
    inaccurate = df_local['IsAccurate'] == False  # noqa: E712 - element-wise comparison
    too_slow = df_local['LapTime_seconds'] > LIMIT_OUTLIER
    df_local = df_local[~(inaccurate | too_slow)]

    # Tag every remaining lap with the race week it belongs to.
    return df_local.assign(Raceweek=race)

In [5]:
# Start with two independent, empty frames: one for race laps, one for qualifying laps.
df_R, df_Q = pd.DataFrame(), pd.DataFrame()

In [6]:
# get RACE and QUALIFICATION session and export to csv
# Fetch the RACE and QUALIFICATION laps for every race week of the season.
# Frames are gathered in lists and concatenated once, so re-running this cell
# rebuilds df_R / df_Q from scratch instead of appending duplicate laps to them.
SEASON = 2022
LAP_TIME_LIMIT_S = 1000  # laps slower than this many seconds are dropped as outliers

race_frames = []
quali_frames = []
for raceweek in f1_cal:
    race_frames.append(get_race(SEASON, raceweek, 'R', LAP_TIME_LIMIT_S))
    quali_frames.append(get_race(SEASON, raceweek, 'Q', LAP_TIME_LIMIT_S))

# pd.concat rejects an empty list; fall back to an empty frame in that case.
df_R = pd.concat(race_frames) if race_frames else pd.DataFrame()
df_Q = pd.concat(quali_frames) if quali_frames else pd.DataFrame()

core           INFO 	Loading data for Bahrain Grand Prix - Race [v2.2.2]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '44', '63', '20', '77', '31', '22', '14', '24', '47', '18', '23', '3', '4', '6', '27', '11', '1', '10']
core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v2.2.2]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_dat

In [9]:
# Write the combined race and qualifying laps to CSV files in the working directory.
for frame, csv_path in ((df_R, "2022_race.csv"), (df_Q, "2022_qualification.csv")):
    frame.to_csv(csv_path)

In [10]:
# Summarise the combined race laps: index range, columns, non-null counts and dtypes.
df_R.info()

<class 'fastf1.core.Laps'>
Int64Index: 18388 entries, 1 to 1257
Data columns (total 29 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                18388 non-null  timedelta64[ns]
 1   DriverNumber        18388 non-null  object         
 2   LapTime             18388 non-null  timedelta64[ns]
 3   LapNumber           18388 non-null  float64        
 4   Stint               18388 non-null  float64        
 5   PitOutTime          0 non-null      timedelta64[ns]
 6   PitInTime           0 non-null      timedelta64[ns]
 7   Sector1Time         18388 non-null  timedelta64[ns]
 8   Sector2Time         18388 non-null  timedelta64[ns]
 9   Sector3Time         18388 non-null  timedelta64[ns]
 10  Sector1SessionTime  18388 non-null  timedelta64[ns]
 11  Sector2SessionTime  18388 non-null  timedelta64[ns]
 12  Sector3SessionTime  18388 non-null  timedelta64[ns]
 13  SpeedI1             15426 non-null  float64