In [1]:
import pickle
import fastf1 as ff1
import pandas as pd
import numpy as np

from fastf1.core import Session
from typing import List, Iterable

import f1_pitstop_advisor.data_processing_utils as utils

from sklearn.pipeline import make_pipeline

In [2]:
# Load the pickled collection of sessions (iterated over in the next cell;
# presumably FastF1 Session objects — TODO confirm against the script that wrote the file).
# NOTE(review): pickle.load can execute arbitrary code during deserialization —
# only open pickle files that come from a trusted source.
with open("ig_sessions.pickle", "rb") as file:
    sessions = pickle.load(file)

In [3]:
# Group the loaded sessions into lists keyed by the circuit key found in
# each session's meeting info.
data_by_circuit = {}
for session in sessions:
    circuit_key = session.session_info["Meeting"]["Circuit"]["Key"]
    # setdefault creates the empty list the first time a circuit is seen
    data_by_circuit.setdefault(circuit_key, []).append(session)

In [4]:
# Print one row per circuit: the circuit key left-justified to 20 characters,
# followed by the number of sessions recorded for it.
for circuit, circuit_sessions in data_by_circuit.items():
    print(str(circuit).ljust(20), len(circuit_sessions))

63                   4
149                  4
10                   4
151                  2
15                   4
22                   4
144                  2
23                   4
2                    4
28                   1
4                    4
7                    2
55                   3
39                   3
61                   3
46                   4
9                    1
65                   3
70                   3
152                  2
6                    2
19                   1


In [5]:
# Build a single lap table (laps joined with weather) for the circuit under study.
studied_circuit = 10
studied_sessions = data_by_circuit[studied_circuit]

data_list = []
for session in studied_sessions:
    laps_with_weather = utils.get_lap_data_with_weather(session)
    # Mutates the frame in place, once per session (presumably adds the
    # LapTimeZScore column seen later — TODO confirm in utils).
    utils.add_z_score_for_laps(laps_with_weather, inplace=True)
    # convert_dtypes() returns a new frame using pandas' nullable dtypes
    data_list.append(laps_with_weather.convert_dtypes())

# Stack all sessions and renumber the rows 0..n-1
data = pd.concat(data_list, ignore_index=True)



In [6]:
# Add a feature determining whether there was a pit stop performed during each lap:
# a lap counts as a pit lap when its PitInTime is not missing.
# Series.notna() is used instead of ~np.isnat(...) because np.isnat raises
# TypeError for anything that is not datetime64/timedelta64 (e.g. an object or
# all-NA column), whereas notna() produces the same boolean mask for any dtype.
data["IsPitLap"] = data["PitInTime"].notna()

In [7]:
# Display every column of the combined lap table (the next cell picks features from it)
data.columns

Index(['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'Stint',
       'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
       'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
       'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest',
       'Compound', 'TyreLife', 'FreshTyre', 'Team', 'LapStartTime',
       'LapStartDate', 'TrackStatus', 'Position', 'Deleted', 'DeletedReason',
       'FastF1Generated', 'IsAccurate', 'AirTemp', 'Humidity', 'Pressure',
       'Rainfall', 'TrackTemp', 'WindDirection', 'WindSpeed', 'LapTimeSeconds',
       'LapTimeZScore', 'IsPitLap'],
      dtype='object')

In [8]:
# Restrict the lap table to the columns used in the rest of the analysis.
lap_columns = ["LapTimeZScore", "IsPitLap", "Compound", "TyreLife", "FreshTyre"]
weather_columns = [
    "AirTemp", "Humidity", "Pressure", "Rainfall",
    "TrackTemp", "WindDirection", "WindSpeed",
]
selected_columns = lap_columns + weather_columns
filtered_data = data[selected_columns]

In [9]:
# One-hot encode the categorical feature(s): in the displayed result, Compound is
# replaced by boolean Compound_<NAME> indicator columns and the other columns are kept.
final_data = filtered_data.pipe(pd.get_dummies)

In [10]:
# Display the encoded feature table (pandas shows a truncated preview of the rows)
final_data

Unnamed: 0,LapTimeZScore,IsPitLap,TyreLife,FreshTyre,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,Compound_HARD,Compound_INTERMEDIATE,Compound_MEDIUM,Compound_SOFT
0,-0.074402,False,1,True,26.6,44,1014.4,False,39.3,299,1.8,False,False,True,False
1,-0.418389,False,2,True,26.6,43,1014.4,False,39.1,285,2.5,False,False,True,False
2,0.913863,False,3,True,26.6,43,1014.5,False,38.4,264,1.6,False,False,True,False
3,2.667625,False,4,True,26.6,44,1014.5,False,37.9,286,2.0,False,False,True,False
4,2.621248,False,5,True,26.4,44,1014.4,False,36.8,312,2.7,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3775,-0.671992,False,9,True,15.4,77,1010.6,False,18.7,154,2.3,False,True,False,False
3776,-0.71907,False,10,True,15.8,76,1010.4,False,18.7,159,1.3,False,True,False,False
3777,-0.676399,False,11,True,15.9,74,1010.5,False,18.7,199,1.5,False,True,False,False
3778,-0.733258,False,12,True,16.1,72,1010.4,False,18.7,156,0.8,False,True,False,False


In [11]:
# Rank every feature by its correlation with the lap-time z-score
# (DataFrame.corr() with default settings), sorted ascending.
correlation_matrix = final_data.corr()
correlation_matrix["LapTimeZScore"].sort_values()



TyreLife                -0.268507
Rainfall                -0.094395
Compound_HARD           -0.094101
Compound_INTERMEDIATE   -0.052636
AirTemp                 -0.014237
Humidity                -0.000151
Pressure                 0.009352
TrackTemp                0.033541
WindDirection            0.049893
FreshTyre                0.066153
Compound_SOFT            0.083790
WindSpeed                0.102546
Compound_MEDIUM          0.140864
IsPitLap                 0.304414
LapTimeZScore            1.000000
Name: LapTimeZScore, dtype: float64