In [1]:
import pickle
import fastf1 as ff1
import pandas as pd
import numpy as np

from fastf1.core import Session
from typing import List, Iterable

import f1_pitstop_advisor.idea_one.data_processing_utils as utils

from sklearn.pipeline import make_pipeline

In [2]:
# Load the pickled sessions object from a path relative to the working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# "ig_sessions.pickle" must come from a trusted source.
with open("ig_sessions.pickle", "rb") as file:
    sessions = pickle.load(file)

In [26]:
# Group the loaded sessions into lists keyed by their meeting's circuit key
data_by_circuit = {}
for session in sessions:
    circuit_key = session.session_info["Meeting"]["Circuit"]["Key"]
    # setdefault creates the empty list the first time a circuit key is seen
    data_by_circuit.setdefault(circuit_key, []).append(session)

In [27]:
# Get lap data combined with weather data
# Key of the circuit under study; it indexes data_by_circuit, whose keys come
# from session_info["Meeting"]["Circuit"]["Key"].
# TODO(review): record which circuit key 63 refers to.
STUDIED_CIRCUIT_KEY = 63

studied_sessions = data_by_circuit[STUDIED_CIRCUIT_KEY]
data_list = []
for session in studied_sessions:
    session_data = utils.get_lap_data_with_weather(session)
    # Called for its side effect on session_data (inplace=True); presumably
    # this is what adds the LapTimeZScore column — confirm in utils.
    utils.add_z_score_for_laps(session_data, inplace=True)
    # Let pandas pick the best-supported (nullable) dtypes per session
    # before the frames are stacked.
    session_data = session_data.convert_dtypes()
    data_list.append(session_data)

# Stack the per-session frames into one frame with a fresh 0..n-1 index.
data = pd.concat(data_list, ignore_index=True)



In [30]:
# Flag pit laps: IsPitLap is True wherever PitInTime is not missing (NaT).
# Series.notna() is the pandas-native missing-value check; unlike np.isnat it
# does not raise TypeError if the column is not a datetime/timedelta dtype.
data["IsPitLap"] = data["PitInTime"].notna()

In [31]:
# Show every column of the combined frame, including the derived IsPitLap flag
data.columns

Index(['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'Stint',
       'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
       'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
       'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest',
       'Compound', 'TyreLife', 'FreshTyre', 'Team', 'LapStartTime',
       'LapStartDate', 'TrackStatus', 'Position', 'Deleted', 'DeletedReason',
       'FastF1Generated', 'IsAccurate', 'AirTemp', 'Humidity', 'Pressure',
       'Rainfall', 'TrackTemp', 'WindDirection', 'WindSpeed', 'LapTimeSeconds',
       'LapTimeZScore', 'IsPitLap'],
      dtype='object')

In [32]:
# Columns carried forward into the analysis, grouped by where they come from
selected_columns = [
    # Lap info
    "LapTimeZScore",
    "IsPitLap",
    "Compound",
    "TyreLife",
    "FreshTyre",
    # Weather data
    "AirTemp",
    "Humidity",
    "Pressure",
    "Rainfall",
    "TrackTemp",
    "WindDirection",
    "WindSpeed",
]
# Keep only those columns, in the order listed above
filtered_data = data[selected_columns]

In [33]:
# One-hot encode the non-numeric columns; in the recorded output this expands
# Compound into Compound_HARD / Compound_MEDIUM / Compound_SOFT.
final_data = pd.get_dummies(filtered_data)

In [34]:
# Display the encoded frame (pandas abbreviates long frames when rendering)
final_data

Unnamed: 0,LapTimeZScore,IsPitLap,TyreLife,FreshTyre,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,Compound_HARD,Compound_MEDIUM,Compound_SOFT
0,-0.271308,False,4,False,23.9,26,1010.4,False,29.0,13,0.3,False,False,True
1,-0.525218,False,5,False,23.8,26,1010.4,False,29.0,357,0.5,False,False,True
2,-0.473811,False,6,False,23.8,29,1010.2,False,28.8,13,0.4,False,False,True
3,-0.451286,False,7,False,23.8,31,1010.4,False,28.7,50,0.3,False,False,True
4,-0.417769,False,8,False,23.8,33,1010.4,False,28.5,316,0.4,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4409,-0.423008,False,26,True,26.9,44,1008.4,False,30.4,8,1.5,False,True,False
4410,-0.393133,False,27,True,26.9,44,1008.4,False,30.5,33,1.9,False,True,False
4411,-0.402422,False,28,True,26.9,43,1008.4,False,30.3,44,1.2,False,True,False
4412,-0.330371,False,29,True,26.9,43,1008.4,False,30.4,44,1.4,False,True,False


In [35]:
# Correlation of every column with the lap-time z-score, sorted ascending.
# NOTE(review): Rainfall is NaN in the recorded output — presumably the column
# is constant for these sessions (zero variance); verify.
(
    final_data
    .corr()
    .loc[:, "LapTimeZScore"]
    .sort_values()
)



TyreLife          -0.247351
FreshTyre         -0.050235
Compound_MEDIUM   -0.034344
Compound_HARD     -0.006455
Pressure          -0.002692
Humidity           0.000098
AirTemp            0.003668
TrackTemp          0.011484
WindDirection      0.018492
WindSpeed          0.024241
Compound_SOFT      0.034377
IsPitLap           0.140161
LapTimeZScore      1.000000
Rainfall                NaN
Name: LapTimeZScore, dtype: float64