In [40]:
"""
FastF1 Data Fetcher for HPC F1 AI Strategy System

Downloads telemetry and race data from a specific F1 session to simulate
live telemetry streaming from a Raspberry Pi "racecar" to the HPC layer.

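Usage:
    Run the notebook cells from top to bottom. The session (2023 Monza race)
    and driver (ALO) are set directly in the code below. The script-style
    invocation
        python fetch_race_data.py --year 2024 --race "Monaco" --driver VER --output data/
    is not handled here: no command-line arguments are parsed in this notebook.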
"""
import fastf1
import pandas as pd

# Session selection -- edit these constants to export a different race or driver.
SESSION_YEAR = 2023
SESSION_EVENT = 'Monza'
SESSION_TYPE = 'R'  # race session
DRIVER_CODE = 'ALO'

# 1. Load the session (laps, telemetry and weather are all used below)
session = fastf1.get_session(SESSION_YEAR, SESSION_EVENT, SESSION_TYPE)
session.load(telemetry=True, laps=True, weather=True)

# 2. Pick the driver
driver_laps = session.laps.pick_drivers(DRIVER_CODE)

# Total laps in the race = highest lap number recorded by any driver
total_laps = session.laps['LapNumber'].max()

# 3. Collect telemetry lap by lap, tagging every sample with its lap's metadata
telemetry_list = []

for _, lap in driver_laps.iterlaps():
    car_data = lap.get_car_data()

    # Laps without any telemetry samples contribute nothing to the export
    if car_data is None or len(car_data) == 0:
        continue

    car_data['LapNumber'] = lap['LapNumber']
    car_data['Compound'] = lap['Compound']
    car_data['TyreLife'] = lap['TyreLife']  # Number of laps on current tires
    telemetry_list.append(car_data)

# pd.concat([]) raises a generic "No objects to concatenate"; fail with the
# same exception type but a message that says which selection was empty.
if not telemetry_list:
    raise ValueError(
        f"No telemetry found for driver {DRIVER_CODE!r} in "
        f"{SESSION_YEAR} {SESSION_EVENT} ({SESSION_TYPE})"
    )

# 4. Combine all telemetry data into one frame, ordered by session time
all_telemetry = pd.concat(telemetry_list, ignore_index=True)
all_telemetry['SessionTime'] = pd.to_timedelta(all_telemetry['SessionTime'])
all_telemetry = all_telemetry.sort_values('SessionTime')

# 5. Get weather data
# assign() returns a new frame, so the SessionTime column is NOT written onto
# session.weather_data itself and re-running this cell leaves the session untouched.
weather = (
    session.weather_data
    .assign(SessionTime=lambda w: pd.to_timedelta(w['Time']))
    .sort_values('SessionTime')
)

# 6. Attach the nearest-in-time weather sample to every telemetry sample.
# merge_asof requires both inputs to be sorted on the join key (done above).
merged_data = pd.merge_asof(
    all_telemetry,
    weather[['SessionTime', 'TrackTemp', 'Rainfall']],
    on='SessionTime',
    direction='nearest'
)

# 7. Create final dataframe with requested columns
final_df = pd.DataFrame({
    'lap_number': merged_data['LapNumber'],
    'total_laps': total_laps,  # Scalar, broadcast to every row
    'speed': merged_data['Speed'],
    'overall_time': merged_data['SessionTime'],
    'throttle': merged_data['Throttle'],
    'brake': merged_data['Brake'],
    'tire_compound': merged_data['Compound'],
    'tire_life_laps': merged_data['TyreLife'],  # Number of laps on current tires
    'track_temperature': merged_data['TrackTemp'],
    'rainfall': merged_data['Rainfall']
})

print(f"Created dataframe with {len(final_df)} rows")
print(f"Total laps in race: {total_laps}")
print(f"Laps covered: {final_df['lap_number'].min()} to {final_df['lap_number'].max()}")
print(f"Tire life range: {final_df['tire_life_laps'].min()} to {final_df['tire_life_laps'].max()} laps")
final_df.head(10)


core           INFO 	Loading data for Italian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req      

Created dataframe with 16584 rows
Total laps in race: 51.0
Laps covered: 1.0 to 51.0
Tire life range: 1.0 to 33.0 laps


Unnamed: 0,lap_number,total_laps,speed,overall_time,throttle,brake,tire_compound,tire_life_laps,track_temperature,rainfall
0,1.0,51.0,0.0,0 days 01:22:21.734000,23.0,False,MEDIUM,1.0,42.5,False
1,1.0,51.0,0.0,0 days 01:22:21.894000,23.0,False,MEDIUM,1.0,42.5,False
2,1.0,51.0,4.0,0 days 01:22:22.214000,26.0,False,MEDIUM,1.0,42.5,False
3,1.0,51.0,14.0,0 days 01:22:22.494000,24.0,False,MEDIUM,1.0,42.5,False
4,1.0,51.0,24.0,0 days 01:22:22.774000,24.0,False,MEDIUM,1.0,42.5,False
5,1.0,51.0,31.0,0 days 01:22:22.974000,26.0,False,MEDIUM,1.0,42.5,False
6,1.0,51.0,38.0,0 days 01:22:23.254000,36.0,False,MEDIUM,1.0,42.5,False
7,1.0,51.0,50.0,0 days 01:22:23.494000,41.0,False,MEDIUM,1.0,42.5,False
8,1.0,51.0,58.0,0 days 01:22:23.694000,44.0,False,MEDIUM,1.0,42.5,False
9,1.0,51.0,71.0,0 days 01:22:23.974000,55.0,False,MEDIUM,1.0,42.5,False


In [41]:
# Summarise the exported frame: row count, per-column dtypes, then the
# descriptive statistics (left as the last expression so it renders as a table).
n_points = len(final_df)
print("Dataframe Info:", f"Total telemetry points: {n_points}", sep="\n")

print("\nColumn types:")
print(final_df.dtypes)

print("\nBasic statistics:")
final_df.describe()


Dataframe Info:
Total telemetry points: 16584

Column types:
lap_number                   float64
total_laps                   float64
speed                        float64
overall_time         timedelta64[ns]
throttle                     float64
brake                           bool
tire_compound                 object
tire_life_laps               float64
track_temperature            float64
rainfall                        bool
dtype: object

Basic statistics:


Unnamed: 0,lap_number,total_laps,speed,overall_time,throttle,tire_life_laps,track_temperature
count,16584.0,16584.0,16584.0,16584,16584.0,16584.0,16584.0
mean,25.891341,51.0,235.570188,0 days 01:59:34.577446394,72.291546,15.339243,42.908816
std,14.710977,0.0,76.948906,0 days 00:21:30.065940875,40.561237,8.558018,0.897756
min,1.0,51.0,0.0,0 days 01:22:21.734000,0.0,1.0,40.8
25%,13.0,51.0,180.0,0 days 01:40:53.558000,40.0,8.0,42.5
50%,26.0,51.0,245.0,0 days 01:59:31.222000,100.0,15.0,43.1
75%,39.0,51.0,309.0,0 days 02:18:13.365000,100.0,21.0,43.6
max,51.0,51.0,351.0,0 days 02:36:49.228000,100.0,33.0,44.4


In [42]:
# Write the per-sample table to disk as CSV text. The file name has no
# extension, and with pandas' default index=True the row index is written
# as the first (unnamed) column.
csv_destination = "ALONSO_2023_MONZA_RACE"
final_df.to_csv(csv_destination)

In [43]:
# Show tire compound and tire age for each lap (stint information)
print("Tire compound usage throughout the race:")

stint_columns = ['lap_number', 'tire_compound', 'tire_life_laps']

# A single groupby already yields exactly one row per distinct
# (lap, compound, tire age) combination, sorted by those keys, so no second
# groupby or drop_duplicates pass is needed. The helper 'count' column is
# dropped again by the column selection.
tire_changes = (
    final_df.groupby(stint_columns)
    .size()
    .reset_index(name='count')[stint_columns]
)
print(tire_changes)


Tire compound usage throughout the race:
    lap_number tire_compound  tire_life_laps
0          1.0        MEDIUM             1.0
1          2.0        MEDIUM             2.0
2          3.0        MEDIUM             3.0
3          4.0        MEDIUM             4.0
4          5.0        MEDIUM             5.0
5          6.0        MEDIUM             6.0
6          7.0        MEDIUM             7.0
7          8.0        MEDIUM             8.0
8          9.0        MEDIUM             9.0
9         10.0        MEDIUM            10.0
10        11.0        MEDIUM            11.0
11        12.0        MEDIUM            12.0
12        13.0        MEDIUM            13.0
13        14.0        MEDIUM            14.0
14        15.0        MEDIUM            15.0
15        16.0        MEDIUM            16.0
16        17.0        MEDIUM            17.0
17        18.0        MEDIUM            18.0
18        19.0        MEDIUM            19.0
19        20.0        MEDIUM            20.0
20        21.0