In [None]:
import fastf1
from fastf1 import plotting
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
from tqdm import tqdm
import warnings
import os
warnings.filterwarnings('ignore')

# On-disk cache directory for FastF1 downloads.
# NOTE(review): this is a machine-specific absolute Windows path; adjust it
# when running the notebook elsewhere.
cache_path = r'D:\f1-prediction-ml-2025-zoe\cache'
# exist_ok=True creates the directory only when needed, without a separate
# (race-prone) existence check.
os.makedirs(cache_path, exist_ok=True)
fastf1.Cache.enable_cache(cache_path)

# Circuit classification used by get_track_type(). Keys are matched as
# case-insensitive substrings of the event name, so each circuit may be listed
# under several spellings (city, venue, country). Anything that matches no key
# defaults to 'permanent'.
TRACK_TYPES = {
    'Australia': 'semi-permanent',
    'Monaco': 'street',
    'Miami': 'semi-permanent',
    'Singapore': 'street',
    'Baku': 'street',
    'Las Vegas': 'street',
    'Albert Park': 'semi-permanent',
    'Montreal': 'semi-permanent',
    'Jeddah': 'street',
    # Country-based aliases: official event names such as
    # "Azerbaijan Grand Prix", "Saudi Arabian Grand Prix" and
    # "Canadian Grand Prix" do not contain the city names above and would
    # otherwise be misclassified as 'permanent'.
    'Azerbaijan': 'street',
    'Saudi Arabia': 'street',
    'Canada': 'semi-permanent',
    'Canadian': 'semi-permanent',
}  # all others are defaulted to permanent

In [None]:
#feature extraction functions
def get_track_type(event_name):
    """Classify an event's circuit from its name.

    Each key of TRACK_TYPES is tested as a case-insensitive substring of
    ``event_name``; the value of the first matching key is returned.

    Args:
        event_name: Name of the event (for example an event, venue or
            country name). Missing values are tolerated.

    Returns:
        The matching TRACK_TYPES value, or 'permanent' when nothing matches
        or when ``event_name`` is not a non-empty string.
    """
    # Missing names (None, NaN from a pandas column) or empty strings cannot
    # match any key; fall back to the default instead of raising.
    if not isinstance(event_name, str) or not event_name:
        return 'permanent'

    # Lower-case once instead of on every loop iteration.
    haystack = event_name.lower()
    for track, track_type in TRACK_TYPES.items():
        if track.lower() in haystack:
            return track_type
    return 'permanent'

def get_weather_condition(session):
    """Summarise a session's weather as 'wet', 'dry' or 'unknown'.

    Args:
        session: Object exposing a ``weather_data`` attribute; when present
            it is expected to offer ``.empty`` and a 'Rainfall' column.

    Returns:
        'wet' if any 'Rainfall' entry is truthy, 'dry' if none is or if the
        weather data is missing/empty, and 'unknown' if reading the data
        raised an exception (the error is printed).
    """
    try:
        weather = session.weather_data
        has_samples = weather is not None and not weather.empty
        # A single rainy sample is enough to call the session wet; missing
        # or empty data falls through to the 'dry' default.
        if has_samples and weather['Rainfall'].any():
            return 'wet'
        return 'dry'
    except Exception as e:
        print(f"Error extracting weather condition: {e}")
        return 'unknown'

def get_car_pace_indicator(year, event_name, driver_code):
    """Return a driver's FP3 pace gap to the session-best lap, in percent.

    Loads the FP3 session for the given event, takes the fastest lap of the
    whole session and the fastest lap of ``driver_code``, and expresses the
    difference as a percentage of the session-best lap time.

    Args:
        year: Season passed to ``fastf1.get_session``.
        event_name: Event identifier passed to ``fastf1.get_session``.
        driver_code: Driver identifier used to filter the session's laps.

    Returns:
        The gap rounded to 3 decimals (0.0 means equal to the session best).
        0.0 is also returned when no lap is available or when loading fails,
        so callers cannot tell "no data" apart from "fastest driver".

    NOTE(review): every call reloads the FP3 session; callers that loop over
    drivers of one event pay that cost once per driver.
    TODO: confirm FP3 (rather than qualifying) is the intended pace source.
    """
    try:
        fp3 = fastf1.get_session(year, event_name, 'FP3')
        fp3.load(laps=True)
        laps = fp3.laps

        fastest_lap = laps.pick_fastest()
        if fastest_lap is None or fastest_lap.empty:
            # No timed lap in the session: use the same default as every
            # other "no data" path instead of implicitly returning None.
            return 0.0

        # Newer FastF1 releases deprecate pick_driver in favour of
        # pick_drivers; prefer the new API but keep older versions working.
        if hasattr(laps, 'pick_drivers'):
            driver_laps = laps.pick_drivers(driver_code)
        else:
            driver_laps = laps.pick_driver(driver_code)

        driver_lap = driver_laps.pick_fastest()
        if driver_lap is None or driver_lap.empty:
            return 0.0  # default if the driver set no lap

        # Pace difference as a percentage of the session-best lap time.
        best_seconds = fastest_lap['LapTime'].total_seconds()
        driver_seconds = driver_lap['LapTime'].total_seconds()
        pace_diff = (driver_seconds - best_seconds) / best_seconds * 100
        return round(pace_diff, 3)
    except Exception as e:
        print(f"Error getting pace for {driver_code} in {event_name}: {e}")
        return 0.0

