In [1]:
#!pip install fastf1
import numpy as np
import pandas as pd
import fastf1
from fastf1 import get_session
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import mean_squared_error, accuracy_score, ndcg_score
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import os
import threading
from typing import Dict, List, Tuple
from concurrent.futures import ThreadPoolExecutor
import concurrent.futures

In [2]:
def load_session(season: int, round_num: int) -> tuple:
    """Carica una singola sessione con gestione errori"""
    try:
        session = fastf1.get_session(season, round_num, 'R')
        print(f"Caricamento sessione round {round_num}...")
        session.load(telemetry=False, messages=False, weather=True,laps=False)  # Assicurati di caricare i dati meteo
        print(f"Sessione round {round_num} caricata con successo")
        return round_num, session
    except Exception as e:
        print(f"Errore nel caricamento round {round_num}: {str(e)}")
        return round_num, None
def collect_race_data(season: int, start_round: int = 1, end_round: int = 24) -> pd.DataFrame:
    all_data = []
    team_points = {}
    team_stats = {}
    h2h_quali = {}
    h2h_race = {}
    
    # Primo passaggio: carica tutte le sessioni
    sessions = {}
    missing_rounds = []
    
    # Usa un semaforo per limitare le connessioni concorrenti
    max_workers = 4
    semaphore = threading.Semaphore(max_workers)
    
    def load_session_with_retry(season, round_num, max_retries=3):
        with semaphore:
            for attempt in range(max_retries):
                try:
                    result = load_session(season, round_num)
                    return result
                except Exception as e:
                    if attempt < max_retries - 1:
                        print(f"Tentativo {attempt+1} fallito per round {round_num}: {str(e)}. Riprovo...")
                        time.sleep(2)  # Aggiunge un ritardo tra i tentativi
                    else:
                        print(f"Tutti i tentativi falliti per round {round_num}: {str(e)}")
                        return round_num, None
    
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_round = {
            executor.submit(load_session_with_retry, season, round_num): round_num
            for round_num in range(start_round, end_round + 1)
        }

        for future in concurrent.futures.as_completed(future_to_round):
            round_num = future_to_round[future]
            try:
                result = future.result()
                if result is not None:
                    loaded_round, session = result
                    if session is not None:
                        sessions[loaded_round] = session
                        
                        # Carica immediatamente i dati meteo per ogni sessione
                        try:
                            session.load_weather_data()
                            weather_data = session.weather_data
                            print(f"\nDebug weather data round {loaded_round}:")
                            print(f"Dati meteo disponibili: {not weather_data.empty}")
                            if not weather_data.empty:
                                print(f"Colonne meteo: {weather_data.columns.tolist()}")
                                print(f"Numero righe meteo: {len(weather_data)}")
                                print(f"Air Temp range: {weather_data['AirTemp'].min()}-{weather_data['AirTemp'].max()}")
                                print(f"Track Temp range: {weather_data['TrackTemp'].min()}-{weather_data['TrackTemp'].max()}")
                                print(f"Rainfall sum: {weather_data['Rainfall'].sum() if 'Rainfall' in weather_data.columns else 'No rainfall data'}")
                        except Exception as e:
                            print(f"Errore weather data round {loaded_round}: {str(e)}")
                            # Assegna un DataFrame vuoto ma con le colonne necessarie
                            session.weather_data = pd.DataFrame(columns=['Time', 'AirTemp', 'TrackTemp', 'Rainfall'])
                    else:
                        missing_rounds.append(round_num)
            except Exception as e:
                print(f"Errore generale nel round {round_num}: {str(e)}")
                missing_rounds.append(round_num)

    print(f"Caricate {len(sessions)} sessioni con successo")
    if missing_rounds:
        print(f"Gare mancanti: {sorted(missing_rounds)}")
        # Secondo tentativo per le gare mancanti (sequenziale)
        for round_num in sorted(missing_rounds):
            try:
                print(f"Secondo tentativo di caricamento per round {round_num}...")
                round_num, session = load_session(season, round_num)
                if session is not None:
                    sessions[round_num] = session
                    try:
                        session.load_weather_data()
                    except Exception as e:
                        print(f"Errore weather data (retry) round {round_num}: {str(e)}")
                        session.weather_data = pd.DataFrame(columns=['Time', 'AirTemp', 'TrackTemp', 'Rainfall'])
                    print(f"Recuperata sessione {round_num} al secondo tentativo")
            except Exception as e:
                print(f"Fallito anche il secondo tentativo per round {round_num}: {str(e)}")
    
    # Secondo passaggio: elabora tutte le sessioni
    for round_num, session in sorted(sessions.items()):
        try:
            if session.results is None or session.results.empty:
                print(f"Nessun risultato disponibile per round {round_num}, salto.")
                continue
                
            print(f"\nProcessing round {round_num}")

            # Ottieni i dati meteo già caricati nel passaggio precedente
            weather_data = session.weather_data if hasattr(session, 'weather_data') and not session.weather_data.empty else pd.DataFrame()
            
            # Calcola le medie delle temperature e pioggia
            air_temp = None
            track_temp = None
            rain = None
            
            if not weather_data.empty:
                if 'AirTemp' in weather_data.columns:
                    air_temp = weather_data['AirTemp'].mean()
                if 'TrackTemp' in weather_data.columns:
                    track_temp = weather_data['TrackTemp'].mean()
                if 'Rainfall' in weather_data.columns:
                    rain = weather_data['Rainfall'].sum()
            
            print(f"Round {round_num} - Dati meteo calcolati: AirTemp={air_temp}, TrackTemp={track_temp}, Rain={rain}")

            # Verifica che ci siano risultati validi
            current_results = session.results
            if current_results.empty:
                print(f"Risultati vuoti per round {round_num}, salto.")
                continue
                
            # Aggiorna statistiche team
            for team in current_results['TeamName'].unique():
                if team not in team_stats:
                    team_stats[team] = {'top10': 0, 'podiums': 0}

                team_results = current_results[current_results['TeamName'] == team]
                team_stats[team]['top10'] += sum(1 for pos in team_results['Position'] if pd.notna(pos) and pos <= 10)
                team_stats[team]['podiums'] += sum(1 for pos in team_results['Position'] if pd.notna(pos) and pos <= 3)

            # Calcola H2H per questo round con gestione dei piloti sostitutivi
            for team in current_results['TeamName'].unique():
                team_drivers = current_results[current_results['TeamName'] == team]
                if len(team_drivers) == 2:
                    driver1, driver2 = team_drivers.iloc[0], team_drivers.iloc[1]
                    team_key = f"{driver1['TeamName']}_{season}"

                    # Inizializza i dizionari H2H se necessario
                    if team_key not in h2h_quali:
                        h2h_quali[team_key] = {}
                    if team_key not in h2h_race:
                        h2h_race[team_key] = {}
                    
                    # Assicurati che entrambi i piloti esistano nei dizionari
                    if driver1['Abbreviation'] not in h2h_quali[team_key]:
                        h2h_quali[team_key][driver1['Abbreviation']] = 0
                    if driver2['Abbreviation'] not in h2h_quali[team_key]:
                        h2h_quali[team_key][driver2['Abbreviation']] = 0
                    if driver1['Abbreviation'] not in h2h_race[team_key]:
                        h2h_race[team_key][driver1['Abbreviation']] = 0
                    if driver2['Abbreviation'] not in h2h_race[team_key]:
                        h2h_race[team_key][driver2['Abbreviation']] = 0

                    # H2H Qualifiche
                    if pd.notna(driver1['GridPosition']) and pd.notna(driver2['GridPosition']):
                        if driver1['GridPosition'] < driver2['GridPosition']:
                            h2h_quali[team_key][driver1['Abbreviation']] += 1
                        elif driver2['GridPosition'] < driver1['GridPosition']:
                            h2h_quali[team_key][driver2['Abbreviation']] += 1

                    # H2H Gare
                    if (pd.notna(driver1['Position']) and pd.notna(driver2['Position'])):
                        if driver1['Position'] < driver2['Position']:
                            h2h_race[team_key][driver1['Abbreviation']] += 1
                        elif driver2['Position'] < driver1['Position']:
                            h2h_race[team_key][driver2['Abbreviation']] += 1

            # Aggiorna punti team
            for _, row in session.results.iterrows():
                team = row.get('TeamName', 'Unknown')
                points = float(row.get('Points', 0)) if pd.notna(row.get('Points', 0)) else 0
                team_points[team] = team_points.get(team, 0) + points

            # Raccogli dati piloti con gestione errori migliorata
            for idx, row in session.results.iterrows():
                try:
                    # Verifica che il pilota abbia un abbreviazione valida
                    driver_abbr = row.get('Abbreviation', '')
                    if not driver_abbr:
                        print(f"Pilota senza abbreviazione nel round {round_num}, riga {idx}, salto.")
                        continue
                        
                    # Gestione speciale per piloti sostitutivi (COL, BEA, ecc.)
                    # Per questi piloti, utilizzeremo solo i dati disponibili senza richiedere storia
                    is_substitute_driver = driver_abbr in ['COL', 'BEA'] or driver_abbr.startswith('REP_')
                    
                    # Calcola statistiche da TUTTE le sessioni precedenti
                    all_prev_results = []
                    if not is_substitute_driver:
                        for prev_round in range(1, round_num):
                            if prev_round in sessions and sessions[prev_round].results is not None:
                                prev_session = sessions[prev_round]
                                try:
                                    driver_result = prev_session.results[
                                        prev_session.results['Abbreviation'] == driver_abbr
                                    ]
                                    if not driver_result.empty:
                                        all_prev_results.append({
                                            'points': float(driver_result.iloc[0].get('Points', 0)) if pd.notna(driver_result.iloc[0].get('Points', 0)) else 0.0,
                                            'position': float(driver_result.iloc[0].get('Position', 20)) if pd.notna(driver_result.iloc[0].get('Position', 20)) else 20.0
                                        })
                                except Exception as e:
                                    print(f"Errore nel calcolo storico per {driver_abbr} nel round {prev_round}: {str(e)}")

                    # Calcola statistiche dalle ultime 4 sessioni precedenti
                    last_4_results = []
                    if not is_substitute_driver:
                        for prev_round in range(max(1, round_num - 4), round_num):
                            if prev_round in sessions and sessions[prev_round].results is not None:
                                prev_session = sessions[prev_round]
                                try:
                                    driver_result = prev_session.results[
                                        prev_session.results['Abbreviation'] == driver_abbr
                                    ]
                                    if not driver_result.empty:
                                        last_4_results.append({
                                            'points': float(driver_result.iloc[0].get('Points', 0)) if pd.notna(driver_result.iloc[0].get('Points', 0)) else 0.0,
                                            'position': float(driver_result.iloc[0].get('Position', 20)) if pd.notna(driver_result.iloc[0].get('Position', 20)) else 20.0
                                        })
                                except Exception as e:
                                    print(f"Errore nel calcolo last4 per {driver_abbr} nel round {prev_round}: {str(e)}")

                    team = row.get('TeamName', 'Unknown')
                    team_key = f"{team}_{season}"

                    # Gestisci il caso in cui il team_key non esiste negli h2h_* dict
                    h2h_quali_score = 0
                    h2h_race_score = 0
                    
                    if not is_substitute_driver and team_key in h2h_quali:
                        try:
                            other_drivers = [k for k in h2h_quali[team_key].keys() if k != driver_abbr]
                            if other_drivers:
                                main_other_driver = max(other_drivers, key=lambda x: h2h_quali[team_key].get(x, 0) + h2h_race[team_key].get(x, 0))
                                h2h_quali_score = h2h_quali[team_key].get(driver_abbr, 0) - h2h_quali[team_key].get(main_other_driver, 0)
                        except Exception as e:
                            print(f"Errore calcolo h2h_quali: {str(e)}")
                            h2h_quali_score = 0
                            
                    if not is_substitute_driver and team_key in h2h_race:
                        try:
                            other_drivers = [k for k in h2h_race[team_key].keys() if k != driver_abbr]
                            if other_drivers:
                                main_other_driver = max(other_drivers, key=lambda x: h2h_quali[team_key].get(x, 0) + h2h_race[team_key].get(x, 0))
                                h2h_race_score = h2h_race[team_key].get(driver_abbr, 0) - h2h_race[team_key].get(main_other_driver, 0)
                        except Exception as e:
                            print(f"Errore calcolo h2h_race: {str(e)}")
                            h2h_race_score = 0

                    # Prepara i dati con valori di default per piloti sostitutivi
                    driver_data = {
                        'season': season,
                        'race': round_num,
                        'driver': driver_abbr,
                        'team': team,
                        'qualifying_position': int(row.get('GridPosition', 20)) if pd.notna(row.get('GridPosition', 20)) else 20,
                        'final_position': int(row.get('Position', 20)) if pd.notna(row.get('Position', 20)) else 20,
                        'points': float(row.get('Points', 0)) if pd.notna(row.get('Points', 0)) else 0.0,
                        'dnf': 1 if row.get('Status') != 'Finished' and row.get('Status') != '+1 Lap' else 0,
                        'lapped': 1 if row.get('Status') == '+1 Lap' or row.get('Status') == '+2 Lap' else 0,
                        'team_top10_count': team_stats.get(team, {'top10': 0})['top10'],
                        'team_podiums_count': team_stats.get(team, {'podiums': 0})['podiums'],
                        'h2h_quali_score': h2h_quali_score,
                        'h2h_race_score': h2h_race_score,
                        'last4racepoints': sum(r['points'] for r in last_4_results[-4:]) if not is_substitute_driver else 0,
                        'last4median_position': int(np.median([r['position'] for r in last_4_results])) if last_4_results and not is_substitute_driver else 20,
                        'last4podiums': sum(1 for r in last_4_results if r['position'] <= 3) if not is_substitute_driver else 0,
                        'historical_median_position': int(np.median([r['position'] for r in all_prev_results])) if all_prev_results and not is_substitute_driver else 20,
                        'historical_podiums': sum(1 for r in all_prev_results if r['position'] <= 3) if not is_substitute_driver else 0,
                        'average_team_points_pw': team_points.get(team, 0) / round_num if round_num > 0 else 0,
                        'consecutive_points_finishes': sum(1 for r in last_4_results if r['points'] > 0) if not is_substitute_driver else 0,
                        'air_temp': air_temp,
                        'track_temp': track_temp,
                        'total_rain': rain,
                        'is_substitute': 1 if is_substitute_driver else 0  # Flag per indicare piloti sostitutivi
                    }
                    all_data.append(driver_data)

                except Exception as e:
                    print(f"Errore nell'elaborazione pilota {row.get('Abbreviation', 'Unknown')} nel round {round_num}: {str(e)}")
                    traceback.print_exc()
                    continue

        except Exception as e:
            print(f"Errore nell'elaborazione round {round_num}: {str(e)}")
            traceback.print_exc()
            continue

    # Verifica finale dei dati
    rounds_in_data = set(d['race'] for d in all_data)
    print(f"\nGare presenti nel dataset finale: {sorted(rounds_in_data)}")
    print(f"Gare mancanti nel dataset: {sorted(set(range(start_round, end_round + 1)) - rounds_in_data)}")
    
    result_df = pd.DataFrame(all_data) if all_data else pd.DataFrame()
    print(f"Righe totali nel DataFrame: {len(result_df)}")
    print(f"Piloti nel dataset: {sorted(result_df['driver'].unique())}")
    
    # Conteggio piloti sostitutivi
    if 'is_substitute' in result_df.columns:
        substitute_count = result_df['is_substitute'].sum()
        print(f"Piloti sostitutivi identificati: {substitute_count}")
    
    return result_df

In [85]:
data=collect_race_data(2024)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Caricamento sessione round 1...
Caricamento sessione round 2...


core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.4.4]
core           INFO 	Loading data for Australian Grand Prix - Race [v3.4.4]
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for driver_info


Caricamento sessione round 3...
Caricamento sessione round 4...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 1 caricata con successo
Errore weather data round 1: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 1: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 5...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 2 caricata con successo
Errore weather data round 2: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 2: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 6...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']


Sessione round 4 caricata con successo
Errore weather data round 4: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 4: property 'weather_data' of 'Session' object has no setter
Sessione round 3 caricata con successo
Errore weather data round 3: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 3: property 'weather_data' of 'Session' object has no setter


core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 5 caricata con successoCaricamento sessione round 7...

Errore weather data round 5: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 5: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 8...
Caricamento sessione round 9...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '11', '55', '44', '22', '63', '14', '31', '27', '10', '81', '24', '3', '77', '18', '23', '20', '2']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 6 caricata con successo
Errore weather data round 6: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 6: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 10...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '16', '81', '55', '44', '63', '11', '18', '22', '27', '20', '3', '31', '24', '10', '2', '77', '14', '23']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 7 caricata con successo
Errore weather data round 7: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 7: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 11...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '14', '3', '77', '18', '2', '24', '31', '11', '27', '20']
core           INFO 	Loading data for British Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data


Sessione round 8 caricata con successo
Errore weather data round 8: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 8: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 12...


core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '44', '81', '14', '18', '3', '10', '31', '27', '20', '77', '22', '24', '55', '23', '11', '16', '2']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 9 caricata con successo
Errore weather data round 9: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 9: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 13...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '63', '16', '55', '81', '11', '10', '31', '27', '14', '24', '18', '3', '77', '20', '23', '22', '2']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 10 caricata con successo
Errore weather data round 10: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 10: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 14...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['63', '81', '55', '44', '1', '27', '11', '20', '3', '10', '16', '31', '18', '22', '23', '77', '24', '14', '2', '4']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 11 caricata con successo
Errore weather data round 11: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 11: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 15...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['44', '1', '4', '81', '55', '27', '18', '14', '23', '22', '2', '20', '3', '16', '77', '31', '11', '24', '63', '10']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 12 caricata con successo
Errore weather data round 12: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 12: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 16...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '55', '44', '1', '63', '11', '23', '20', '14', '43', '3', '31', '10', '77', '27', '24', '18', '22']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 16 caricata con successo
Errore weather data round 16: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 16: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 17...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '44', '16', '1', '55', '11', '63', '22', '18', '14', '3', '27', '23', '20', '77', '2', '31', '24', '10']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 13 caricata con successo
Errore weather data round 13: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 13: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 18...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['44', '81', '16', '1', '4', '55', '11', '14', '31', '3', '18', '23', '10', '20', '77', '22', '2', '27', '24', '63']
core           INFO 	Loading data for United States Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 14 caricata con successo
Errore weather data round 14: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 14: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 19...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '1', '4', '81', '63', '11', '27', '30', '43', '20', '10', '14', '22', '18', '23', '77', '31', '24', '44']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 19 caricata con successo
Errore weather data round 19: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 19: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 20...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '81', '55', '11', '63', '44', '10', '14', '27', '3', '18', '23', '31', '2', '22', '20', '77', '24']
core           INFO 	Loading data for São Paulo Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 15 caricata con successo
Errore weather data round 15: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 15: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 21...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['81', '16', '63', '4', '1', '14', '23', '43', '44', '50', '27', '10', '3', '24', '31', '77', '11', '55', '18', '22']
core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 17 caricata con successo
Errore weather data round 17: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 17: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 22...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 18 caricata con successo
Errore weather data round 18: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 18: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 23...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '16', '44', '63', '1', '20', '81', '27', '10', '18', '43', '31', '77', '24', '30', '11', '14', '23', '22']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 20 caricata con successo
Errore weather data round 20: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 20: property 'weather_data' of 'Session' object has no setter
Caricamento sessione round 24...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '31', '10', '63', '16', '4', '22', '81', '30', '44', '11', '50', '77', '14', '24', '55', '43', '23', '18', '27']


Sessione round 24 caricata con successo
Errore weather data round 24: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 24: property 'weather_data' of 'Session' object has no setter
Sessione round 21 caricata con successo
Errore weather data round 21: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 21: property 'weather_data' of 'Session' object has no setter


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['63', '44', '55', '16', '1', '4', '81', '27', '22', '11', '14', '20', '24', '43', '18', '30', '31', '77', '23', '10']
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22', '30', '23', '27', '11', '18', '43', '31']
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Sessione round 22 caricata con successo
Errore weather data round 22: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 22: property 'weather_data' of 'Session' object has no setter
Sessione round 23 caricata con successo
Errore weather data round 23: 'Session' object has no attribute 'load_weather_data'
Errore generale nel round 23: property 'weather_data' of 'Session' object has no setter
Caricate 24 sessioni con successo
Gare mancanti: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
Secondo tentativo di caricamento per round 1...
Caricamento sessione round 1...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']


Sessione round 1 caricata con successo
Errore weather data (retry) round 1: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 1: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 2...
Caricamento sessione round 2...
Sessione round 2 caricata con successo
Errore weather data (retry) round 2: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 2: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 3...


core           INFO 	Loading data for Australian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Caricamento sessione round 3...
Sessione round 3 caricata con successo
Errore weather data (retry) round 3: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 3: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 4...
Caricamento sessione round 4...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']


Sessione round 4 caricata con successo
Errore weather data (retry) round 4: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 4: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 5...
Caricamento sessione round 5...
Sessione round 5 caricata con successo
Errore weather data (retry) round 5: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 5: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 6...


core           INFO 	Loading data for Miami Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '11', '55', '44', '22', '63', '14', '31', '27', '10', '81', '24', '3', '77', '18', '23', '20', '2']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Caricamento sessione round 6...
Sessione round 6 caricata con successo
Errore weather data (retry) round 6: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 6: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 7...
Caricamento sessione round 7...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '16', '81', '55', '44', '63', '11', '18', '22', '27', '20', '3', '31', '24', '10', '2', '77', '14', '23']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '14', '3', '77', '18', '2', '24', '31', '11', '27', '20']


Sessione round 7 caricata con successo
Errore weather data (retry) round 7: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 7: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 8...
Caricamento sessione round 8...
Sessione round 8 caricata con successo
Errore weather data (retry) round 8: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 8: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 9...


core           INFO 	Loading data for Canadian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '44', '81', '14', '18', '3', '10', '31', '27', '20', '77', '22', '24', '55', '23', '11', '16', '2']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info


Caricamento sessione round 9...
Sessione round 9 caricata con successo
Errore weather data (retry) round 9: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 9: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 10...
Caricamento sessione round 10...


req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '63', '16', '55', '81', '11', '10', '31', '27', '14', '24', '18', '3', '77', '20', '23', '22', '2']
core           INFO 	Loading data for Austrian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['63', '81', '55', '44', '1', '27', '11', '20', '3', '10', '16', '31', '18', '22', '23', '77', '24', '14', '2', '4']


Sessione round 10 caricata con successo
Errore weather data (retry) round 10: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 10: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 11...
Caricamento sessione round 11...
Sessione round 11 caricata con successo
Errore weather data (retry) round 11: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 11: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 12...


core           INFO 	Loading data for British Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['44', '1', '4', '81', '55', '27', '18', '14', '23', '22', '2', '20', '3', '16', '77', '31', '11', '24', '63', '10']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Caricamento sessione round 12...
Sessione round 12 caricata con successo
Errore weather data (retry) round 12: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 12: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 13...
Caricamento sessione round 13...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '44', '16', '1', '55', '11', '63', '22', '18', '14', '3', '27', '23', '20', '77', '2', '31', '24', '10']
core           INFO 	Loading data for Belgian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['44', '81', '16', '1', '4', '55', '11', '14', '31', '3', '18', '23', '10', '20', '77', '22', '2', '27', '24', '63']


Sessione round 13 caricata con successo
Errore weather data (retry) round 13: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 13: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 14...
Caricamento sessione round 14...
Sessione round 14 caricata con successo
Errore weather data (retry) round 14: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 14: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 15...


core           INFO 	Loading data for Dutch Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '81', '55', '11', '63', '44', '10', '14', '27', '3', '18', '23', '31', '2', '22', '20', '77', '24']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Caricamento sessione round 15...
Sessione round 15 caricata con successo
Errore weather data (retry) round 15: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 15: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 16...
Caricamento sessione round 16...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '4', '55', '44', '1', '63', '11', '23', '20', '14', '43', '3', '31', '10', '77', '27', '24', '18', '22']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['81', '16', '63', '4', '1', '14', '23', '43', '44', '50', '27', '10', '3', '24', '31', '77', '11', '55', '18', '22']


Sessione round 16 caricata con successo
Errore weather data (retry) round 16: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 16: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 17...
Caricamento sessione round 17...
Sessione round 17 caricata con successo
Errore weather data (retry) round 17: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 17: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 18...


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']
core           INFO 	Loading data for United States Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info


Caricamento sessione round 18...
Sessione round 18 caricata con successo
Errore weather data (retry) round 18: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 18: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 19...
Caricamento sessione round 19...


req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '1', '4', '81', '63', '11', '27', '30', '43', '20', '10', '14', '22', '18', '23', '77', '31', '24', '44']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '16', '44', '63', '1', '20', '81', '27', '10', '18', '43', '31', '77', '24', '30', '11', '14', '23', '22']


Sessione round 19 caricata con successo
Errore weather data (retry) round 19: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 19: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 20...
Caricamento sessione round 20...
Sessione round 20 caricata con successo
Errore weather data (retry) round 20: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 20: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 21...


core           INFO 	Loading data for São Paulo Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '31', '10', '63', '16', '4', '22', '81', '30', '44', '11', '50', '77', '14', '24', '55', '43', '23', '18', '27']
core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Caricamento sessione round 21...
Sessione round 21 caricata con successo
Errore weather data (retry) round 21: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 21: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 22...
Caricamento sessione round 22...


req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['63', '44', '55', '16', '1', '4', '81', '27', '22', '11', '14', '20', '24', '43', '18', '30', '31', '77', '23', '10']
core           INFO 	Loading data for Qatar Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22', '30', '23', '27', '11', '18', '43', '31']


Sessione round 22 caricata con successo
Errore weather data (retry) round 22: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 22: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 23...
Caricamento sessione round 23...
Sessione round 23 caricata con successo
Errore weather data (retry) round 23: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 23: property 'weather_data' of 'Session' object has no setter
Secondo tentativo di caricamento per round 24...


core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.4.4]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for weather_data
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']


Caricamento sessione round 24...
Sessione round 24 caricata con successo
Errore weather data (retry) round 24: 'Session' object has no attribute 'load_weather_data'
Fallito anche il secondo tentativo per round 24: property 'weather_data' of 'Session' object has no setter

Processing round 1
Round 1 - Dati meteo calcolati: AirTemp=18.22738853503185, TrackTemp=23.652866242038222, Rain=0

Processing round 2
Round 2 - Dati meteo calcolati: AirTemp=25.528082191780825, TrackTemp=31.59315068493151, Rain=0

Processing round 3
Round 3 - Dati meteo calcolati: AirTemp=20.62222222222222, TrackTemp=38.40208333333333, Rain=0

Processing round 4
Round 4 - Dati meteo calcolati: AirTemp=21.691160220994476, TrackTemp=37.50110497237569, Rain=0

Processing round 5
Round 5 - Dati meteo calcolati: AirTemp=18.63939393939394, TrackTemp=29.792727272727273, Rain=0

Processing round 6
Round 6 - Dati meteo calcolati: AirTemp=28.521999999999995, TrackTemp=44.664, Rain=0

Processing round 7
Round 7 - Dati meteo cal

In [87]:
data.loc[data['qualifying_position'] <1, 'qualifying_position'] = 20 # va fatto per sistemare problema dei piloti sostituti

In [89]:
data.set_index('driver', inplace=True)

In [91]:
# Dizionario delle caratteristiche dei circuiti
track_info = {
    "Bahrain": {"turns": 15, "length_km": 5.412, "drs_zones": 2,
                "street_circuit": False, "altitude_m": 7, "longest_straight_m": 1090},
    "Jeddah": {"turns": 27, "length_km": 6.174, "drs_zones": 3,
               "street_circuit": True, "altitude_m": 0, "longest_straight_m": 1200},
    "Melbourne": {"turns": 14, "length_km": 5.278, "drs_zones": 4,
                 "street_circuit": True, "altitude_m": 10, "longest_straight_m": 860},
    "Suzuka": {"turns": 18, "length_km": 5.807, "drs_zones": 1,
              "street_circuit": False, "altitude_m": 50, "longest_straight_m": 1200},
    "Shanghai": {"turns": 16, "length_km": 5.451, "drs_zones": 2,
                "street_circuit": False, "altitude_m": 5, "longest_straight_m": 1170},
    "Miami": {"turns": 19, "length_km": 5.412, "drs_zones": 3,
             "street_circuit": False, "altitude_m": 2, "longest_straight_m": 1280},
    "Monaco": {"turns": 19, "length_km": 3.337, "drs_zones": 1,
              "street_circuit": True, "altitude_m": 10, "longest_straight_m": 510},
    "Barcelona": {"turns": 16, "length_km": 4.675, "drs_zones": 2,
                 "street_circuit": False, "altitude_m": 115, "longest_straight_m": 1047},
    "Montreal": {"turns": 14, "length_km": 4.361, "drs_zones": 2,
                "street_circuit": True, "altitude_m": 5, "longest_straight_m": 1168},
    "Spielberg": {"turns": 10, "length_km": 4.318, "drs_zones": 3,
                 "street_circuit": False, "altitude_m": 677, "longest_straight_m": 790},
    "Silverstone": {"turns": 18, "length_km": 5.891, "drs_zones": 2,
                   "street_circuit": False, "altitude_m": 153, "longest_straight_m": 770},
    "Hungaroring": {"turns": 14, "length_km": 4.381, "drs_zones": 1,
                   "street_circuit": False, "altitude_m": 238, "longest_straight_m": 908},
    "Spa": {"turns": 19, "length_km": 7.004, "drs_zones": 2,
           "street_circuit": False, "altitude_m": 401, "longest_straight_m": 1200},
    "Zandvoort": {"turns": 14, "length_km": 4.259, "drs_zones": 2,
                 "street_circuit": False, "altitude_m": 2, "longest_straight_m": 678},
    "Monza": {"turns": 11, "length_km": 5.793, "drs_zones": 2,
             "street_circuit": False, "altitude_m": 183, "longest_straight_m": 1120},
    "Singapore": {"turns": 23, "length_km": 5.063, "drs_zones": 3,
                 "street_circuit": True, "altitude_m": 5, "longest_straight_m": 832},
    "COTA": {"turns": 20, "length_km": 5.513, "drs_zones": 2,
            "street_circuit": False, "altitude_m": 150, "longest_straight_m": 1200},
    "Mexico": {"turns": 17, "length_km": 4.304, "drs_zones": 2,
              "street_circuit": False, "altitude_m": 2285, "longest_straight_m": 1314},
    "Interlagos": {"turns": 15, "length_km": 4.309, "drs_zones": 2,
                  "street_circuit": False, "altitude_m": 800, "longest_straight_m": 1200},
    "Abu Dhabi": {"turns": 21, "length_km": 5.554, "drs_zones": 2,
                  "street_circuit": False, "altitude_m": 2, "longest_straight_m": 1140},
    "Las Vegas": {"turns": 17, "length_km": 6.120, "drs_zones": 3,
                 "street_circuit": True, "altitude_m": 620, "longest_straight_m": 1920},
    "Qatar": {"turns": 16, "length_km": 5.380, "drs_zones": 2,
             "street_circuit": False, "altitude_m": 6, "longest_straight_m": 1068},
    "Baku": {"turns": 20, "length_km": 6.003, "drs_zones": 2,
            "street_circuit": True, "altitude_m": 28, "longest_straight_m": 2200},
    "Imola": {"turns": 19, "length_km": 4909, "drs_zones": 1,
            "street_circuit": False, "altitude_m": 47, "longest_straight_m": 1050}
}

# Mappatura tra numero di gara e nome del circuito
race_to_track = {
    1: "Bahrain",
    2: "Jeddah",
    3: "Melbourne",
    4: "Suzuka",
    5: "Shanghai",
    6: "Miami",
    7: "Imola",
    8: "Monaco",
    9: "Montreal",
    10: "Barcelona",
    11: "Spielberg",
    12: "Silverstone",
    13: "Hungaroring",
    14: "Spa",
    15: "Zandvoort",
    16: "Monza",            
    17: "Baku",
    18: "Singapore",
    19: "COTA",
    20: "Mexico",
    21: "Interlagos",
    22: "Las Vegas",
    23: "Qatar",
    24: "Abu Dhabi"
}

def add_track_features(df, race_col='race'):
    result_df = df.copy()
    result_df['track_name'] = result_df[race_col].map(race_to_track)
    result_df['track_turns'] = result_df['track_name'].map(lambda x: track_info[x]['turns'])
    result_df['track_length_km'] = result_df['track_name'].map(lambda x: track_info[x]['length_km'])
    result_df['track_drs_zones'] = result_df['track_name'].map(lambda x: track_info[x]['drs_zones'])
    result_df['is_street_circuit'] = result_df['track_name'].map(lambda x: track_info[x]['street_circuit'])
    result_df['track_altitude_m'] = result_df['track_name'].map(lambda x: track_info[x]['altitude_m'])
    result_df['longest_straight_m'] = result_df['track_name'].map(lambda x: track_info[x]['longest_straight_m'])    
    return result_df

In [93]:
ds = add_track_features(data)

In [99]:
#voglio riportare la variabile track_name all'inizio
cols = list(ds.columns)
cols.remove('track_name')  # Rimuove la colonna dalla lista
cols.insert(2, 'track_name')  # La inserisce all'inizio
ds1 = ds[cols]

In [107]:
ds1.head()

Unnamed: 0_level_0,season,race,track_name,team,qualifying_position,final_position,points,dnf,lapped,team_top10_count,...,air_temp,track_temp,total_rain,is_substitute,track_turns,track_length_km,track_drs_zones,is_street_circuit,track_altitude_m,longest_straight_m
driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VER,2024,1,Bahrain,Red Bull Racing,1,1,26.0,0,0,2,...,18.227389,23.652866,0,0,15,5.412,2,False,7,1090
PER,2024,1,Bahrain,Red Bull Racing,5,2,18.0,0,0,2,...,18.227389,23.652866,0,0,15,5.412,2,False,7,1090
SAI,2024,1,Bahrain,Ferrari,4,3,15.0,0,0,2,...,18.227389,23.652866,0,0,15,5.412,2,False,7,1090
LEC,2024,1,Bahrain,Ferrari,2,4,12.0,0,0,2,...,18.227389,23.652866,0,0,15,5.412,2,False,7,1090
RUS,2024,1,Bahrain,Mercedes,3,5,10.0,0,0,2,...,18.227389,23.652866,0,0,15,5.412,2,False,7,1090


In [128]:
df = ds1.drop(columns=['season', 'track_name','points','team','is_substitute'])
df = pd.get_dummies(df, columns=['is_street_circuit','dnf','lapped'], drop_first=True)

In [130]:
df.head()

Unnamed: 0_level_0,race,qualifying_position,final_position,team_top10_count,team_podiums_count,h2h_quali_score,h2h_race_score,last4racepoints,last4median_position,last4podiums,...,track_temp,total_rain,track_turns,track_length_km,track_drs_zones,track_altitude_m,longest_straight_m,is_street_circuit_True,dnf_1,lapped_1
driver,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VER,1,1,1,2,2,1,1,0.0,20,0,...,23.652866,0,15,5.412,2,7,1090,False,False,False
PER,1,5,2,2,2,-1,-1,0.0,20,0,...,23.652866,0,15,5.412,2,7,1090,False,False,False
SAI,1,4,3,2,1,-1,1,0.0,20,0,...,23.652866,0,15,5.412,2,7,1090,False,False,False
LEC,1,2,4,2,1,1,-1,0.0,20,0,...,23.652866,0,15,5.412,2,7,1090,False,False,False
RUS,1,3,5,2,0,1,1,0.0,20,0,...,23.652866,0,15,5.412,2,7,1090,False,False,False


In [132]:
df.columns

Index(['race', 'qualifying_position', 'final_position', 'team_top10_count',
       'team_podiums_count', 'h2h_quali_score', 'h2h_race_score',
       'last4racepoints', 'last4median_position', 'last4podiums',
       'historical_median_position', 'historical_podiums',
       'average_team_points_pw', 'consecutive_points_finishes', 'air_temp',
       'track_temp', 'total_rain', 'track_turns', 'track_length_km',
       'track_drs_zones', 'track_altitude_m', 'longest_straight_m',
       'is_street_circuit_True', 'dnf_1', 'lapped_1'],
      dtype='object')

# ORA IL DATASET E' PRONTO

In [116]:
def prepare_ranking_data(df, race_num):
    # Separa train e test
    train_data = df[df['race'] < race_num]
    test_data = df[df['race'] == race_num]

    # Crea gruppi per il ranking (ogni gara è un gruppo)
    groups = train_data.groupby('race').size().values #numero di piloti per gara

    return train_data, test_data, groups

def train_ranking_model(train_data, groups):
    X_train = train_data.drop(columns=['final_position', 'race'])
    y_train = train_data['final_position'].map(lambda x: 21 - int(x))
    model = xgb.XGBRanker(
        objective='rank:ndcg',
        random_state=42,
        n_estimators=2500,
        max_depth=10,        # Aumentato per catturare relazioni più complesse
        learning_rate=0.001, # Ridotto per un training più graduale
        gamma=0.5,
        min_child_weight=5  # Aiuta a prevenire overfitting sui singoli esempi
    )
    model.fit(X_train, y_train, group=groups)
    return model

def evaluate_predictions(y_true, y_pred, k=20):
    """Valuta le predizioni usando metriche di ranking"""
    # Calcola NDCG@k
    ndcg = ndcg_score([y_true], [y_pred], k=k)

    # Calcola accuracy della top-3 prediction
    true_top3 = set(np.argsort(y_true)[-3:])
    pred_top3 = set(np.argsort(y_pred)[-3:])
    top3_accuracy = len(true_top3.intersection(pred_top3)) / 3

    return {
        f'ndcg@{k}': ndcg,
        'top3_accuracy': top3_accuracy
    }

In [174]:
# Uso del modello
race_to_predict = 19  # o qualsiasi altra gara
train_data, test_data, groups = prepare_ranking_data(df, race_to_predict)

In [176]:
model = train_ranking_model(train_data, groups)

In [177]:
# Predizione
X_test = test_data.drop(columns=['final_position', 'race'])
y_test = test_data['final_position'].map(lambda x: 21 - x)
y_pred = model.predict(X_test)

# Valutazione
metrics = evaluate_predictions(y_test.values, y_pred,k=20)
print(f"Metriche di valutazione: {metrics}")

Metriche di valutazione: {'ndcg@20': 0.9595958157670873, 'top3_accuracy': 0.3333333333333333}


In [178]:
# Risultati finali
result = test_data[['race']].copy()
result['driver'] = test_data.index
result['qualifying_position'] = test_data['qualifying_position']
result['final_position_real'] = test_data['final_position']
result['final_position_pred'] = np.round(y_pred, 4)
#devo riconvertirli in numeri posizione finale
result = result.sort_values('final_position_pred',ascending=False).reset_index(drop=True)
result

Unnamed: 0,race,driver,qualifying_position,final_position_real,final_position_pred
0,19,NOR,1,4,0.7345
1,19,VER,2,3,0.571
2,19,PIA,5,5,0.2596
3,19,LEC,4,1,0.2365
4,19,SAI,3,2,-0.239
5,19,RUS,20,6,-0.7864
6,19,PER,9,7,-0.9918
7,19,HAM,17,20,-1.0111
8,19,GAS,6,12,-1.2486
9,19,ALO,7,13,-1.2486


In [142]:
# Analisi feature importance
importance = pd.DataFrame({
    'feature': X_test.columns,
    'importance': model.feature_importances_
}).sort_values('importance', ascending=False)
print("\nFeature Importance:")
print(importance)


Feature Importance:
                        feature  importance
0           qualifying_position    0.366158
2            team_podiums_count    0.166379
17              track_drs_zones    0.047605
6          last4median_position    0.047088
7                  last4podiums    0.033208
3               h2h_quali_score    0.030919
4                h2h_race_score    0.030696
10       average_team_points_pw    0.030606
13                   track_temp    0.028270
1              team_top10_count    0.028211
8    historical_median_position    0.026593
18             track_altitude_m    0.025558
12                     air_temp    0.025367
16              track_length_km    0.024118
15                  track_turns    0.023195
9            historical_podiums    0.023108
19           longest_straight_m    0.022101
5               last4racepoints    0.020823
20       is_street_circuit_True    0.000000
21                        dnf_1    0.000000
11  consecutive_points_finishes    0.000000
14         

In [None]:
# TRY TO MERGE ALL THE 24 RACES TOGETHER IN A FILE EXCEL

In [144]:
results = []
feature_importances = []
df_results = []  # Per salvare le predizioni
total_races = 24  # Numero di gare

for race in range(1, total_races + 1):
    print(f"Processing race {race}...")
    train_data, test_data, groups = prepare_ranking_data(df, race)
    
    if test_data.empty:
        continue  # Salta se non ci sono dati per la gara
    
    model = train_ranking_model(train_data, groups)
    X_test = test_data.drop(columns=['final_position', 'race'])
    y_test = test_data['final_position'].map(lambda x: 21 - x).values
    y_pred = model.predict(X_test)
    
    metrics = evaluate_predictions(y_test, y_pred, k=20)
    results.append({'race': race, **metrics})
    
    importance = pd.DataFrame({
        'feature': X_test.columns,
        'importance': model.feature_importances_
    })
    feature_importances.append(importance)
    
    df_results.append(pd.DataFrame({
        'race': race,
        'driver': test_data.index,
        'actual_position': test_data['final_position'].values,
        'qualifyin_position':test_data['qualifying_position'].values,
        'predicted_score': y_pred
    }))

# Uniamo i risultati delle predizioni
df_results = pd.concat(df_results).sort_values(by=['race', 'predicted_score'], ascending=[True, False])

df_metrics = pd.DataFrame(results)
df_feature_importance = pd.concat(feature_importances).groupby('feature').mean().reset_index()

# Salvataggio in Excel con più fogli
#with pd.ExcelWriter("f1_ranking_evaluation.xlsx") as writer:
#    df_metrics.to_excel(writer, sheet_name="Metrics", index=False)
#    df_results.to_excel(writer, sheet_name="Predictions", index=False)
#    df_feature_importance.to_excel(writer, sheet_name="Feature_Importance", index=False)
#print("Report salvato in 'f1_ranking_evaluation.xlsx'")

Processing race 1...
Processing race 2...
Processing race 3...
Processing race 4...
Processing race 5...
Processing race 6...
Processing race 7...
Processing race 8...
Processing race 9...
Processing race 10...
Processing race 11...
Processing race 12...
Processing race 13...
Processing race 14...
Processing race 15...
Processing race 16...
Processing race 17...
Processing race 18...
Processing race 19...
Processing race 20...
Processing race 21...
Processing race 22...
Processing race 23...
Processing race 24...


In [145]:
feature_importances #lista delle feature importances per tutte le 24 gare

[                        feature  importance
 0           qualifying_position         0.0
 1              team_top10_count         0.0
 2            team_podiums_count         0.0
 3               h2h_quali_score         0.0
 4                h2h_race_score         0.0
 5               last4racepoints         0.0
 6          last4median_position         0.0
 7                  last4podiums         0.0
 8    historical_median_position         0.0
 9            historical_podiums         0.0
 10       average_team_points_pw         0.0
 11  consecutive_points_finishes         0.0
 12                     air_temp         0.0
 13                   track_temp         0.0
 14                   total_rain         0.0
 15                  track_turns         0.0
 16              track_length_km         0.0
 17              track_drs_zones         0.0
 18             track_altitude_m         0.0
 19           longest_straight_m         0.0
 20       is_street_circuit_True         0.0
 21       

In [None]:
# PROVIAMO ALTRI MODELLI
# LigthGBM ranker

In [215]:
def train_ranking_model(train_data, groups):
    X_train = train_data.drop(columns=['final_position', 'race'])
    y_train = train_data['final_position'].map(lambda x: 21 - int(x))  # Converte posizioni in ranking score
    
    model = lgb.LGBMRanker(
        objective='lambdarank',
        boosting_type='gbdt',
        random_state=42,
        n_estimators=2500,
        max_depth=10,        # Stessa profondità per catturare relazioni complesse
        learning_rate=0.001, # Training più graduale
        num_leaves=64,       # Aumentato per catturare più interazioni
        min_child_weight=5,  # Previene overfitting
        importance_type='gain'
    )
    
    model.fit(X_train, y_train, group=groups)
    return model

def evaluate_predictions(y_true, y_pred, k=20):
    """Valuta le predizioni usando metriche di ranking"""
    ndcg = ndcg_score([y_true], [y_pred], k=k)  # Calcola NDCG@k
    true_top3 = set(np.argsort(y_true)[-3:])
    pred_top3 = set(np.argsort(y_pred)[-3:])
    top3_accuracy = len(true_top3.intersection(pred_top3)) / 3  # Accuracy delle top 3 previsioni
    return {
        f'ndcg@{k}': ndcg,
        'top3_accuracy': top3_accuracy
    }

In [257]:
# Uso del modello
race_to_predict = 17  # o qualsiasi altra gara
train_data, test_data, groups = prepare_ranking_data(df, race_to_predict)

In [259]:
model = train_ranking_model(train_data, groups)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000296 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 320
[LightGBM] [Info] Number of data points in the train set: 319, number of used features: 17


In [261]:
# Predizione
X_test = test_data.drop(columns=['final_position', 'race'])
y_test = test_data['final_position'].map(lambda x: 21 - x)
y_pred = model.predict(X_test)

# Valutazione
metrics = evaluate_predictions(y_test.values, y_pred,k=20)
print(f"Metriche di valutazione: {metrics}")

Metriche di valutazione: {'ndcg@20': 0.9381429774902449, 'top3_accuracy': 0.6666666666666666}


In [265]:
# Risultati finali
result = test_data[['race']].copy()
result['driver'] = test_data.index
result['qualifying_position'] = test_data['qualifying_position']
result['final_position_real'] = test_data['final_position']
result['final_position_pred'] = np.round(y_pred, 4)
#devo riconvertirli in numeri posizione finale
result = result.sort_values('final_position_pred',ascending=False).reset_index(drop=True)
result

Unnamed: 0,race,driver,qualifying_position,final_position_real,final_position_pred
0,17,LEC,1,2,0.8936
1,17,PIA,2,1,0.4622
2,17,SAI,3,18,-0.1468
3,17,VER,6,5,-0.163
4,17,PER,4,17,-0.4404
5,17,RUS,5,3,-0.513
6,17,HAM,20,9,-0.9286
7,17,NOR,15,4,-1.0089
8,17,ALO,7,6,-2.0392
9,17,ALB,9,7,-2.1201


In [269]:
# Analisi feature importance
importance = pd.DataFrame({
    'feature': X_test.columns,
    'importance': np.round(model.feature_importances_/model.feature_importances_.sum(),6)*100
}).sort_values('importance', ascending=False)
print("\nFeature Importance:")
print(importance)


Feature Importance:
                        feature  importance
0           qualifying_position     61.6714
6          last4median_position     17.6777
2            team_podiums_count      5.9814
10       average_team_points_pw      5.5965
5               last4racepoints      4.2321
7                  last4podiums      1.7981
4                h2h_race_score      1.1619
3               h2h_quali_score      0.5663
1              team_top10_count      0.5040
11  consecutive_points_finishes      0.4184
13                   track_temp      0.3191
8    historical_median_position      0.0494
12                     air_temp      0.0122
9            historical_podiums      0.0114
14                   total_rain      0.0000
15                        dnf_1      0.0000
16                     lapped_1      0.0000
17              is_substitute_1      0.0000
