# F1 Data Collection for Singapore GP Prediction

This notebook collects comprehensive historical F1 data including:
- Singapore GP results (2008-2024)
- Driver performance statistics
- Team/Constructor performance
- Weather data
- Track characteristics


In [1]:
import pandas as pd
import numpy as np
import requests
import json
import time
from datetime import datetime, timedelta
import fastf1
import os
from pathlib import Path

# Enable FastF1 cache
fastf1.Cache.enable_cache('../f1_cache')

# Set up data directory
data_dir = Path('../data')
data_dir.mkdir(exist_ok=True)

print("Data collection setup complete!")


Data collection setup complete!


## 1. Singapore GP Historical Results (2008-2024)

In [2]:
def collect_singapore_gp_results():
    """Collect Singapore GP results from 2008-2024 using FastF1"""
    
    singapore_results = []
    
    # Singapore GP started in 2008
    for year in range(2008, 2025):
        try:
            print(f"Collecting {year} Singapore GP data...")
            
            # Get race session
            session = fastf1.get_session(year, 'Singapore', 'R')
            session.load()
            
            # Get results
            results = session.results
            
            # Index(['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName',
       #'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName',
       #'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition',
       #'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points', 'Laps'],
      #dtype='object')

            # Add year and race info
            results['Year'] = year
            results['Race'] = 'Singapore'
            results['Date'] = session.date
            
            # Select relevant columns
            cols_to_keep = ['Year', 'Race', 'Date', 'Abbreviation', 'FullName', 'TeamName', 
                          'Position', 'Points', 'GridPosition', 'Status', 'Time', 'FastestLapTime']
            
            available_cols = [col for col in cols_to_keep if col in results.columns]
            results_subset = results[available_cols].copy()
            
            singapore_results.append(results_subset)
            
            # Small delay to be respectful to the API
            time.sleep(1)
            
        except Exception as e:
            print(f"Error collecting {year} data: {e}")
            continue
    
    # Combine all results
    if singapore_results:
        all_results = pd.concat(singapore_results, ignore_index=True)
        
        # Save to CSV
        output_path = data_dir / 'singapore_gp_results_2008_2024.csv'
        all_results.to_csv(output_path, index=False)
        print(f"Saved Singapore GP results: {output_path}")
        print(f"Total records: {len(all_results)}")
        
        return all_results
    else:
        print("No data collected")
        return None

# Run the collection
singapore_results = collect_singapore_gp_results()


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2008 Singapore GP data...


core           INFO 	Finished loading data for 20 drivers: ['5', '7', '22', '12', '15', '3', '9', '8', '16', '23', '4', '14', '2', '21', '1', '11', '20', '10', '17', '6']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2009 Singapore GP data...


core           INFO 	Finished loading data for 20 drivers: ['1', '10', '7', '15', '22', '23', '2', '5', '17', '4', '16', '9', '3', '21', '11', '12', '14', '20', '6', '8']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2010 Singapore GP data...


core           INFO 	Finished loading data for 24 drivers: ['8', '5', '6', '1', '4', '9', '11', '7', '14', '10', '12', '17', '3', '16', '25', '19', '24', '22', '2', '20', '23', '21', '18', '15']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2011 Singapore GP data...


core           INFO 	Finished loading data for 24 drivers: ['1', '4', '2', '5', '3', '15', '8', '14', '6', '17', '12', '18', '11', '16', '9', '20', '10', '25', '22', '23', '19', '21', '7', '24']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2012 Singapore GP data...


core           INFO 	Finished loading data for 24 drivers: ['1', '3', '5', '11', '8', '9', '10', '6', '16', '15', '2', '24', '14', '12', '20', '25', '22', '19', '21', '17', '7', '18', '23', '4']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2013 Singapore GP data...


core           INFO 	Finished loading data for 22 drivers: ['1', '3', '7', '9', '10', '4', '5', '6', '11', '15', '16', '12', '17', '18', '2', '21', '23', '22', '20', '14', '8', '19']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2014 Singapore GP data...


core           INFO 	Finished loading data for 22 drivers: ['44', '1', '3', '14', '19', '25', '11', '7', '27', '20', '77', '13', '8', '26', '9', '17', '4', '22', '99', '21', '6', '10']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2015 Singapore GP data...


core           INFO 	Finished loading data for 20 drivers: ['5', '3', '7', '6', '77', '26', '11', '33', '55', '12', '9', '13', '8', '53', '28', '22', '14', '44', '19', '27']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2016 Singapore GP data...


core           INFO 	Finished loading data for 22 drivers: ['6', '3', '44', '7', '5', '33', '14', '11', '26', '20', '21', '19', '12', '55', '30', '94', '9', '31', '22', '77', '27', '8']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting 2017 Singapore GP data...


core           INFO 	Finished loading data for 20 drivers: ['44', '3', '77', '55', '11', '30', '2', '18', '8', '31', '19', '94', '20', '27', '9', '26', '14', '5', '33', '7']


Collecting 2018 Singapore GP data...


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '33', '5', '77', '7', '3', '14', '55', '16', '27', '9', '2', '10', '18', '8', '11', '28', '20', '35', '31']


Collecting 2019 Singapore GP data...


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['5', '16', '33', '44', '77', '23', '4', '10', '27', '99', '8', '55', '18', '3', '26', '88', '20', '7', '11', '63']


Collecting 2020 Singapore GP data...


core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '33', '77', '18', '23', '5', '11', '3', '55', '20', '16', '26', '4', '31', '7', '8', '99', '63', '6', '10']


Collecting 2021 Singapore GP data...


core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['31', '44', '55', '14', '10', '22', '6', '63', '33', '7', '3', '47', '99', '9', '4', '77', '11', '16', '18', '5']


Collecting 2022 Singapore GP data...


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['11', '16', '55', '4', '3', '18', '1', '5', '44', '10', '77', '20', '47', '63', '22', '31', '23', '14', '6', '24']


Collecting 2023 Singapore GP data...


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '44', '16', '1', '10', '81', '11', '40', '20', '23', '24', '27', '2', '14', '63', '77', '31', '22', '18']


Collecting 2024 Singapore GP data...


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']


Saved Singapore GP results: ..\data\singapore_gp_results_2008_2024.csv
Total records: 358


## 2. Weather Data Collection


In [3]:
# Import weather data collector
import sys
sys.path.append('.')
from weather_data_collector import WeatherDataCollector

# Initialize weather collector
weather_collector = WeatherDataCollector(data_dir)

# Collect weather data
print("Collecting weather data for Singapore GP...")
weather_data = weather_collector.collect_weather_data()

if weather_data is not None:
    print(f"Weather data collected: {len(weather_data)} records")
    print("\nSample weather data:")
    print(weather_data.head())
    
    # Show weather conditions distribution
    if 'Weather_Condition' in weather_data.columns:
        print("\nWeather conditions distribution:")
        print(weather_data['Weather_Condition'].value_counts())
else:
    print("No weather data collected")


core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Collecting weather data for Singapore GP...
Collecting weather data for Singapore GP years: [2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]
Attempting to collect weather data from FastF1...


core           INFO 	Finished loading data for 20 drivers: ['5', '7', '22', '12', '15', '3', '9', '8', '16', '23', '4', '14', '2', '21', '1', '11', '20', '10', '17', '6']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2008: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2008


core           INFO 	Finished loading data for 20 drivers: ['1', '10', '7', '15', '22', '23', '2', '5', '17', '4', '16', '9', '3', '21', '11', '12', '14', '20', '6', '8']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2009: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2009


core           INFO 	Finished loading data for 24 drivers: ['8', '5', '6', '1', '4', '9', '11', '7', '14', '10', '12', '17', '3', '16', '25', '19', '24', '22', '2', '20', '23', '21', '18', '15']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2010: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2010


core           INFO 	Finished loading data for 24 drivers: ['1', '4', '2', '5', '3', '15', '8', '14', '6', '17', '12', '18', '11', '16', '9', '20', '10', '25', '22', '23', '19', '21', '7', '24']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2011: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2011


core           INFO 	Finished loading data for 24 drivers: ['1', '3', '5', '11', '8', '9', '10', '6', '16', '15', '2', '24', '14', '12', '20', '25', '22', '19', '21', '17', '7', '18', '23', '4']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2012: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2012


core           INFO 	Finished loading data for 22 drivers: ['1', '3', '7', '9', '10', '4', '5', '6', '11', '15', '16', '12', '17', '18', '2', '21', '23', '22', '20', '14', '8', '19']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2013: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2013


core           INFO 	Finished loading data for 22 drivers: ['44', '1', '3', '14', '19', '25', '11', '7', '27', '20', '77', '13', '8', '26', '9', '17', '4', '22', '99', '21', '6', '10']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2014: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2014


core           INFO 	Finished loading data for 20 drivers: ['5', '3', '7', '6', '77', '26', '11', '33', '55', '12', '9', '13', '8', '53', '28', '22', '14', '44', '19', '27']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2015: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2015


core           INFO 	Finished loading data for 22 drivers: ['6', '3', '44', '7', '5', '33', '14', '11', '26', '20', '21', '19', '12', '55', '30', '94', '9', '31', '22', '77', '27', '8']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...


Error getting FastF1 weather data for 2016: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2016


core           INFO 	Finished loading data for 20 drivers: ['44', '3', '77', '55', '11', '30', '2', '18', '8', '31', '19', '94', '20', '27', '9', '26', '14', '5', '33', '7']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


Error getting FastF1 weather data for 2017: The data you are trying to access has not been loaded yet. See `Session.load`
✗ No FastF1 weather data for 2017


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '33', '5', '77', '7', '3', '14', '55', '16', '27', '9', '2', '10', '18', '8', '11', '28', '20', '35', '31']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✓ Collected FastF1 weather data for 2018


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['5', '16', '33', '44', '77', '23', '4', '10', '27', '99', '8', '55', '18', '3', '26', '88', '20', '7', '11', '63']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✓ Collected FastF1 weather data for 2019


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '33', '77', '18', '23', '5', '11', '3', '55', '20', '16', '26', '4', '31', '7', '8', '99', '63', '6', '10']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✓ Collected FastF1 weather data for 2020


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['31', '44', '55', '14', '10', '22', '6', '63', '33', '7', '3', '47', '99', '9', '4', '77', '11', '16', '18', '5']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✓ Collected FastF1 weather data for 2021


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['11', '16', '55', '4', '3', '18', '1', '5', '44', '10', '77', '20', '47', '63', '22', '31', '23', '14', '6', '24']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✓ Collected FastF1 weather data for 2022


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '44', '16', '1', '10', '81', '11', '40', '20', '23', '24', '27', '2', '14', '63', '77', '31', '22', '18']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✓ Collected FastF1 weather data for 2023


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '81', '63', '16', '44', '55', '14', '27', '11', '43', '22', '31', '18', '24', '77', '10', '3', '20', '23']


✓ Collected FastF1 weather data for 2024
Creating synthetic weather data to fill gaps...
Creating synthetic weather data based on Singapore climate patterns...
Saved weather data: ..\data\singapore_weather_2008_2024.csv
Total records: 17
Weather data collected: 17 records

Sample weather data:
                      Time  AirTemp  Humidity  Pressure Rainfall  TrackTemp  \
0   0 days 00:00:03.989000     29.9      64.1    1007.1    False       34.2   
133 0 days 00:00:11.591000     30.0      70.7    1009.7    False       37.3   
288 0 days 00:00:05.600000     20.4      79.5     986.6     True       26.7   
422 0 days 00:00:31.224000     29.1      55.5     978.6    False       47.0   
613 0 days 00:00:18.363000     25.2      90.0    1007.7     True       24.5   

     WindDirection  WindSpeed  Year       Race  ... Temperature_Min_C  \
0            168.0        1.8  2018  Singapore  ...               NaN   
133          176.0        2.9  2019  Singapore  ...               NaN   
288        

## 3. Historical F1 Data Collection (Ergast API)


In [None]:
# Import Ergast data collector
from ergast_data_collector import ErgastDataCollector

# Initialize Ergast collector
ergast_collector = ErgastDataCollector(data_dir)

# Collect comprehensive historical data
print("Collecting historical F1 data from Ergast API...")
print("This may take a few minutes...")

historical_data = ergast_collector.collect_all_data()

# Print summary of collected data
print("\n=== HISTORICAL DATA COLLECTION SUMMARY ===")
for data_type, df in historical_data.items():
    if df is not None:
        print(f"✓ {data_type}: {len(df)} records")
    else:
        print(f"✗ {data_type}: No data collected")


## 4. Data Analysis and Validation


In [10]:
# Comprehensive data analysis and validation
def analyze_collected_data():
    """Analyze all collected data and provide insights, with robust handling for missing columns"""
    
    print("=== COMPREHENSIVE DATA ANALYSIS ===\n")
    
    # Check what data files we have
    data_files = list(data_dir.glob('*.csv'))
    print(f"Total data files collected: {len(data_files)}")
    
    for file in data_files:
        print(f"📁 {file.name}")
    
    print("\n" + "="*50)
    
    # Analyze Singapore GP results
    singapore_file = data_dir / 'singapore_gp_results_2008_2024.csv'
    if singapore_file.exists():
        print("\n🏁 SINGAPORE GP RESULTS ANALYSIS")
        results_df = pd.read_csv(singapore_file)
        
        # Print years covered and total races
        if 'Year' in results_df.columns:
            print(f"Years covered: {results_df['Year'].min()} - {results_df['Year'].max()}")
            print(f"Total races: {results_df['Year'].nunique()}")
        else:
            print("Year column not found in Singapore GP results.")
        
        # Print total drivers
        driver_code_col = None
        for col in ['DriverCode', 'Abbreviation', 'DriverId']:
            if col in results_df.columns:
                driver_code_col = col
                break
        if driver_code_col:
            print(f"Total drivers: {results_df[driver_code_col].nunique()}")
        else:
            print("No driver code/abbreviation column found in Singapore GP results.")
        
        # Print total constructors
        constructor_col = None
        for col in ['ConstructorName', 'TeamName']:
            if col in results_df.columns:
                constructor_col = col
                break
        if constructor_col:
            print(f"Total constructors: {results_df[constructor_col].nunique()}")
        else:
            print("No constructor/team column found in Singapore GP results.")
        
        # Show winners
        position_col = None
        for col in ['Position', 'positionOrder', 'Pos']:
            if col in results_df.columns:
                position_col = col
                break
        if position_col and driver_code_col and constructor_col:
            winners = results_df[results_df[position_col] == 1].copy()
            # Try to get driver name columns
            given_col = None
            family_col = None
            for g in ['DriverGivenName', 'GivenName', 'FullName']:
                if g in results_df.columns:
                    given_col = g
                    break
            for f in ['DriverFamilyName', 'FamilyName']:
                if f in results_df.columns:
                    family_col = f
                    break
            print("\n🏆 Singapore GP Winners:")
            for _, winner in winners.iterrows():
                year = winner['Year'] if 'Year' in winner else "?"
                driver_code = winner[driver_code_col]
                constructor = winner[constructor_col]
                if given_col and family_col:
                    print(f"{year}: {winner[given_col]} {winner[family_col]} ({driver_code}) - {constructor}")
                elif given_col:
                    print(f"{year}: {winner[given_col]} ({driver_code}) - {constructor}")
                else:
                    print(f"{year}: {driver_code} - {constructor}")
        else:
            print("Could not determine winners due to missing columns.")
        
        # Most successful drivers at Singapore
        if position_col and driver_code_col:
            print("\n📊 Most Successful Drivers at Singapore GP:")
            driver_wins = results_df[results_df[position_col] == 1][driver_code_col].value_counts()
            print(driver_wins.head())
        else:
            print("Cannot compute most successful drivers (missing columns).")
        
        # Most successful constructors at Singapore
        if position_col and constructor_col:
            print("\n🏭 Most Successful Constructors at Singapore GP:")
            constructor_wins = results_df[results_df[position_col] == 1][constructor_col].value_counts()
            print(constructor_wins.head())
        else:
            print("Cannot compute most successful constructors (missing columns).")
    
    # Analyze driver standings
    driver_standings_file = data_dir / 'driver_standings_2015_2024.csv'
    if driver_standings_file.exists():
        print("\n\n👤 DRIVER STANDINGS ANALYSIS")
        standings_df = pd.read_csv(driver_standings_file)
        
        if 'Year' in standings_df.columns:
            print(f"Years covered: {standings_df['Year'].min()} - {standings_df['Year'].max()}")
        print(f"Total driver-season records: {len(standings_df)}")
        
        # Top drivers by total points
        driver_code_col = None
        for col in ['DriverCode', 'Abbreviation', 'DriverId']:
            if col in standings_df.columns:
                driver_code_col = col
                break
        if driver_code_col and 'Points' in standings_df.columns:
            print("\n🏆 Top Drivers by Total Points (2015-2024):")
            top_drivers = standings_df.groupby(driver_code_col)['Points'].sum().sort_values(ascending=False)
            print(top_drivers.head(10))
        else:
            print("Cannot compute top drivers by points (missing columns).")
        
        # Most consistent drivers (average position)
        if driver_code_col and 'Position' in standings_df.columns:
            print("\n📈 Most Consistent Drivers (Average Championship Position):")
            avg_positions = standings_df.groupby(driver_code_col)['Position'].mean().sort_values()
            print(avg_positions.head(10))
        else:
            print("Cannot compute most consistent drivers (missing columns).")
    
    # Analyze constructor standings
    constructor_file = data_dir / 'constructor_standings_2015_2024.csv'
    if constructor_file.exists():
        print("\n\n🏭 CONSTRUCTOR STANDINGS ANALYSIS")
        constructor_df = pd.read_csv(constructor_file)
        
        if 'Year' in constructor_df.columns:
            print(f"Years covered: {constructor_df['Year'].min()} - {constructor_df['Year'].max()}")
        print(f"Total constructor-season records: {len(constructor_df)}")
        
        # Top constructors by total points
        constructor_col = None
        for col in ['ConstructorName', 'TeamName']:
            if col in constructor_df.columns:
                constructor_col = col
                break
        if constructor_col and 'Points' in constructor_df.columns:
            print("\n🏆 Top Constructors by Total Points (2015-2024):")
            top_constructors = constructor_df.groupby(constructor_col)['Points'].sum().sort_values(ascending=False)
            print(top_constructors.head(10))
        else:
            print("Cannot compute top constructors by points (missing columns).")
    
    # Analyze weather data
    weather_file = data_dir / 'singapore_weather_2008_2024.csv'
    if weather_file.exists():
        print("\n\n🌤️ WEATHER DATA ANALYSIS")
        weather_df = pd.read_csv(weather_file)
        
        if 'Year' in weather_df.columns:
            print(f"Years covered: {weather_df['Year'].min()} - {weather_df['Year'].max()}")
        print(f"Total weather records: {len(weather_df)}")
        
        if 'Weather_Condition' in weather_df.columns:
            print("\n🌦️ Weather Conditions Distribution:")
            print(weather_df['Weather_Condition'].value_counts())
        
        if 'Temperature_Avg_C' in weather_df.columns:
            print(f"\n🌡️ Temperature Statistics:")
            print(f"Average: {weather_df['Temperature_Avg_C'].mean():.1f}°C")
            print(f"Min: {weather_df['Temperature_Avg_C'].min():.1f}°C")
            print(f"Max: {weather_df['Temperature_Avg_C'].max():.1f}°C")
    
    print("\n" + "="*50)
    print("✅ Data collection and analysis complete!")
    
    return data_files

# Run the analysis
data_files = analyze_collected_data()


=== COMPREHENSIVE DATA ANALYSIS ===

Total data files collected: 2
📁 singapore_gp_results_2008_2024.csv
📁 singapore_weather_2008_2024.csv


🏁 SINGAPORE GP RESULTS ANALYSIS
Years covered: 2008 - 2024
Total races: 17
Total drivers: 73
Total constructors: 30

🏆 Singapore GP Winners:
2008: Fernando Alonso (ALO) - Renault
2009: Lewis Hamilton (HAM) - McLaren
2010: Fernando Alonso (ALO) - Ferrari
2011: Sebastian Vettel (VET) - Red Bull
2012: Sebastian Vettel (VET) - Red Bull
2013: Sebastian Vettel (VET) - Red Bull
2014: Lewis Hamilton (HAM) - Mercedes
2015: Sebastian Vettel (VET) - Ferrari
2016: Nico Rosberg (ROS) - Mercedes
2017: Lewis Hamilton (HAM) - Mercedes
2018: Lewis Hamilton (HAM) - Mercedes
2019: Sebastian Vettel (VET) - Ferrari
2020: Lewis Hamilton (HAM) - Mercedes
2021: Esteban Ocon (OCO) - Alpine
2022: Sergio Perez (PER) - Red Bull Racing
2023: Carlos Sainz (SAI) - Ferrari
2024: Lando Norris (NOR) - McLaren

📊 Most Successful Drivers at Singapore GP:
Abbreviation
HAM    5
VET    