# Importing neccesary libraries

In [1]:
!pip install fastf1

import fastf1
import pandas as pd
import numpy as np
from fastf1 import Cache

Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.0-py3-none-any.whl.metadata (4.9 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

# Set Up Cache and Define Drivers

In [11]:
# Enable caching
Cache.enable_cache('/content/cache')

# Define non-rookie drivers for 2025 with their FastF1 abbreviations
non_rookie_drivers = {
    'VER': 'Max Verstappen', 'HAM': 'Lewis Hamilton', 'NOR': 'Lando Norris',
    'LEC': 'Charles Leclerc', 'PIA': 'Oscar Piastri', 'RUS': 'George Russell',
    'SAI': 'Carlos Sainz', 'TSU': 'Yuki Tsunoda', 'ALO': 'Fernando Alonso',
    'GAS': 'Pierre Gasly', 'OCO': 'Esteban Ocon', 'HUL': 'Nico Hülkenberg',
    'ALB': 'Alex Albon', 'LAW': 'Liam Lawson', 'STR': 'Lance Stroll'
}

# Dictionary to store finishing positions and lap time variance
driver_stats = {driver_code: {'finishes': [], 'lap_variance': [], 'years_raced': 0}
                for driver_code in non_rookie_drivers.keys()}

# Fetch Historical Suzuka Data (2018-2024)

In [14]:
# Loop through each year’s Japanese GP
for year in range(2018, 2025):
    try:
        session = fastf1.get_session(year, 'Japan', 'R')
        session.load(telemetry=False)  # No telemetry needed, saves time
        results = session.results
        laps = session.laps

        # Track which drivers raced this year
        raced_drivers = set()

        for driver_code, driver_name in non_rookie_drivers.items():
            # Match driver by Abbreviation (more reliable than DriverNumber)
            driver_result = results[results['Abbreviation'] == driver_code]
            if not driver_result.empty:
                finish = driver_result['Position'].values[0]
                if pd.notna(finish):
                    driver_stats[driver_code]['finishes'].append(float(finish))
                    raced_drivers.add(driver_code)

            # Get lap time variance
            driver_laps = laps.pick_driver(driver_code)
            if not driver_laps.empty:
                lap_times = driver_laps['LapTime'].dropna().dt.total_seconds()
                if len(lap_times) > 1:
                    variance = np.var(lap_times)
                    driver_stats[driver_code]['lap_variance'].append(variance)

        # Update years raced for drivers who participated
        for driver_code in raced_drivers:
            driver_stats[driver_code]['years_raced'] += 1

    except Exception as e:
        print(f"Error loading {year}: {e}")

# Calculate averages for drivers with data
for driver_code in non_rookie_drivers.keys():
    finishes = driver_stats[driver_code]['finishes']
    variances = driver_stats[driver_code]['lap_variance']
    if finishes:
        driver_stats[driver_code]['avg_finish'] = round(sum(finishes) / len(finishes), 2)
    if variances:
        driver_stats[driver_code]['avg_variance'] = round(sum(variances) / len(variances), 4)

core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core       

In [15]:
# Create a DataFrame from driver_stats
data = []
for driver_code, driver_name in non_rookie_drivers.items():
    finishes = driver_stats[driver_code]['finishes']
    variances = driver_stats[driver_code]['lap_variance']
    years_raced = driver_stats[driver_code]['years_raced']
    avg_finish = driver_stats[driver_code].get('avg_finish', 'N/A')
    avg_variance = driver_stats[driver_code].get('avg_variance', 'N/A')

    data.append({
        'Driver': driver_name,
        'Years Raced': years_raced,
        'Average Finish': avg_finish,
        'Average Lap Variance': avg_variance,
        'Finishing Positions': finishes if finishes else 'No data',
        'Lap Variances': [round(v, 4) for v in variances] if variances else 'No data'
    })

# Convert to DataFrame
df = pd.DataFrame(data)

# Style the table for better presentation
def highlight_na(val):
    color = '#f2dede' if val == 'N/A' or val == 'No data' else 'white'
    return f'background-color: {color}'

styled_df = df.style.set_properties(**{
    'text-align': 'center',
    'border': '1px solid black',
    'padding': '5px',
    'color': 'black'  # Ensure all text is black
}).set_table_styles([
    {'selector': 'th', 'props': [('background-color', '#4CAF50'), ('color', 'white'), ('font-weight', 'bold'), ('text-align', 'center'), ('border', '1px solid black')]}
]).applymap(highlight_na)

# Display the styled table
display(styled_df)

  ]).applymap(highlight_na)


Unnamed: 0,Driver,Years Raced,Average Finish,Average Lap Variance,Finishing Positions,Lap Variances
0,Max Verstappen,10,3.1,49.8337,"[1.0, 1.0, 1.0, 3.0, 18.0, 2.0, 2.0, 1.0, 1.0, 1.0]","[31.4843, 34.4928, 38.452, 174.769, 5.6385, 10.1959, 98.8756, 31.4843, 34.4928, 38.452]"
1,Lewis Hamilton,10,4.4,63.7863,"[5.0, 5.0, 9.0, 1.0, 3.0, 1.0, 1.0, 5.0, 5.0, 9.0]","[53.135, 43.164, 73.0569, 179.2493, 15.5105, 12.9473, 91.4443, 53.135, 43.164, 73.0569]"
2,Lando Norris,9,7.0,50.6142,"[10.0, 2.0, 5.0, 11.0, 10.0, 8.0, 10.0, 2.0, 5.0]","[98.6747, 40.3784, 42.2292, 18.725, 11.7092, 62.5288, 98.6747, 40.3784, 42.2292]"
3,Charles Leclerc,10,7.0,57.4768,"[3.0, 4.0, 4.0, 18.0, 6.0, 20.0, 4.0, 3.0, 4.0, 4.0]","[34.5486, 36.0998, 71.4783, 120.7751, 28.1813, 65.8369, 75.7211, 34.5486, 36.0998, 71.4783]"
4,Oscar Piastri,4,5.5,51.787,"[3.0, 8.0, 3.0, 8.0]","[45.212, 58.3621, 45.212, 58.3621]"
5,George Russell,9,10.11,52.3621,"[8.0, 7.0, 7.0, 16.0, 17.0, 14.0, 8.0, 7.0, 7.0]","[76.6286, 38.4222, 87.9876, 11.295, 11.691, 42.1963, 76.6286, 38.4222, 87.9876]"
6,Carlos Sainz,10,8.4,50.0714,"[19.0, 6.0, 3.0, 10.0, 5.0, 6.0, 7.0, 19.0, 6.0, 3.0]","[37.4111, 46.9225, 146.4741, 8.6961, 11.8133, 64.9202, 37.4111, 46.9225]"
7,Yuki Tsunoda,7,12.86,73.4513,"[13.0, 12.0, 10.0, 20.0, 13.0, 12.0, 10.0]","[90.4939, 46.7592, 108.6566, 22.3395, 90.4939, 46.7592, 108.6566]"
8,Fernando Alonso,8,9.12,64.5019,"[7.0, 8.0, 6.0, 14.0, 17.0, 7.0, 8.0, 6.0]","[71.6982, 34.8648, 49.852, 141.8182, 61.3674, 71.6982, 34.8648, 49.852]"
9,Pierre Gasly,10,12.5,98.8024,"[18.0, 10.0, 16.0, 11.0, 7.0, 9.0, 10.0, 18.0, 10.0, 16.0]","[173.3038, 58.1554, 146.101, 148.6924, 8.2568, 11.8223, 64.1318, 173.3038, 58.1554, 146.101]"


In [16]:
# Compute average finish and consistency score
driver_summary = {}

for driver in non_rookie_drivers:
    finishes = driver_stats[driver]['finishes']
    variances = driver_stats[driver]['lap_variance']

    # Average finish (lower is better)
    avg_finish = np.mean(finishes) if finishes else 20.0  # Default 20 for no data

    # Average variance (lower is better, consistency)
    avg_variance = np.mean(variances) if variances else 1.0  # Default 1.0 if no laps

    # Composite score: 70% avg finish, 30% consistency (normalized variance)
    score = (0.7 * avg_finish) + (0.3 * avg_variance)
    driver_summary[driver] = {
        'avg_finish': avg_finish,
        'avg_variance': avg_variance,
        'score': score
    }

# Convert to DataFrame for sorting
df_summary = pd.DataFrame.from_dict(driver_summary, orient='index')
df_summary['Driver'] = [driver_names[d] for d in df_summary.index]

In [19]:
# Sort the DataFrame by score (lower is better)
df_sorted = df_summary.sort_values('score')

# Style the table for better presentation
def highlight_high_scores(val):
    color = '#f2dede' if val > 15 else 'white'  # Highlight scores above 15 with light red
    return f'background-color: {color}'

styled_df = df_sorted.style.set_properties(**{
    'text-align': 'center',
    'border': '1px solid black',
    'padding': '5px',
    'color': 'black'  # Ensure all text is black for visibility
}).set_table_styles([
    {'selector': 'th',
     'props': [('background-color', '#4CAF50'), ('color', 'white'), ('font-weight', 'bold'),
               ('text-align', 'center'), ('border', '1px solid black')]}
]).map(highlight_high_scores, subset=['score'])  # Changed applymap to map

# Add caption
styled_df = styled_df.set_caption("Driver Performance Summary for 2025 Japanese GP Prediction")

# For renaming columns, you should use .set_properties instead of relabel_index
# Since you have 15 columns but only provided 4 labels, consider using rename or columns attribute

# Display the styled table
display(styled_df)

Unnamed: 0,avg_finish,avg_variance,score,Driver
VER,3.1,49.833699,17.12011,Max Verstappen
PIA,5.5,51.787036,19.386111,Oscar Piastri
NOR,7.0,50.614185,20.084255,Lando Norris
SAI,8.4,50.071368,20.90141,Carlos Sainz
LAW,11.0,45.159719,21.247916,Liam Lawson
LEC,7.0,57.476786,22.143036,Charles Leclerc
HAM,4.4,63.786312,22.215894,Lewis Hamilton
RUS,10.111111,52.362121,22.786414,George Russell
ALO,9.125,64.501936,25.738081,Fernando Alonso
TSU,12.857143,73.451261,31.035378,Yuki Tsunoda


In [20]:
# Sort by composite score (lower is better)
df_sorted = df_summary.sort_values('score')

# Add predicted position
df_sorted['Predicted Position'] = range(1, len(df_sorted) + 1)

# Display results
print("Predicted 2025 Japanese Grand Prix Standings (Non-Rookies):")
print(df_sorted[['Driver', 'Predicted Position', 'avg_finish', 'avg_variance', 'score']])

# Output top 15 explicitly
for i, row in df_sorted.iterrows():
    print(f"{int(row['Predicted Position'])}. {row['Driver']}")

Predicted 2025 Japanese Grand Prix Standings (Non-Rookies):
              Driver  Predicted Position  avg_finish  avg_variance      score
VER   Max Verstappen                   1    3.100000     49.833699  17.120110
PIA    Oscar Piastri                   2    5.500000     51.787036  19.386111
NOR     Lando Norris                   3    7.000000     50.614185  20.084255
SAI     Carlos Sainz                   4    8.400000     50.071368  20.901410
LAW      Liam Lawson                   5   11.000000     45.159719  21.247916
LEC  Charles Leclerc                   6    7.000000     57.476786  22.143036
HAM   Lewis Hamilton                   7    4.400000     63.786312  22.215894
RUS   George Russell                   8   10.111111     52.362121  22.786414
ALO  Fernando Alonso                   9    9.125000     64.501936  25.738081
TSU     Yuki Tsunoda                  10   12.857143     73.451261  31.035378
OCO     Esteban Ocon                  11    9.666667     92.477921  34.510043
HUL 