In [3]:
!pip install fastf1


Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-25.1.1-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [4]:
import fastf1
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [5]:
import os
# Create the local 'cache' directory before the cache is enabled below;
# exist_ok=True makes re-running this cell a no-op instead of an error.
os.makedirs('cache', exist_ok=True)


In [6]:
# Point FastF1's cache at the 'cache' directory so repeated session loads can
# be served from disk (the run log shows "Using cached data for ...").
fastf1.Cache.enable_cache('cache')

In [19]:
def fetch_f1_data(year, round_number):
    """Load one qualifying session and return its per-driver segment times.

    Parameters
    ----------
    year : int
        Season passed to ``fastf1.get_session``.
    round_number : int
        Round number within that season.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``DriverNumber``, ``Driver`` (renamed from ``FullName``),
        ``TeamName``, ``Q1``, ``Q2``, ``Q3`` plus ``Q1_Sec``, ``Q2_Sec`` and
        ``Q3_Sec`` holding the same times as float seconds (NaN where no time
        is recorded).  ``None`` is returned, after printing the error, when
        the session cannot be loaded or processed.
    """
    # Local import keeps the function usable when its cell is run on its own.
    import fastf1
    try:
        quali = fastf1.get_session(year, round_number, 'Q')
        quali.load()

        # Explicit copy: the derived columns are added to our own frame and
        # never written back into the session's results table.
        results = (
            quali.results[['DriverNumber', 'FullName', 'TeamName', 'Q1', 'Q2', 'Q3']]
            .rename(columns={'FullName': 'Driver'})
            .copy()
        )

        for col in ['Q1', 'Q2', 'Q3']:
            # Vectorised conversion that always yields float64 with NaN for
            # missing times.  The previous per-element apply returned None and
            # produced an object-dtype column when a whole segment was empty.
            results[col + '_Sec'] = pd.to_timedelta(results[col]).dt.total_seconds()

        print("\nQualifying Results Structure:")
        print(results.head())

        return results

    except Exception as e:
        # Best-effort fetch: callers test the return value with isinstance(),
        # so report the problem and signal failure with None.
        print(f"Error fetching data: {e}")
        return None


In [8]:
def convert_time_to_seconds(time_str):
    """Convert a lap time to float seconds.

    Accepted inputs:

    * a timedelta-like object exposing ``total_seconds()`` (for example
      ``pandas.Timedelta``),
    * a string such as ``"1:15.912"`` (``m:ss.fff``) or ``"1:02:03"``
      (``h:mm:ss``),
    * a plain number or numeric string, taken to already be in seconds.

    Returns ``None`` for missing values, and for values that cannot be parsed
    (a warning is printed in that case).
    """
    if pd.isna(time_str):
        return None
    try:
        # Timedelta values do not support the ``in`` operator, so handle them
        # before any string inspection.
        if hasattr(time_str, 'total_seconds'):
            return float(time_str.total_seconds())

        if isinstance(time_str, str) and ':' in time_str:
            # Fold the colon-separated fields left to right so that both
            # "m:ss" and "h:mm:ss" are supported.
            total = 0.0
            for field in time_str.strip().split(':'):
                total = total * 60 + float(field)
            return total

        return float(time_str)
    except (ValueError, TypeError) as e:
        print(f"Warning: Could not convert time: {time_str}, Error: {e}")
        return None

In [9]:
def clean_data(df):
    """Add ``Q1_Sec``/``Q2_Sec``/``Q3_Sec`` columns and drop incomplete rows.

    Each of the ``Q1``, ``Q2`` and ``Q3`` columns is converted with
    ``convert_time_to_seconds``.  A preview of the first rows is printed
    before and after the conversion.

    The input frame is left untouched; the conversion is done on a copy.

    Returns
    -------
    pandas.DataFrame
        The converted copy with every row containing a missing value in ANY
        column removed (``dropna()`` with default arguments), so a row with no
        Q3 time is dropped as well.
    """
    print("\n before cleaning ")
    print(df[['Driver', 'Q1', 'Q2', 'Q3']].head())

    # Work on a copy so re-running the cell, or reusing ``df`` elsewhere,
    # is not affected by the columns added here.
    cleaned = df.copy()
    for segment in ('Q1', 'Q2', 'Q3'):
        cleaned[segment + '_Sec'] = cleaned[segment].apply(convert_time_to_seconds)

    print("\n after cleaning")
    print(cleaned[['Driver', 'Q1_Sec', 'Q2_Sec', 'Q3_Sec']].head())

    return cleaned.dropna()



In [10]:
def vizulaize_data(df):
    """Show a box plot of the Q1, Q2 and Q3 lap-time columns of ``df``."""
    segment_columns = ['Q1_Sec', 'Q2_Sec', 'Q3_Sec']
    # seaborn draws on the current axes and returns them, so labelling the
    # returned object targets the same axes pyplot would.
    axes = sns.boxplot(data=df[segment_columns])
    axes.set_title('Qualifying Lap times (seconds)')
    axes.set_ylabel('lap time (seconds)')
    plt.show()

In [11]:
def train_and_evevulate(df):
    """Fit a linear model predicting Q3 time from Q1/Q2 and print a report.

    ``df`` must provide ``Driver``, ``TeamName``, ``Q1_Sec``, ``Q2_Sec`` and
    ``Q3_Sec``.  The rows are split 80/20 (``random_state=42``); the model is
    fitted on the training part.  Two things are printed:

    * a ranking of every row of ``df`` by predicted Q3 time, next to the
      actual time, and
    * the mean absolute error and R^2 measured on the held-out 20 %.

    Nothing is returned.
    """
    features = df[['Q1_Sec', 'Q2_Sec']]
    # 1-D target so that predict() returns a flat array that maps onto a
    # single DataFrame column.
    target = df['Q3_Sec']

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    results_df = df[['Driver', 'TeamName', 'Q1_Sec', 'Q2_Sec', 'Q3_Sec']].copy()
    results_df['predicted_Q3'] = model.predict(features)
    results_df['Difference'] = results_df['predicted_Q3'] - results_df['Q3_Sec']
    results_df = results_df.sort_values('predicted_Q3')

    print("\n predicted Q3 rankings:")
    print('=' * 70)
    print(f"{'postion':<10}{'Driver':<15}{'Team':<20}{'Predicted Time':<15}{'Actual Time':<15}")
    print('-' * 70)

    # enumerate() gives the rank directly; looking the label up with
    # index.get_loc() was quadratic and breaks on duplicate index labels.
    for position, (_, row) in enumerate(results_df.iterrows(), start=1):
        pred_time = f"{row['predicted_Q3']:.3f}"
        actual_time = f"{row['Q3_Sec']:.3f}" if not pd.isna(row['Q3_Sec']) else "N/A"
        # ``:<15`` is a format spec; without the colon the expression compared
        # a str with an int and raised TypeError.
        print(f"{position:<10}{row['Driver']:<15}{row['TeamName']:<20}{pred_time:<15}{actual_time:<15}")

    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\n Model performance metrics:")
    # f-prefix and ``:.2f`` restored so the value is printed, not the template.
    print(f'Mean Absolute Error:{mae:.2f} seconds')
    print(f'R^2 Score:{r2:.2f}')





In [17]:
def fetch_recent_data(current_year=2025, rounds=range(1, 5),
                      extra_sessions=((2024, 4, "2024 Japanese GP"),)):
    """Fetch qualifying results for several sessions and tag each with its origin.

    Parameters
    ----------
    current_year : int, default 2025
        Season whose ``rounds`` are fetched.
    rounds : iterable of int, default ``range(1, 5)``
        Round numbers of ``current_year`` to fetch, in order.
    extra_sessions : iterable of (year, round, label), default one entry
        Additional sessions fetched after the main rounds; ``label`` is only
        used in the progress message.  The default adds round 4 of 2024,
        announced as "2024 Japanese GP".

    Returns
    -------
    list of pandas.DataFrame
        One frame per session that ``fetch_f1_data`` could load, each with
        added ``Year`` and ``Round`` columns.  Sessions for which
        ``fetch_f1_data`` did not return a DataFrame are skipped, so the list
        may be empty.
    """
    # Build one plan so the fetch-and-tag logic exists only once.
    planned = [
        (current_year, round_num, f"Fetching data for {current_year} round {round_num}...")
        for round_num in rounds
    ]
    planned.extend(
        (year, round_num, f"Fetching {label} data...")
        for year, round_num, label in extra_sessions
    )

    all_data = []
    for year, round_num, message in planned:
        print(message)
        session_df = fetch_f1_data(year, round_num)
        if isinstance(session_df, pd.DataFrame):
            # assign() returns a new frame, leaving the fetched one unmodified.
            all_data.append(session_df.assign(Year=year, Round=round_num))

    return all_data

In [21]:
def apply_performance_factors(predictions_df):
    """Scale each predicted Q3 time by a team factor and a driver factor.

    ``predictions_df`` must provide ``Team``, ``Driver`` and ``Predicted Q3``.
    For every row the base prediction is multiplied by the factor of its team
    and of its driver; teams and drivers without an entry fall back to 1.005
    and 1.002 respectively.  A factor below 1 shortens the predicted time.

    The adjusted time is written to a ``Predicted_Q3`` column on the frame
    that was passed in (it is modified in place) and the same frame is
    returned.

    NOTE(review): the input is read from ``Predicted Q3`` (space) while the
    result is stored under ``Predicted_Q3`` (underscore), so both columns
    exist afterwards -- confirm that keeping the unadjusted column is intended.
    """
    default_team_factor = 1.005
    default_driver_factor = 1.002

    team_factors = {
        'Red Bull Racing': 0.995,
        'Ferrari': 0.998,
        'Mercedes': 1.000,
        'McLaren': 1.002,
        'Aston Martin': 1.004,
        'RB': 1.006,
        # Same factor under the name this team carries in the 2025 results.
        'Racing Bulls': 1.006,
        'Haas F1 Team': 1.008,
        'Williams': 1.010,
        'Alpine': 1.012,
        'Sauber': 1.014,
        # The driver/team table elsewhere in this notebook uses 'Kick Sauber'.
        'Kick Sauber': 1.014,
    }

    driver_factors = {
        'Max Verstappen': 0.995,
        'Charles Leclerc': 0.997,
        'Lewis Hamilton': 0.998,
        'Lando Norris': 0.999,
        'Fernando Alonso': 1.000,
        # add other driver adjustments as needed
    }

    # map() leaves NaN for names without an entry; fillna() applies the fallback.
    team_multiplier = predictions_df['Team'].map(team_factors).fillna(default_team_factor)
    driver_multiplier = predictions_df['Driver'].map(driver_factors).fillna(default_driver_factor)

    predictions_df['Predicted_Q3'] = (
        predictions_df['Predicted Q3'] * team_multiplier * driver_multiplier
    )
    return predictions_df



In [23]:
def predict_japanese_gp(model, latest_data):
    """Predict and print a Q3 ranking for a fixed driver line-up.

    Parameters
    ----------
    model : fitted estimator
        Object whose ``predict`` accepts a two-column frame
        (``Q1_Sec``, ``Q2_Sec``).
    latest_data : pandas.DataFrame
        Historic qualifying rows with ``Driver``, ``Q1_Sec`` and ``Q2_Sec``.
        It may hold several rows per driver (one per session); they are
        averaged so that every driver contributes exactly one feature row.

    Returns
    -------
    pandas.DataFrame
        One row per driver of the line-up with ``Driver``, ``Team``,
        ``Q1_Sec``, ``Q2_Sec`` and ``Predicted Q3``, sorted fastest first.
        The same ranking is printed.

    NOTE(review): the line-up below still lists drivers that do not appear in
    the 2025 sessions loaded by this notebook, while the banner says 2025 --
    confirm which grid is intended.
    """
    # Spellings must match the ``Driver`` values in ``latest_data`` exactly,
    # because they are the merge key; a misspelled name silently falls back
    # to the median features.
    driver_teams = {
        'Max Verstappen': 'Red Bull Racing',
        'Sergio Perez': 'Red Bull Racing',
        'Charles Leclerc': 'Ferrari',
        'Carlos Sainz': 'Ferrari',
        'Lewis Hamilton': 'Mercedes',
        'George Russell': 'Mercedes',
        'Lando Norris': 'McLaren',
        'Oscar Piastri': 'McLaren',
        'Fernando Alonso': 'Aston Martin',
        'Lance Stroll': 'Aston Martin',
        'Daniel Ricciardo': 'RB',
        'Yuki Tsunoda': 'RB',
        'Alexander Albon': 'Williams',
        'Logan Sargeant': 'Williams',
        'Valtteri Bottas': 'Kick Sauber',
        'Guanyu Zhou': 'Kick Sauber',
        'Kevin Magnussen': 'Haas F1 Team',
        'Nico Hulkenberg': 'Haas F1 Team',
        'Pierre Gasly': 'Alpine',
        'Esteban Ocon': 'Alpine',
    }
    feature_cols = ['Q1_Sec', 'Q2_Sec']

    grid = pd.DataFrame(list(driver_teams.items()), columns=['Driver', 'Team'])

    # Collapse the history to one row per driver; merging the raw multi-session
    # frame would list every driver once per session in the ranking.
    history = latest_data[['Driver'] + feature_cols].copy()
    history[feature_cols] = history[feature_cols].astype(float)
    per_driver = history.groupby('Driver', as_index=False)[feature_cols].mean()

    merged = pd.merge(grid, per_driver, on='Driver', how='left')

    # Drivers without history (or without a Q2 time) get the median of the
    # drivers that do have one.
    features = merged[feature_cols]
    features = features.fillna(features.median())

    # ravel() tolerates models fitted on a 2-D target, which predict (n, 1).
    merged['Predicted Q3'] = np.ravel(model.predict(features))

    results_df = merged.sort_values('Predicted Q3')

    print("\nJapanese GP 2025 Qualifying Predictions")
    print("="*100)
    print(f"{'Position':<10}{'Driver':<20}{'Team':<25}{'Predicted Q3':<15}")
    print("-"*100)

    for position, (_, row) in enumerate(results_df.iterrows(), start=1):
        print(f"{position:<10}"
              f"{row['Driver']:<20}"
              f"{row['Team']:<25}"
              f"{row['Predicted Q3']:.3f}s")

    return results_df


# Script entry point: fetch the sessions, fit the Q3 model, print predictions
# and the fit metrics.
if __name__ == "__main__":
    print("Initializing F1 prediction model")

    all_data = fetch_recent_data()

    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)

        # Rows that carry at least one segment time; used as driver history
        # for the prediction step.
        valid_data = combined_df.dropna(subset=['Q1_Sec', 'Q2_Sec', 'Q3_Sec'], how='all')

        # Train only on rows with a recorded Q3 time.  Filling a missing
        # target with the overall median would invent Q3 times for drivers
        # who never set one.
        train_rows = valid_data.dropna(subset=['Q3_Sec'])

        # Median imputation is applied to the features only.
        imputer = SimpleImputer(strategy='median')
        X = train_rows[['Q1_Sec', 'Q2_Sec']].astype(float)
        X_clean = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)
        # Reset the index so the target lines up with X_clean's fresh index.
        y_clean = train_rows['Q3_Sec'].astype(float).reset_index(drop=True)

        model = LinearRegression()
        model.fit(X_clean, y_clean)

        predict_japanese_gp(model, valid_data)

        # These metrics are computed on the rows the model was fitted on.
        y_pred = model.predict(X_clean)
        mae = mean_absolute_error(y_clean, y_pred)
        r2 = r2_score(y_clean, y_pred)

        print("\n Model Performance Metrics:")
        print(f'Mean Absolute Error:{mae:.2f} seconds')
        print(f'R^2 Score:{r2:.2f}')

    else:
        # Belongs to ``if all_data``: reported when no session could be loaded.
        print("Failed to fetch F1 data")




core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
DEBUG:fastf1.ergast:Failed to parse timestamp '' in Ergastresponse.
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core           INFO 	Processing tim

Initializing F1 prediction model
Fetching data for 2025 round 1...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '1', '63', '22', '23', '16', '44', '10', '55', '6', '14', '18', '7', '5', '12', '27', '30', '31', '87']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['4', '81', '1', '63', '22', '23', '16', '44', '10', '55', '6', '14', '18', '7', '5', '12', '27', '30', '31', '87']
core           INFO 	Loading data for Chinese Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Chinese Grand Prix - Qualifying [v3.5.3]
req            INF


Qualifying Results Structure:
   DriverNumber          Driver         TeamName                     Q1  \
4             4    Lando Norris          McLaren 0 days 00:01:15.912000   
81           81   Oscar Piastri          McLaren 0 days 00:01:16.062000   
1             1  Max Verstappen  Red Bull Racing 0 days 00:01:16.018000   
63           63  George Russell         Mercedes 0 days 00:01:15.971000   
22           22    Yuki Tsunoda     Racing Bulls 0 days 00:01:16.225000   

                       Q2                     Q3  Q1_Sec  Q2_Sec  Q3_Sec  
4  0 days 00:01:15.415000 0 days 00:01:15.096000  75.912  75.415  75.096  
81 0 days 00:01:15.468000 0 days 00:01:15.180000  76.062  75.468  75.180  
1  0 days 00:01:15.565000 0 days 00:01:15.481000  76.018  75.565  75.481  
63 0 days 00:01:15.798000 0 days 00:01:15.546000  75.971  75.798  75.546  
22 0 days 00:01:16.009000 0 days 00:01:15.670000  76.225  76.009  75.670  
Fetching data for 2025 round 2...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
req            I


Qualifying Results Structure:
   DriverNumber          Driver         TeamName                     Q1  \
81           81   Oscar Piastri          McLaren 0 days 00:01:31.591000   
63           63  George Russell         Mercedes 0 days 00:01:31.295000   
4             4    Lando Norris          McLaren 0 days 00:01:30.983000   
1             1  Max Verstappen  Red Bull Racing 0 days 00:01:31.424000   
44           44  Lewis Hamilton          Ferrari 0 days 00:01:31.690000   

                       Q2                     Q3  Q1_Sec  Q2_Sec  Q3_Sec  
81 0 days 00:01:31.200000 0 days 00:01:30.641000  91.591  91.200  90.641  
63 0 days 00:01:31.307000 0 days 00:01:30.723000  91.295  91.307  90.723  
4  0 days 00:01:30.787000 0 days 00:01:30.793000  90.983  90.787  90.793  
1  0 days 00:01:31.142000 0 days 00:01:30.817000  91.424  91.142  90.817  
44 0 days 00:01:31.501000 0 days 00:01:30.927000  91.690  91.501  90.927  
Fetching data for 2025 round 3...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '6', '44', '23', '87', '10', '55', '14', '30', '22', '27', '5', '31', '7', '18']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '6', '44', '23', '87', '10', '55', '14', '30', '22', '27', '5', '31', '7', '18']
core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Bahrain Grand Prix - Qualifying [v3.5.3]
req            INF


Qualifying Results Structure:
   DriverNumber           Driver         TeamName                     Q1  \
1             1   Max Verstappen  Red Bull Racing 0 days 00:01:27.943000   
4             4     Lando Norris          McLaren 0 days 00:01:27.845000   
81           81    Oscar Piastri          McLaren 0 days 00:01:27.687000   
16           16  Charles Leclerc          Ferrari 0 days 00:01:27.920000   
63           63   George Russell         Mercedes 0 days 00:01:27.843000   

                       Q2                     Q3  Q1_Sec  Q2_Sec  Q3_Sec  
1  0 days 00:01:27.502000 0 days 00:01:26.983000  87.943  87.502  86.983  
4  0 days 00:01:27.146000 0 days 00:01:26.995000  87.845  87.146  86.995  
81 0 days 00:01:27.507000 0 days 00:01:27.027000  87.687  87.507  87.027  
16 0 days 00:01:27.555000 0 days 00:01:27.299000  87.920  87.555  87.299  
63 0 days 00:01:27.400000 0 days 00:01:27.318000  87.843  87.400  87.318  
Fetching data for 2025 round 4...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '16', '12', '10', '4', '1', '55', '44', '22', '7', '6', '14', '31', '23', '27', '30', '5', '18', '87']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['81', '63', '16', '12', '10', '4', '1', '55', '44', '22', '7', '6', '14', '31', '23', '27', '30', '5', '18', '87']
core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
req            I


Qualifying Results Structure:
   DriverNumber           Driver  TeamName                     Q1  \
81           81    Oscar Piastri   McLaren 0 days 00:01:31.392000   
63           63   George Russell  Mercedes 0 days 00:01:31.494000   
16           16  Charles Leclerc   Ferrari 0 days 00:01:31.454000   
12           12   Kimi Antonelli  Mercedes 0 days 00:01:31.415000   
10           10     Pierre Gasly    Alpine 0 days 00:01:31.462000   

                       Q2                     Q3  Q1_Sec  Q2_Sec  Q3_Sec  
81 0 days 00:01:30.454000 0 days 00:01:29.841000  91.392  90.454  89.841  
63 0 days 00:01:30.664000 0 days 00:01:30.009000  91.494  90.664  90.009  
16 0 days 00:01:30.724000 0 days 00:01:30.175000  91.454  90.724  90.175  
12 0 days 00:01:30.716000 0 days 00:01:30.213000  91.415  90.716  90.213  
10 0 days 00:01:30.643000 0 days 00:01:30.216000  91.462  90.643  90.216  
Fetching 2024 Japanese GP data...


req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core           INFO 	Processing timing data...
INFO:fastf1.fastf1.core:Processing timing data...
req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: 


Qualifying Results Structure:
   DriverNumber           Driver         TeamName                     Q1  \
1             1   Max Verstappen  Red Bull Racing 0 days 00:01:28.866000   
11           11     Sergio Perez  Red Bull Racing 0 days 00:01:29.303000   
4             4     Lando Norris          McLaren 0 days 00:01:29.536000   
55           55     Carlos Sainz          Ferrari 0 days 00:01:29.513000   
14           14  Fernando Alonso     Aston Martin 0 days 00:01:29.254000   

                       Q2                     Q3  Q1_Sec  Q2_Sec  Q3_Sec  
1  0 days 00:01:28.740000 0 days 00:01:28.197000  88.866  88.740  88.197  
11 0 days 00:01:28.752000 0 days 00:01:28.263000  89.303  88.752  88.263  
4  0 days 00:01:28.940000 0 days 00:01:28.489000  89.536  88.940  88.489  
55 0 days 00:01:29.099000 0 days 00:01:28.682000  89.513  89.099  88.682  
14 0 days 00:01:29.082000 0 days 00:01:28.686000  89.254  89.082  88.686  

Japanese GP 2025 Qualifying Predictions
Position  Driver     