### 🏎️ Project Overview: F1 Qualifying Time Prediction using Historical Data and Performance Factors

This project aims to **predict Formula 1 qualifying (Q3) lap times for the 2025 season** by combining historical race data and team/driver performance multipliers. It uses data from the 2024 and 2025 seasons, fetched via the `FastF1` API. For each 2025 race round, the model predicts lap times for all current drivers based on:

- **The best Q3 time from the corresponding round in 2024** (serving as a base reference),
- **Team and driver performance factors** (reflecting relative pace differences),
- And **random variation** to simulate real-world fluctuations.

Additionally, a linear regression model is fitted on 2025 qualifying data (Rounds 1–4) to estimate how Q3 lap times relate to Q1 and Q2 lap times. The final output ranks drivers per round by their predicted Q3 times, allowing for comparative performance insights across teams and drivers throughout the season.

## Import Libraries and Set Up Logging

In [None]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, r2_score
import logging
import warnings

# Quieten FastF1 logging: lift the 'fastf1' logger's level to WARNING and
# additionally mark that logger object as disabled.
fastf1_logger = logging.getLogger('fastf1')
fastf1_logger.setLevel(logging.WARNING)
fastf1_logger.disabled = True

# Install a global "ignore" filter so Python warnings are not shown.
warnings.filterwarnings(action="ignore")

## Fetch Qualifying Session Data

In [None]:
# Fetch qualifying session data
def fetch_f1_data(year, round_number):
    """Load one qualifying session and return its results with times in seconds.

    Args:
        year: Season passed to ``fastf1.get_session``.
        round_number: Round number passed to ``fastf1.get_session``.

    Returns:
        A DataFrame with the columns ``DriverNumber``, ``Driver`` (renamed
        from ``FullName``), ``TeamName``, ``Q1``, ``Q2``, ``Q3``, the derived
        float columns ``Q1_sec``, ``Q2_sec``, ``Q3_sec`` (NaN where the source
        value is missing), ``Round`` and ``Year``.  ``None`` is returned, after
        printing a message, when anything goes wrong while loading or
        processing the session.
    """
    try:
        session = fastf1.get_session(year, round_number, 'Q')
        session.load()
        results = session.results[['DriverNumber', 'FullName', 'TeamName', 'Q1', 'Q2', 'Q3']]
        results = results.rename(columns={'FullName': 'Driver'})
        for col in ('Q1', 'Q2', 'Q3'):
            # Vectorised conversion: always produces a float column, with NaN
            # for missing times, even when a whole segment has no times.
            results[col + '_sec'] = pd.to_timedelta(results[col]).dt.total_seconds()
        results['Round'] = round_number
        results['Year'] = year
        return results
    except Exception as e:
        # Best-effort by design: one unavailable session must not stop the
        # caller's loop.  The season is part of the message because the same
        # round numbers are fetched for more than one year.
        print(f"Could not load data for {year} Round {round_number}: {e}")
        return None

## Fetch Multiple Rounds Data (for 2025)

In [None]:
# Fetch multiple rounds (for 2025)
def fetch_all_2025_data():
    """Gather 2025 qualifying results for rounds 1 through 4 in one DataFrame.

    Each round is requested via ``fetch_f1_data(2025, round)``; rounds for
    which that call returns ``None`` are left out.

    Returns:
        The concatenation (with a fresh index) of every round that was
        fetched, or an empty DataFrame when no round produced data.
    """
    frames = []
    for round_no in (1, 2, 3, 4):  # only rounds 1-4 of 2025 are requested
        print(f"Fetching Round {round_no} for 2025...")
        frame = fetch_f1_data(2025, round_no)
        if frame is None:
            continue
        frames.append(frame)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

## Fetch Multiple Rounds Data (for 2024)

In [None]:
# Fetch multiple rounds (for 2024)
def fetch_all_2024_data():
    """Gather 2024 qualifying results for rounds 1 through 24 in one DataFrame.

    Each round is requested via ``fetch_f1_data(2024, round)``; rounds for
    which that call returns ``None`` are left out.

    Returns:
        The concatenation (with a fresh index) of every round that was
        fetched, or an empty DataFrame when no round produced data.
    """
    first_round, last_round = 1, 24
    collected = []
    for round_no in range(first_round, last_round + 1):
        print(f"Fetching Round {round_no} for 2024...")
        round_frame = fetch_f1_data(2024, round_no)
        if round_frame is not None:
            collected.append(round_frame)

    if collected:
        return pd.concat(collected, ignore_index=True)
    return pd.DataFrame()

## Get the Best Q3 Time for Each Round in 2024

In [None]:
# Get the best Q3 time for each round in 2024
def get_best_q3_times_2024():
    """Return the fastest Q3 time of each 2024 round that could be fetched.

    Returns:
        A DataFrame with the columns ``Round`` and ``Best_Q3_time_sec``, one
        row per round in 1-24 that is present in the fetched data.  The value
        is the minimum of that round's ``Q3_sec`` column.  When no 2024 data
        is available the frame has both columns but no rows, so callers can
        test ``.empty``.
    """
    columns = ['Round', 'Best_Q3_time_sec']
    all_data = fetch_all_2024_data()

    # When every round fails to load, fetch_all_2024_data() returns a
    # DataFrame without any columns; indexing 'Round' on it would raise
    # KeyError instead of yielding the empty result callers check for.
    if all_data.empty:
        return pd.DataFrame(columns=columns)

    best_times = []
    for rnd in range(1, 25):  # Rounds 1 to 24
        round_data = all_data[all_data['Round'] == rnd]
        if not round_data.empty:
            best_times.append({
                'Round': rnd,
                'Best_Q3_time_sec': round_data['Q3_sec'].min(),
            })

    # Passing the column list keeps the schema stable even with zero rows.
    return pd.DataFrame(best_times, columns=columns)

## Apply Performance Multipliers

In [None]:
# Apply performance multipliers
def apply_performance_factors(predictions_df, base_times):
    """Attach a predicted Q3 time to every driver row.

    The prediction is the round's baseline time multiplied by a team factor
    and a driver factor, plus uniform random noise in [-0.1, 0.1) seconds.

    Args:
        predictions_df: DataFrame with at least the columns ``Driver``,
            ``Team`` and ``Round``.  It is not modified.
        base_times: DataFrame with the columns ``Round`` and
            ``Best_Q3_time_sec``.

    Returns:
        A new DataFrame: ``predictions_df`` left-joined with the baseline time
        on ``Round`` and extended with a ``Predicted_Q3`` column.  Rows whose
        round has no baseline get NaN.  The column is created even when there
        are no rows.
    """
    team_factors = {
        'Red Bull Racing': 0.997,
        'Ferrari': 0.998,
        'McLaren': 0.995,
        'Mercedes': 0.998,
        'Aston Martin': 1.001,
        'RB': 1.002,
        'Williams': 1.003,
        'Haas F1 Team': 1.004,
        'Kick Sauber': 1.004,
        'Alpine': 1.005,
    }

    driver_factors = {
        'Max Verstappen': 0.998,
        'Lando Norris': 0.999,
        'Oscar Piastri': 0.999,
        'Charles Leclerc': 1.000,
        'Lewis Hamilton': 1.000,
        'Yuki Tsunoda': 1.001,
        'Kimi Antonelli': 1.001,
        'George Russell': 1.000,
        'Fernando Alonso': 1.001,
        'Lance Stroll': 1.003,
        'Isack Hadjar': 1.003,
        'Liam Lawson': 1.004,
        'Alexander Albon': 1.003,
        'Carlos Sainz': 1.002,
        'Nico Hulkenberg': 1.002,
        'Gabriel Bortoleto': 1.004,
        'Oliver Bearman': 1.004,
        'Esteban Ocon': 1.003,
        'Pierre Gasly': 1.003,
        'Jack Doohan': 1.004,
    }

    # Multipliers used for teams / drivers missing from the tables above.
    default_team_factor = 1.005
    default_driver_factor = 1.002

    # Map the base times for each round (left join keeps every driver row).
    predictions_df = predictions_df.merge(
        base_times[['Round', 'Best_Q3_time_sec']], on='Round', how='left'
    )

    # Look the factors up column-wise instead of row by row; names missing
    # from a table map to NaN and then receive the default multiplier.
    team_multiplier = (
        predictions_df['Team'].map(team_factors).astype(float).fillna(default_team_factor)
    )
    driver_multiplier = (
        predictions_df['Driver'].map(driver_factors).astype(float).fillna(default_driver_factor)
    )

    # One independent noise draw per row.
    noise = np.random.uniform(-0.1, 0.1, size=len(predictions_df))

    predictions_df['Predicted_Q3'] = (
        predictions_df['Best_Q3_time_sec'].astype(float) * team_multiplier * driver_multiplier
        + noise
    )

    return predictions_df

## Converting Seconds into Minutes

In [None]:
# Convert seconds to minutes:seconds
def convert_seconds_to_minutes(seconds):
    """Format a duration given in seconds as ``MM:SS.ss``.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        A string such as ``"01:30.50"``: minutes padded to two digits and
        seconds shown with two decimals.

    Raises:
        ValueError: If ``seconds`` is NaN.
    """
    # Round the whole duration to hundredths first.  Rounding only the
    # seconds part let values like 59.999 print as "00:60.00" because the
    # carry never reached the minutes field.
    total_hundredths = int(round(float(seconds) * 100))
    minutes, hundredths = divmod(total_hundredths, 6000)
    return f"{minutes:02}:{hundredths // 100:02}.{hundredths % 100:02}"

## Predict for All 24 Rounds in 2025

In [None]:
# Predict for all 24 rounds
def predict_all_rounds(model, base_times):
    """Print a predicted Q3 ranking of the 2025 grid for rounds 1 to 24.

    For every round a driver/team table is built, passed through
    ``apply_performance_factors`` and printed in ascending order of
    ``Predicted_Q3``.

    Args:
        model: Not used by this function; kept so existing callers that pass
            the fitted model keep working.
        base_times: DataFrame with the columns ``Round`` and
            ``Best_Q3_time_sec``, forwarded to ``apply_performance_factors``.

    Returns:
        None.  All output goes to stdout.
    """
    driver_teams = {
        'Max Verstappen': 'Red Bull Racing',
        'Yuki Tsunoda': 'Red Bull Racing',
        'Charles Leclerc': 'Ferrari',
        'Lewis Hamilton': 'Ferrari',
        'Kimi Antonelli': 'Mercedes',
        'George Russell': 'Mercedes',
        'Lando Norris': 'McLaren',
        'Oscar Piastri': 'McLaren',
        'Fernando Alonso': 'Aston Martin',
        'Lance Stroll': 'Aston Martin',
        'Isack Hadjar': 'RB',
        'Liam Lawson': 'RB',
        'Alexander Albon': 'Williams',
        'Carlos Sainz': 'Williams',
        'Nico Hulkenberg': 'Kick Sauber',
        'Gabriel Bortoleto': 'Kick Sauber',
        'Oliver Bearman': 'Haas F1 Team',
        'Esteban Ocon': 'Haas F1 Team',
        'Pierre Gasly': 'Alpine',
        'Jack Doohan': 'Alpine',
    }

    for rnd in range(1, 25):
        print(f"\nPredictions for Round {rnd} - 2025:")
        predictions_df = pd.DataFrame(list(driver_teams.items()), columns=['Driver', 'Team'])
        predictions_df['Round'] = rnd
        predictions_df = apply_performance_factors(predictions_df, base_times)

        # A round with no usable baseline time yields NaN predictions.
        # Formatting NaN raises ValueError, which would abort every remaining
        # round, so such rows are dropped and the round is reported instead.
        predictions_df = predictions_df.dropna(subset=['Predicted_Q3'])
        if predictions_df.empty:
            print("No 2024 baseline Q3 time available for this round; skipping.")
            continue

        predictions_df = predictions_df.sort_values('Predicted_Q3')

        print("=" * 90)
        print(f"{'Pos':<5}{'Driver':<20}{'Team':<25}{'Predicted Q3':<15}")
        print("-" * 90)

        for i, row in enumerate(predictions_df.itertuples(), 1):
            predicted_q3_time = convert_seconds_to_minutes(row.Predicted_Q3)
            print(f"{i:<5}{row.Driver:<20}{row.Team:<25}{predicted_q3_time}")


## Prepare Model, Train, and Predict

In [None]:
# Main block - Prepare Model, Train, and Make Predictions
# Baseline: fastest Q3 lap of every 2024 round that could be fetched.
base_times_2024 = get_best_q3_times_2024()

if not base_times_2024.empty:
    all_data = fetch_all_2025_data()

    if not all_data.empty:
        # Keep only rows that have a time in all three segments.
        valid_data = all_data.dropna(subset=['Q1_sec', 'Q2_sec', 'Q3_sec'], how='any')

        if not valid_data.empty:
            # Features: Q1 and Q2 times; target: Q3 time (all in seconds).
            X = valid_data[['Q1_sec', 'Q2_sec']]
            y = valid_data['Q3_sec']

            # The dropna above already removed every missing value, so the
            # median imputer has nothing to fill here; it is kept so that
            # X_clean / y_clean are still produced as positionally indexed
            # copies.
            imputer = SimpleImputer(strategy='median')
            X_clean = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)
            y_clean = pd.Series(imputer.fit_transform(y.values.reshape(-1, 1)).ravel())

            # Train the model
            model = LinearRegression()
            model.fit(X_clean, y_clean)

            # Predict on the training rows themselves; the metrics below are
            # therefore in-sample figures, not a held-out evaluation.
            y_pred = model.predict(X_clean)

            # Calculate model performance
            mae = mean_absolute_error(y_clean, y_pred)
            r2 = r2_score(y_clean, y_pred)

            # Store model and other results for later use
            results = {
                "model": model,
                "mae": mae,
                "r2": r2,
                "base_times_2024": base_times_2024
            }
        else:
            # Without this guard the imputer / regression raise on an empty
            # training set (no row with Q1, Q2 and Q3 times all present).
            print("No complete Q1/Q2/Q3 lap times found in the 2025 data.")
    else:
        print("No qualifying data found for 2025.")
else:
    print("Could not fetch best Q3 times for 2024.")

## Print Results

In [None]:
# Main block - Print Model Performance and Predictions
# `results` only exists when the training cell ran to completion, so its
# absence means there is nothing to report.
if 'results' not in locals():
    print("Model was not trained due to missing data.")
else:
    # In-sample error metrics of the fitted regression.
    print(
        "\nModel Performance:",
        f"MAE: {results['mae']:.2f} seconds",
        f"R² Score: {results['r2']:.2f}",
        sep="\n",
    )

    # Per-round ranking tables for the whole season.
    predict_all_rounds(model=results['model'], base_times=results['base_times_2024'])
