In [None]:
import fastf1
from fastf1 import get_session
from fastf1.utils import delta_time
import matplotlib.pyplot as plt
# Features and target
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

## Load session data (qualifying, practice and race)

In [None]:
# Cache FastF1 downloads on disk so repeated runs reuse already-fetched data.
# NOTE(review): absolute, user-specific path - this only works on a machine
# where this directory exists.
fastf1.Cache.enable_cache('/Users/nooralindeflaten/f1_ML_predictor/data/cache')
# Load the 2023 Australian GP qualifying session and read its circuit info.
session = get_session(2023, 'Australian GP', 'Q')
session.load()
track = session.get_circuit_info()
# Display the circuit's corner table as the cell output.
track.corners

In [None]:
def print_circuit_properties(circuit_info):
    """Print the public attribute names of an object, then their types.

    Args:
        circuit_info: Any object. Names come from ``dir(circuit_info)``;
            names starting with an underscore are skipped.

    The first section lists every public name. The second section prints
    ``name -> type`` for each of them, followed by ``name()`` when the
    attribute is callable.
    """
    print("Circuit Properties:")
    for name in [n for n in dir(circuit_info) if not n.startswith('_')]:
        print(name)
    print("\n")
    print("Circuit Methods:")
    for name in [n for n in dir(circuit_info) if not n.startswith('_')]:
        attribute = getattr(circuit_info, name)
        print(f"{name} -> {type(attribute)}")
        if callable(attribute):
            print(f"{name}()")

track.corners

In [None]:
# NOTE(review): absolute, user-specific cache path.
fastf1.Cache.enable_cache('/Users/nooralindeflaten/f1_ML_predictor/data/cache')


# Practice sessions to load for round 7 of the 2022 season.
practice_sessions = ['FP1','FP2','FP3']

# Maps session name -> laps of driver 'LEC' for that session.
all_laps_by_session = {}

for session_type in practice_sessions:
    try:
        session = fastf1.get_session(2022,7,session_type)
        session.load()
        laps = session.laps[session.laps['Driver'] == 'LEC']  # drop this filter to keep all drivers
        all_laps_by_session[session_type] = laps  # This is a FastF1 `Laps` object
    except Exception as e:
        # A session that fails to load is only reported; it is then missing
        # from `all_laps_by_session`, so later lookups of that key raise KeyError.
        print(f'Failed {e}')

# Race session of the same event, filtered to the same driver.
races = fastf1.get_session(2022,7,'R')
races.load()
leclerc_race_laps = races.laps[races.laps['Driver'] == 'LEC']

# Qualifying session of the same event, filtered to the same driver.
quali = fastf1.get_session(2022,7,'Q')
quali.load()
leclerc_quali_laps = quali.laps[quali.laps['Driver'] == 'LEC']


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


def get_degradation_data(laps,weather_df):
    """Summarise each lap into one row of tyre, pace and weather values.

    Args:
        laps: Object providing ``iterlaps()``; each yielded lap must support
            ``get_car_data()`` and item access for ``TyreLife``, ``Compound``,
            ``LapTime``, ``LapStartTime`` and ``LapNumber``.
        weather_df (pd.DataFrame): Weather samples with ``Time``,
            ``TrackTemp``, ``Pressure`` and ``AirTemp`` columns.

    Returns:
        pd.DataFrame: One row per lap that could be summarised. A lap that
        raises any exception is reported on stdout and left out.
    """
    def _summarise(lap):
        # Mean telemetry speed over the lap.
        mean_speed = lap.get_car_data()['Speed'].mean()
        tyre_age = lap['TyreLife']
        tyre_compound = lap['Compound']
        lap_seconds = lap['LapTime'].total_seconds()
        lap_start = lap['LapStartTime'].to_pytimedelta()

        # Weather sample whose timestamp is closest to the lap start.
        nearest_pos = (weather_df['Time'] - lap_start).abs().argmin()
        nearest_weather = weather_df.iloc[nearest_pos]
        track_temp = nearest_weather['TrackTemp']
        pressure = nearest_weather['Pressure']
        air_temp = nearest_weather['AirTemp']

        return {
            'lap_number': lap['LapNumber'],
            'compound': tyre_compound,
            'tyrelife': tyre_age,
            'laptime': lap_seconds,
            'speed_avg': mean_speed,
            'track_temp': track_temp,
            'pressure': pressure,
            'air_temp': air_temp
        }

    records = []
    for _, current_lap in laps.iterlaps():
        try:
            records.append(_summarise(current_lap))
        except Exception as e:
            print(f"Skipping lap due to error: {e}")

    return pd.DataFrame(records)

# Preview the FP1 degradation table. The laps are read from
# `all_laps_by_session` because no `fp1` variable has been assigned yet at this
# point of a top-to-bottom run.
print(get_degradation_data(all_laps_by_session['FP1'], all_laps_by_session['FP1'].get_weather_data()))
def all_weather(all_laps_by_session):
    """Plot average speed against tyre life for every loaded session.

    Args:
        all_laps_by_session (dict): Maps a session name (e.g. ``'FP1'``) to
            that session's laps object. Each value must provide
            ``get_weather_data()`` and be accepted by
            ``get_degradation_data``.

    One seaborn ``lmplot`` is drawn per session, coloured by compound and
    titled with the session name. A session for which no lap could be
    summarised is reported on stdout and skipped.
    """
    for sess, laps in all_laps_by_session.items():
        weather_df = laps.get_weather_data()
        deg_df = get_degradation_data(laps, weather_df)
        if deg_df.empty:
            # lmplot fails on a frame that lacks the x/y/hue columns.
            print(f"No usable laps for {sess}; skipping plot")
            continue
        sns.lmplot(data=deg_df, x='tyrelife', y='speed_avg', hue='compound')
        # Include the session so the per-session figures can be told apart.
        plt.title(f"{sess}: Tyre Degradation – Avg Speed vs Tyre Life")

all_weather(all_laps_by_session=all_laps_by_session)

In [None]:
import pandas as pd

def build_practice_df_d(lap_summary_df,practice):
    """Attach per-lap telemetry and weather aggregates to a laps table.

    Args:
        lap_summary_df: Laps table providing ``iterlaps()``, ``copy()`` and
            ``merge()`` (a FastF1 ``Laps``-like DataFrame). It is not
            modified; the function works on a copy.
        practice: Session label. A column named ``str(practice)`` holding
            this value is added for every summarised lap.

    Returns:
        The laps left-joined on ``LapNumber`` with the mean speed, RPM,
        throttle and brake of each lap plus its weather values. Laps whose
        telemetry or weather lookup raises are reported on stdout and get
        NaN in the added columns.
    """
    # Work on a copy so the caller's frame is never mutated.
    laps = lap_summary_df.copy()
    laps['LapStartTime'] = pd.to_timedelta(laps['LapStartTime'])

    agg_columns = [
        str(practice), 'LapNumber', 'speed_avg', 'rpm_avg', 'throttle_avg',
        'brake_avg', 'TrackTemp', 'Pressure', 'AirTemp', 'Humidity',
        'WindSpeed', 'Rainfall', 'WindDirection',
    ]
    lap_aggregates = []

    for _, lap in laps.iterlaps():
        try:
            car_data = lap.get_car_data().add_distance()  # You can drop .add_distance() if not needed
            weather_data = lap.get_weather_data()

            lap_aggregates.append({
                str(practice): practice,
                'LapNumber': lap['LapNumber'],
                'speed_avg': car_data['Speed'].mean(),
                'rpm_avg': car_data['RPM'].mean(),
                'throttle_avg': car_data['Throttle'].mean(),
                'brake_avg': car_data['Brake'].mean(),
                'TrackTemp': weather_data['TrackTemp'],
                'Pressure': weather_data['Pressure'],
                'AirTemp': weather_data['AirTemp'],
                'Humidity': weather_data['Humidity'],
                'WindSpeed': weather_data['WindSpeed'],
                'Rainfall': weather_data['Rainfall'],
                'WindDirection': weather_data['WindDirection']
            })
        except Exception as e:
            print(f"Skipping lap {lap.LapNumber} due to error: {e}")

    # Passing the column list guarantees the 'LapNumber' join key exists even
    # when every lap was skipped.
    agg_df = pd.DataFrame(lap_aggregates, columns=agg_columns)
    if agg_df.empty:
        # Keep the join key dtype compatible with the laps table.
        agg_df = agg_df.astype({'LapNumber': laps['LapNumber'].dtype})

    return laps.merge(agg_df, on='LapNumber', how='left')


# Manually noted session conditions; not used by the code below.
weather = {
    'track_temp': 54.7,
    'pressure': 1015.2,
    'humidity': 70.0  # Optional: add what you have
}

# Build the FP1 table straight from the loaded laps. `fp1` is only assigned
# further down the notebook, so referencing it here fails on a fresh kernel.
practice_df = build_practice_df_d(all_laps_by_session['FP1'],'FP1')

practice_df['FP1']  # View the output!


## Building practice data

In [None]:
import pandas as pd
# Pick the per-session laps collected during loading. A session that failed
# to load is absent from the dict, so its lookup raises KeyError here.
fp1 = all_laps_by_session['FP1']
fp2 = all_laps_by_session['FP2']
fp3 = all_laps_by_session['FP3']


def build_practice_df(lap_summary_df, practice_name):
    """Attach per-lap telemetry and weather aggregates to a laps table.

    Args:
        lap_summary_df: Laps table providing ``iterlaps()``, ``copy()`` and
            ``merge()`` (a FastF1 ``Laps``-like DataFrame). It is not
            modified; the function works on a copy.
        practice_name: Label stored in the ``Session`` column of every
            summarised lap.

    Returns:
        The laps left-joined on ``LapNumber`` with the mean speed, RPM,
        throttle and brake of each lap, its weather values and the session
        label. Laps whose telemetry or weather lookup raises are reported on
        stdout and get NaN in the added columns.
    """
    # Work on a copy so the caller's frame is never mutated.
    laps = lap_summary_df.copy()
    laps['LapStartTime'] = pd.to_timedelta(laps['LapStartTime'])

    agg_columns = [
        'LapNumber', 'speed_avg', 'rpm_avg', 'throttle_avg', 'brake_avg',
        'TrackTemp', 'Pressure', 'AirTemp', 'Humidity', 'WindSpeed',
        'Rainfall', 'WindDirection', 'Session',
    ]
    lap_aggregates = []

    for _, lap in laps.iterlaps():
        try:
            car_data = lap.get_car_data().add_distance()
            # Look the weather up once per lap instead of once per field.
            weather_data = lap.get_weather_data()

            lap_aggregates.append({
                'LapNumber': lap['LapNumber'],
                'speed_avg': car_data['Speed'].mean(),
                'rpm_avg': car_data['RPM'].mean(),
                'throttle_avg': car_data['Throttle'].mean(),
                'brake_avg': car_data['Brake'].mean(),
                'TrackTemp': weather_data['TrackTemp'],
                'Pressure': weather_data['Pressure'],
                'AirTemp': weather_data['AirTemp'],
                'Humidity': weather_data['Humidity'],
                'WindSpeed': weather_data['WindSpeed'],
                'Rainfall': weather_data['Rainfall'],
                'WindDirection': weather_data['WindDirection'],
                'Session': practice_name
            })
        except Exception as e:
            print(f"Skipping lap {lap.LapNumber} due to error: {e}")

    # Passing the column list guarantees the 'LapNumber' join key exists even
    # when every lap was skipped.
    agg_df = pd.DataFrame(lap_aggregates, columns=agg_columns)
    if agg_df.empty:
        # Keep the join key dtype compatible with the laps table.
        agg_df = agg_df.astype({'LapNumber': laps['LapNumber'].dtype})

    return laps.merge(agg_df, on='LapNumber', how='left')

# Replace each session's raw laps with its merged lap + telemetry + weather
# table. Note that `fp1`/`fp2`/`fp3` change meaning here: re-running this cell
# feeds the already-merged tables back into build_practice_df.
fp1 = build_practice_df(fp1, "FP1")
fp2 = build_practice_df(fp2, "FP2")
fp3 = build_practice_df(fp3, "FP3")


## Reading practice data

In [None]:
# Load the previously exported practice tables.
# NOTE(review): absolute, user-specific paths.
fp1 = pd.read_csv("/Users/nooralindeflaten/f1_ML_predictor/data/processed/leclerc_2022_monaco_fp1.csv")
fp2 = pd.read_csv("/Users/nooralindeflaten/f1_ML_predictor/data/processed/leclerc_2022_monaco_fp2.csv")
fp3 = pd.read_csv("/Users/nooralindeflaten/f1_ML_predictor/data/processed/leclerc_2022_monaco_fp3.csv")

# Add session label for clarity
fp1["Session"] = "FP1"
fp2["Session"] = "FP2"
fp3["Session"] = "FP3"

# Combine them
practice_df = pd.concat([fp1, fp2, fp3], ignore_index=True)

# Select relevant columns. The explicit copy makes `degradation_df` an
# independent frame, so the LapTime assignment below does not write into a
# slice of `practice_df` (SettingWithCopyWarning).
degradation_df = practice_df[["LapNumber",
    "LapTime", "TyreLife", "Compound", "TrackTemp", "Pressure",
    "AirTemp", "Humidity", "Driver", "Session"
]].copy()

# Convert LapTime (timedelta text in the CSV) to total seconds
degradation_df["LapTime"] = pd.to_timedelta(degradation_df["LapTime"]).dt.total_seconds()

# Drop any missing or nonsense values
degradation_df = degradation_df.dropna(subset=["LapTime", "TyreLife", "Compound"])
degradation_df.head()

In [None]:
# Visualize degradation by compound
# Plot the cleaned table: its LapTime is in seconds, matching the axis label.
# `practice_df` still holds the raw CSV LapTime values.
plt.figure(figsize=(10, 6))
sns.lineplot(data=degradation_df, x="TyreLife", y="LapTime", hue="Compound", errorbar=None)
plt.title("Lap Time vs. Tyre Life by Compound")
plt.xlabel("Tyre Life (laps)")
plt.ylabel("Lap Time (s)")
plt.grid(True)
plt.show()


## Regression model

In [None]:
# Features and target for the lap-time regression.
# NOTE(review): only LapTime, TyreLife and Compound were NaN-filtered when
# degradation_df was built; presumably the weather columns are always
# populated - confirm, since LinearRegression rejects NaN inputs.
X = degradation_df[["TyreLife", "Compound", "TrackTemp", "AirTemp", "Pressure"]]
y = degradation_df["LapTime"]

# One-hot encode the categorical 'Compound' column (first category dropped);
# the remaining columns are passed through unchanged.
preprocessor = ColumnTransformer([
    ("compound", OneHotEncoder(drop="first"), ["Compound"])
], remainder='passthrough')

# Pipeline: Encoding + Regression
model = make_pipeline(preprocessor, LinearRegression())

# Fit the model
model.fit(X, y)

# Collect the fitted coefficients next to their transformed feature names.
# `coef_df` is built here but not displayed by this cell.
linreg = model.named_steps["linearregression"]
features = model.named_steps["columntransformer"].get_feature_names_out()
coef_df = pd.DataFrame({
    "Feature": features,
    "Coefficient": linreg.coef_
})

# Grid of hypothetical inputs: every compound x tyre life 1..20 at fixed weather.
compounds = ['SOFT', 'MEDIUM', 'HARD']
tyrelife = list(range(1, 21))  # Simulate laps 1 to 20
track_temp = 54.2
air_temp = 30.8
pressure = 1014.8

data = []
for compound in compounds:
    for tl in tyrelife:
        # Note: this rebinds `features`, which held the feature names above.
        features = {
            "Compound": compound,
            "TyreLife": tl,
            "TrackTemp": track_temp,
            "AirTemp": air_temp,
            "Pressure": pressure
        }
        data.append(features)

input_df = pd.DataFrame(data)
# Show the simulated input grid (no prediction is made in this cell)
print(input_df)



## Prediction

In [None]:
import pandas as pd
import numpy as np

def simulate_stint(model, compound, track_temp, air_temp, pressure, laps=20):
    """Build the model input rows for one stint on a single compound.

    Args:
        model: Not used by this function.
        compound (str): Tyre compound for the whole stint.
        track_temp (float): Track temperature used for every lap.
        air_temp (float): Air temperature used for every lap.
        pressure (float): Air pressure used for every lap.
        laps (int): Stint length; tyre life runs from 1 to ``laps``.

    Returns:
        pd.DataFrame: One row per lap with the columns ``Compound``,
        ``TyreLife``, ``TrackTemp``, ``AirTemp`` and ``Pressure``.
    """
    columns = ["Compound", "TyreLife", "TrackTemp", "AirTemp", "Pressure"]
    rows = [
        {
            "Compound": compound,
            "TyreLife": tyre_age,
            "TrackTemp": track_temp,
            "AirTemp": air_temp,
            "Pressure": pressure
        }
        for tyre_age in range(1, laps + 1)
    ]
    # Explicit columns keep the schema stable even for an empty stint.
    return pd.DataFrame(rows, columns=columns)
# Print out the feature names expected by the model

# Build one stint table per compound at the same weather conditions
soft_stint = simulate_stint(model, 'SOFT', 54.155, 30.845, 1014.821)
medium_stint = simulate_stint(model, 'MEDIUM', 54.155, 30.845, 1014.821)
hard_stint = simulate_stint(model, 'HARD', 54.155, 30.845, 1014.821)

# Combine stints into a single DataFrame
stints_df = pd.concat([soft_stint, medium_stint, hard_stint], ignore_index=True)

# Print the combined table
print(stints_df)



In [None]:
def simulate_stint(model, compound, track_temp, air_temp, pressure, laps=20):
    """Predict lap times for a stint on one compound at fixed weather.

    Args:
        model: Fitted estimator exposing ``feature_names_in_`` and
            ``predict``.
        compound (str): Tyre compound for the whole stint.
        track_temp (float): Track temperature used for every lap.
        air_temp (float): Air temperature used for every lap.
        pressure (float): Air pressure used for every lap.
        laps (int): Stint length; tyre life runs from 1 to ``laps``.

    Returns:
        pd.DataFrame: Columns ``LapNumber``, ``PredictedLapTime`` and
        ``Compound``, one row per lap.
    """
    # Manual 0/1 flags per compound. They only reach the model when its
    # feature_names_in_ lists these column names.
    onehot_flags = {f'compound__{name}': 0 for name in ('SOFT', 'MEDIUM', 'HARD')}
    onehot_flags[f'compound__{compound}'] = 1

    lap_numbers = range(1, laps + 1)
    rows = [
        {
            "Compound": compound,
            "TyreLife": tyre_age,
            "TrackTemp": track_temp,
            "AirTemp": air_temp,
            "Pressure": pressure,
            **onehot_flags
        }
        for tyre_age in lap_numbers
    ]

    # Select and order the columns exactly as the model saw them when fitted.
    model_input = pd.DataFrame(rows)[model.feature_names_in_]
    predicted = model.predict(model_input)

    return pd.DataFrame({
        'LapNumber': lap_numbers,
        'PredictedLapTime': predicted,
        'Compound': compound
    })

# Predict a default-length stint for each compound at the same weather conditions
soft_stint = simulate_stint(model, 'SOFT', 54.155, 30.845, 1014.821)
medium_stint = simulate_stint(model, 'MEDIUM', 54.155, 30.845, 1014.821)
hard_stint = simulate_stint(model, 'HARD', 54.155, 30.845, 1014.821)

# Combine stints into a single DataFrame
stints_df = pd.concat([soft_stint, medium_stint, hard_stint], ignore_index=True)

# Print the combined predictions
print(stints_df)


In [None]:
def build_session_df(driver_laps, session_name="Session"):
    """Attach per-lap telemetry and weather aggregates to a driver's laps.

    Args:
        driver_laps: Laps table providing ``iterlaps()``, ``copy()`` and
            ``merge()`` (a FastF1 ``Laps``-like DataFrame). It is not
            modified; the function works on a copy.
        session_name (str): Label stored in the ``Session`` column of every
            summarised lap.

    Returns:
        The laps, with an added ``LapTimeSec`` column, left-joined on
        ``LapNumber`` with the mean speed, RPM, throttle and brake of each
        lap, its weather values (when available) and the session label.
        Laps that raise during summarising are reported on stdout and get
        NaN in the added columns.
    """
    # Copy before any assignment so the caller's frame is never mutated.
    laps = driver_laps.copy()
    laps['LapStartTime'] = pd.to_timedelta(laps['LapStartTime'])

    agg_columns = [
        'LapNumber', 'speed_avg', 'rpm_avg', 'throttle_avg', 'brake_avg',
        'TrackTemp', 'AirTemp', 'Pressure', 'Humidity', 'Rainfall',
        'WindSpeed', 'WindDirection', 'Session',
    ]
    weather_keys = agg_columns[5:12]
    aggregates = []

    for _, lap in laps.iterlaps():
        try:
            car_data = lap.get_car_data().add_distance()
            telemetry = {
                'LapNumber': lap['LapNumber'],
                'speed_avg': car_data['Speed'].mean(),
                'rpm_avg': car_data['RPM'].mean(),
                'throttle_avg': car_data['Throttle'].mean(),
                'brake_avg': car_data['Brake'].mean(),
            }

            # Weather data for this lap (if available)
            weather_data = lap.get_weather_data()
            if weather_data is not None:
                telemetry.update({key: weather_data[key] for key in weather_keys})

            telemetry['Session'] = session_name
            aggregates.append(telemetry)

        except Exception as e:
            print(f"Skipping lap {lap.LapNumber} due to error: {e}")

    # Passing the column list guarantees the 'LapNumber' join key exists even
    # when every lap was skipped.
    agg_df = pd.DataFrame(aggregates, columns=agg_columns)
    if agg_df.empty:
        # Keep the join key dtype compatible with the laps table.
        agg_df = agg_df.astype({'LapNumber': laps['LapNumber'].dtype})

    laps['LapTimeSec'] = pd.to_timedelta(laps['LapTime']).dt.total_seconds()
    return laps.merge(agg_df, on="LapNumber", how="left")


In [None]:
# Merged lap + telemetry + weather tables for the race and qualifying laps.
race_df = build_session_df(leclerc_race_laps, "Race")
quali_df = build_session_df(leclerc_quali_laps, "Quali")


In [None]:
def get_fast_laps(df, percentile=0.7):
    """Return the laps whose time is at or below a quantile of ``LapTimeSec``.

    Args:
        df (pd.DataFrame): Lap table with a ``LapTimeSec`` column.
        percentile (float): Quantile used as the upper bound (inclusive).

    Returns:
        pd.DataFrame: Matching rows with a fresh 0-based index.
    """
    lap_seconds = df['LapTimeSec']
    cutoff = lap_seconds.quantile(percentile)
    keep = lap_seconds <= cutoff
    return df[keep].reset_index(drop=True)

def get_slow_laps(df, percentile=0.3):
    """Return the laps whose time is at or above a quantile of ``LapTimeSec``.

    Args:
        df (pd.DataFrame): Lap table with a ``LapTimeSec`` column.
        percentile (float): Quantile used as the lower bound (inclusive).

    Returns:
        pd.DataFrame: Matching rows with a fresh 0-based index.
    """
    lap_seconds = df['LapTimeSec']
    cutoff = lap_seconds.quantile(percentile)
    keep = lap_seconds >= cutoff
    return df[keep].reset_index(drop=True)


In [None]:
# Split the race laps with the default thresholds: "fast" is every lap at or
# below the 70th percentile, "slow" is every lap at or above the 30th
# percentile, so the two sets overlap.
fast_laps = get_fast_laps(race_df)
slow_laps = get_slow_laps(race_df)

print(f"Fast laps count: {len(fast_laps)}")
print(f"Slow laps count: {len(slow_laps)}")

# Columns whose means are compared between the two sets
cols_to_compare = ['speed_avg', 'throttle_avg', 'brake_avg', 'TrackTemp', 'Rainfall', 'TyreLife']

print("Fast Lap Averages:")
print(fast_laps[cols_to_compare].mean())

print("\nSlow Lap Averages:")
print(slow_laps[cols_to_compare].mean())


## Summarizing lap

In [None]:
import pandas as pd

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

def preprocess_lap_data_for_clustering(df,n_clusters=4):
    """Convert lap/sector times to seconds and cluster the laps with KMeans.

    Args:
        df (pd.DataFrame): Lap table containing the time columns and the
            feature columns listed below. It is modified in place: the time
            columns become float seconds and a ``cluster`` column is added.
        n_clusters (int): Number of KMeans clusters.

    Returns:
        tuple: ``(df, model)`` - the same ``df`` object and the fitted
        ``KMeans`` model. Rows with a missing feature get no cluster label.
    """
    # Convert LapTime and SectorTimes to total seconds
    time_cols = ['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']
    for col in time_cols:
        # A numeric column is taken to be seconds from an earlier run of this
        # function; pd.to_timedelta would reinterpret it as nanoseconds.
        if not pd.api.types.is_numeric_dtype(df[col]):
            df[col] = pd.to_timedelta(df[col]).dt.total_seconds()

    # Select relevant numerical features
    features = [
        'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
        'speed_avg', 'rpm_avg', 'throttle_avg', 'brake_avg',
        'TrackTemp', 'Rainfall', 'TyreLife', 'Pressure',
        'AirTemp', 'Humidity', 'WindSpeed'
    ]

    # Drop rows with missing values in selected features
    cluster_data = df.dropna(subset=features).copy()
    indices = cluster_data.index

    # Scale the features so no single unit dominates the distances
    X_scaled = StandardScaler().fit_transform(cluster_data[features])

    # Run KMeans clustering
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
    labels = model.fit_predict(X_scaled)

    # Attach labels to the rows that were clustered
    df.loc[indices, 'cluster'] = labels
    return df, model

def run_kmeans_clustering(X_scaled, n_clusters=4):
    """Fit KMeans on already-scaled features.

    Args:
        X_scaled: Feature matrix accepted by ``KMeans.fit_predict``.
        n_clusters (int): Number of clusters.

    Returns:
        tuple: ``(cluster_labels, kmeans)`` - the label of each row and the
        fitted model.
    """
    # n_init='auto' matches the other KMeans calls in this notebook.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
    cluster_labels = kmeans.fit_predict(X_scaled)
    return cluster_labels, kmeans

"Clustering functions defined and ready to use!"
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

def combine_sessions_and_cluster(fp1_df,fp2_df,fp3_df, quali_laps, race_laps, n_clusters=4):
    """Stack the session lap tables and cluster all laps with KMeans.

    Args:
        fp1_df, fp2_df, fp3_df, quali_laps, race_laps (pd.DataFrame): Lap
            tables containing the time and feature columns listed below.
            Time columns may be timedeltas, timedelta text or float seconds.
        n_clusters (int): Number of KMeans clusters.

    Returns:
        tuple: ``(combined_df, model)`` - the concatenated table with time
        columns in seconds and a ``cluster`` column, and the fitted
        ``KMeans`` model. Rows with a missing feature get no cluster label.
    """
    # Combine all laps into one DataFrame
    combined_df = pd.concat([fp1_df,fp2_df,fp3_df, quali_laps, race_laps], ignore_index=True)

    # Convert time features to total seconds (so they are numeric).
    # pd.to_timedelta also handles the object-dtype column that results from
    # mixing CSV text with real timedeltas, where `.dt` alone would fail.
    for col in ['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']:
        if not pd.api.types.is_numeric_dtype(combined_df[col]):
            combined_df[col] = pd.to_timedelta(combined_df[col]).dt.total_seconds()

    # Define features for clustering
    features = [
        'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
        'speed_avg', 'rpm_avg', 'throttle_avg', 'brake_avg',
        'TyreLife', 'TrackTemp', 'Pressure', 'AirTemp', 'Humidity',
        'WindSpeed', 'Rainfall'
    ]

    # Drop NaNs and keep a reference to their original index
    cluster_data = combined_df.dropna(subset=features).copy()
    indices = cluster_data.index

    # Scale the features so no single unit dominates the distances
    X_scaled = StandardScaler().fit_transform(cluster_data[features])

    # Run KMeans clustering
    model = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
    labels = model.fit_predict(X_scaled)

    # Attach labels to the rows that were clustered
    combined_df.loc[indices, 'cluster'] = labels

    return combined_df, model


In [None]:


# Cluster all practice, qualifying and race laps together into 4 groups.
combined_df, cluster_model = combine_sessions_and_cluster(fp1,fp2,fp3, quali_df, race_df, n_clusters=4)


In [None]:
def summarize_clusters(clustered_df, group_col='cluster'):
    """Average the lap metrics per cluster and count the laps in each.

    Args:
        clustered_df (pd.DataFrame): Lap table with the metric columns
            listed below and a grouping column.
        group_col (str): Column holding the cluster label.

    Returns:
        pd.DataFrame: One row per cluster (sorted by label) with the mean of
        each metric and a ``Lap Count`` column.
    """
    metric_cols = [
        'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
        'speed_avg', 'rpm_avg', 'throttle_avg', 'brake_avg',
        'TyreLife', 'TrackTemp', 'Rainfall', 'AirTemp', 'Humidity'
    ]
    per_cluster_mean = clustered_df.groupby(group_col)[metric_cols].mean().sort_index()

    laps_per_cluster = clustered_df[group_col].value_counts().sort_index()
    laps_per_cluster = laps_per_cluster.rename('Lap Count')

    return per_cluster_mean.join(laps_per_cluster)
# Per-cluster means and lap counts for the combined table.
cluster_summary = summarize_clusters(combined_df)

In [None]:
# Cluster the FP1 laps on their own. The function modifies `fp1` in place and
# returns (frame, fitted KMeans model) - so `indices` actually holds the model.
fp_cluster, indices = preprocess_lap_data_for_clustering(fp1,n_clusters=4)
print(fp_cluster)


In [None]:
def get_cluster_samples(df, n_samples=3, random_state=42):
    """Return up to ``n_samples`` randomly chosen laps from each cluster.

    Args:
        df (pd.DataFrame): Lap table with a ``cluster`` column.
        n_samples (int): Maximum number of rows drawn per cluster; smaller
            clusters contribute all their rows.
        random_state (int): Seed passed to every per-cluster draw.

    Returns:
        pd.DataFrame: The sampled rows, ordered by cluster, with a fresh
        0-based index and all original columns (including ``cluster``).
    """
    # Sampling group by group avoids groupby.apply, whose handling of the
    # grouping column is deprecated and would drop 'cluster' from the result.
    picked = [
        group.sample(min(len(group), n_samples), random_state=random_state)
        for _, group in df.groupby('cluster')
    ]
    if not picked:
        # No clusters (e.g. empty input): return an empty frame, same columns.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(picked, ignore_index=True)

# Draw up to 3 example laps per cluster. This needs a 'cluster' column on
# `fp1`, which the in-place FP1 clustering cell adds.
samples_fp1 = get_cluster_samples(fp1, n_samples=3)
samples_fp1

In [None]:
def get_corner_segments(car,pos,circuit_info, margin=10):
    """Slice the car telemetry around each corner of the circuit.

    Args:
        car (pd.DataFrame): Telemetry with a ``Distance`` column.
        pos (pd.DataFrame): Position samples with ``X`` and ``Y`` columns.
        circuit_info: Object whose ``corners`` table has ``Number``,
            ``Letter``, ``X`` and ``Y`` columns.
        margin (float): Half-width of the ``Distance`` window kept around
            each corner.

    Returns:
        dict: Corner name (``Number`` + ``Letter``) -> copy of the ``car``
        rows whose ``Distance`` lies within ``margin`` of the corner.

    NOTE(review): the row position of the nearest ``pos`` sample is used to
    index ``car``, which assumes both tables are row-aligned - confirm.
    """
    from scipy.spatial import cKDTree

    # KD-tree over the position samples for nearest-neighbour lookup
    pos_tree = cKDTree(pos[['X', 'Y']].to_numpy())
    corner_slices = {}

    for _, corner_row in circuit_info.corners.iterrows():
        label = f"{corner_row['Number']}{corner_row['Letter']}"

        # Row position of the position sample closest to the corner
        _, nearest = pos_tree.query([corner_row['X'], corner_row['Y']])

        try:
            apex_distance = car.iloc[nearest]['Distance']
        except IndexError:
            # `car` has fewer rows than `pos`; skip this corner
            continue

        # Keep rows within +/- margin of the corner's distance (inclusive)
        within = car['Distance'].between(apex_distance - margin, apex_distance + margin)
        corner_slices[label] = car[within].copy()

    return corner_slices


In [None]:
def merge_car_and_pos(car_data, pos_data):
    """Join X/Y position onto car telemetry by ``SessionTime``.

    Args:
        car_data (pd.DataFrame): Telemetry with a ``SessionTime`` column.
        pos_data (pd.DataFrame): Position samples with ``SessionTime``,
            ``X`` and ``Y`` columns.

    Returns:
        pd.DataFrame: ``car_data`` with ``X`` and ``Y`` added. Missing
        position values are time-interpolated first; car rows whose
        ``SessionTime`` has no exact match in ``pos_data`` get NaN.
    """
    xy_by_time = pos_data.set_index('SessionTime')[['X', 'Y']]
    xy_filled = xy_by_time.interpolate(method='time')
    car_by_time = car_data.set_index('SessionTime')
    return car_by_time.join(xy_filled, how='left').reset_index()

from scipy.spatial import cKDTree
import numpy as np

def label_corners_in_pos(pos_data, corner_coords, radius=15):
    """Tag each position sample with the nearest corner within ``radius``.

    Args:
        pos_data (pd.DataFrame): Position samples with ``X`` and ``Y``
            columns. A ``corner`` column is added in place.
        corner_coords (list[dict]): Corners as dicts with ``name``, ``x``
            and ``y`` keys.
        radius (float): Maximum distance for a sample to count as being at
            a corner.

    Returns:
        pd.DataFrame: The same ``pos_data`` object; ``corner`` holds the
        corner name, or NaN when no corner is close enough.
    """
    # KD-tree over the corner coordinates
    corner_xy = np.array([[corner['x'], corner['y']] for corner in corner_coords])
    corner_tree = cKDTree(corner_xy)

    # Nearest corner per sample; misses come back with an infinite distance
    sample_xy = pos_data[['X', 'Y']].values
    nearest_dist, nearest_idx = corner_tree.query(sample_xy, distance_upper_bound=radius)

    labels = []
    for corner_idx, dist in zip(nearest_idx, nearest_dist):
        if dist < radius:
            labels.append(corner_coords[corner_idx]['name'])
        else:
            labels.append(np.nan)

    pos_data['corner'] = labels
    return pos_data

def interpolate_corner_labels(car_data, pos_data_with_corners):
    """Carry corner labels from position samples over to car telemetry.

    Args:
        car_data (pd.DataFrame): Telemetry with a ``SessionTime`` column.
        pos_data_with_corners (pd.DataFrame): Position samples with
            ``SessionTime`` and ``corner`` columns.

    Returns:
        pd.DataFrame: ``car_data`` with a ``corner`` column. Labels are
        forward-filled onto a 10 ms grid and joined on exact
        ``SessionTime``; car rows off that grid get NaN.
    """
    corner_by_time = pos_data_with_corners.set_index('SessionTime')[['corner']]

    # Upsample to a regular 10 ms grid, repeating the last known label
    corner_grid = corner_by_time.resample('10ms').ffill()

    car_by_time = car_data.set_index('SessionTime')
    labelled = car_by_time.join(corner_grid, how='left')
    return labelled.reset_index()

def summarize_driver_behavior_by_corner(car_data_labeled):
    """Average throttle, brake, speed and RPM per corner.

    Args:
        car_data_labeled (pd.DataFrame): Telemetry with ``corner``,
            ``Throttle``, ``Brake``, ``Speed`` and ``RPM`` columns.

    Returns:
        pd.DataFrame: One row per corner label; rows without a label are
        ignored.
    """
    at_corner = car_data_labeled.dropna(subset=['corner'])
    channels = ['Throttle', 'Brake', 'Speed', 'RPM']
    return at_corner.groupby('corner')[channels].mean()


In [None]:
# Full context
from scipy.spatial import cKDTree
import numpy as np

def label_pos_data_with_corners(pos_data, corner_coords, radius=15):
    """Tag each position sample with the nearest corner within ``radius``.

    Args:
        pos_data (pd.DataFrame): Position samples with ``X`` and ``Y``
            columns. A ``corner`` column is added in place.
        corner_coords (list[dict]): Corners as dicts with ``name``, ``x``
            and ``y`` keys.
        radius (float): Maximum distance for a sample to count as being at
            a corner.

    Returns:
        pd.DataFrame: ``pos_data`` wrapped in a plain ``pd.DataFrame``;
        ``corner`` holds the corner name, or NaN when none is close enough.
    """
    # KD-tree over the corner coordinates
    corner_xy = np.array([[corner['x'], corner['y']] for corner in corner_coords])
    corner_tree = cKDTree(corner_xy)

    # Nearest corner per sample; misses come back with an infinite distance
    sample_xy = pos_data[['X', 'Y']].values
    nearest_dist, nearest_idx = corner_tree.query(sample_xy, distance_upper_bound=radius)

    labels = []
    for corner_idx, dist in zip(nearest_idx, nearest_dist):
        if dist < radius:
            labels.append(corner_coords[corner_idx]['name'])
        else:
            labels.append(np.nan)
    pos_data['corner'] = labels

    return pd.DataFrame(pos_data)

def merge_corner_labels_into_car_data(car_data, pos_data_with_corners, resample_rate=''):
    """Carry corner labels from position samples over to car telemetry.

    Args:
        car_data (pd.DataFrame): Telemetry with a ``SessionTime`` column.
        pos_data_with_corners (pd.DataFrame): Position samples with
            ``SessionTime`` and ``corner`` columns.
        resample_rate (str): Optional pandas frequency (e.g. ``'10ms'``).
            When given, labels are forward-filled onto that grid and joined
            on exact ``SessionTime``. When empty (the default), each car
            sample takes the most recent label at or before its own
            ``SessionTime``.

    Returns:
        pd.DataFrame: ``car_data`` with a ``corner`` column; NaN where no
        label applies.

    Unlabelled position samples are dropped before filling, so a label
    persists until the next labelled sample.
    """
    # Keep only labelled samples, indexed by time, one row per timestamp
    labeled_pos = pos_data_with_corners[['SessionTime', 'corner']].dropna().set_index('SessionTime')
    labeled_pos = labeled_pos[~labeled_pos.index.duplicated(keep='first')]  # avoid duplicates

    car_by_time = car_data.set_index('SessionTime')

    if resample_rate:
        # Resample to the requested granularity
        pos_aligned = labeled_pos.resample(resample_rate).ffill()
    else:
        # An empty string is not a valid resample frequency. Look up the
        # latest label directly at the car timestamps instead.
        pos_aligned = labeled_pos.sort_index().reindex(
            car_by_time.index.unique(), method='ffill'
        )

    # Merge corner labels into car_data
    car_data_labeled = car_by_time.join(pos_aligned, how='left')

    return pd.DataFrame(car_data_labeled.reset_index())

# Rebind fp1/fp2/fp3 to the raw per-session laps loaded earlier (these names
# held other tables in previous cells).
fp1 = all_laps_by_session['FP1']
fp2 = all_laps_by_session['FP2']
fp3 = all_laps_by_session['FP3']

# Circuit info plus the car and position telemetry of FP1 lap 5.
track = fp1.session.get_circuit_info()
lap = fp1.pick_lap(5)
car_data = lap.get_car_data().add_distance()
pos_data = lap.get_pos_data()
corners = []

# Flatten the corner table into dicts with 'name', 'x' and 'y' keys, the
# shape the labelling function expects.
for _, row in track.corners.iterrows():
    corners.append({
        'name': f"{row['Number']}{row['Letter']}",
        'x': row['X'],
        'y': row['Y']
    })


# Label pos_data (this also adds the 'corner' column to `pos_data` itself)
pos_labeled = label_pos_data_with_corners(pos_data, corners)

# Add corner info to car_data, using the function's default resample_rate
car_labeled = merge_corner_labels_into_car_data(car_data, pos_labeled)

# Telemetry rows labelled with corner '1'
car_labeled[car_labeled['corner'] == '1']

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def plot_cluster_radar(df_summary, features, cluster_ids=None):
    """Draw one radar (polar) profile per cluster.

    Args:
        df_summary (pd.DataFrame): Per-cluster summary indexed by cluster
            label, containing the ``features`` columns.
        features (list[str]): Columns plotted as the radar axes, in order.
        cluster_ids (list, optional): Clusters to draw; defaults to every
            index value of ``df_summary``.

    Values are plotted as given, without rescaling.
    """
    selected = df_summary.index.tolist() if cluster_ids is None else cluster_ids

    # One spoke per feature; repeat the first point to close each polygon
    spokes = np.linspace(0, 2 * np.pi, len(features), endpoint=False).tolist()
    closed_angles = spokes + spokes[:1]

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))

    for cluster_id in selected:
        profile = df_summary.loc[cluster_id, features].tolist()
        closed_profile = profile + profile[:1]
        ax.plot(closed_angles, closed_profile, label=f'Cluster {int(cluster_id)}')
        ax.fill(closed_angles, closed_profile, alpha=0.1)

    # First feature at the top, remaining ones clockwise
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)
    ax.set_thetagrids(np.degrees(spokes), features)
    plt.legend(loc='upper right', bbox_to_anchor=(1.1, 1.1))
    plt.title("Driving Style Cluster Profiles", fontsize=15)
    plt.show()

# Radar plot of the per-cluster means for a subset of the summary columns.
# NOTE(review): these features are on very different scales and are plotted
# unscaled - confirm the chart is readable.
features_to_plot = ['speed_avg', 'rpm_avg', 'throttle_avg', 'brake_avg', 'TyreLife', 'TrackTemp']
plot_cluster_radar(cluster_summary, features=features_to_plot)


## Clustering

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

def cluster_laps(df, n_clusters=3, features=['speed_avg', 'throttle_avg', 'brake_avg']):
    """
    Group laps into KMeans clusters using selected driving-behaviour columns.

    Args:
        df (pd.DataFrame): Lap summary DataFrame.
        n_clusters (int): Number of clusters to create.
        features (list): Columns to use for clustering.

    Returns:
        pd.DataFrame: Copy of the rows that have all ``features`` present,
        with a 'cluster' column added. The input frame is not modified.
    """
    # Only laps with every feature present can be clustered
    usable_laps = df.dropna(subset=features).copy()

    # Standardise so each feature contributes on the same scale
    scaled = StandardScaler().fit_transform(usable_laps[features])

    # Fit KMeans and label every usable lap
    labels = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto').fit_predict(scaled)
    usable_laps['cluster'] = labels

    return usable_laps


In [None]:
def compare_sectors_by_cluster(df):
    """
    Prints average sector times per cluster.

    Args:
        df (pd.DataFrame): Lap table with a 'cluster' column and sector
            columns named either 'sector1'/'sector2'/'sector3' or
            'Sector1Time'/'Sector2Time'/'Sector3Time' (the names used by the
            lap tables in this notebook).

    Raises:
        KeyError: If neither set of sector columns is present.
    """
    candidates = (
        ['sector1', 'sector2', 'sector3'],
        ['Sector1Time', 'Sector2Time', 'Sector3Time'],
    )
    sector_cols = next(
        (cols for cols in candidates if all(col in df.columns for col in cols)),
        None,
    )
    if sector_cols is None:
        raise KeyError(
            "Expected sector columns ['sector1', 'sector2', 'sector3'] or "
            "['Sector1Time', 'Sector2Time', 'Sector3Time']"
        )

    print("\n=== Sector Comparison by Cluster ===")
    # Unlabelled laps (NaN cluster) are left out; NaN also cannot be sorted.
    for cluster in sorted(df['cluster'].dropna().unique()):
        cluster_data = df[df['cluster'] == cluster]
        print(f"\nCluster {cluster} ({len(cluster_data)} laps):")
        print(cluster_data[sector_cols].mean())


In [None]:
# Cluster the combined practice/quali/race lap table built by
# combine_sessions_and_cluster. cluster_laps needs a single DataFrame; the
# former list referenced names that are never defined at notebook level and
# mixed in FastF1 session objects.
lap_summary = cluster_laps(combined_df, n_clusters=3)
# The combined table names its sector columns Sector1Time..Sector3Time; expose
# them under the sector1..sector3 names that the comparison prints.
compare_sectors_by_cluster(lap_summary.rename(columns={
    'Sector1Time': 'sector1',
    'Sector2Time': 'sector2',
    'Sector3Time': 'sector3',
}))
