# Imports

In [12]:
from helpers import load_or_build_profiles, build_circuit_profile_df
import plotly.express as px
import plotly.io as pio
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import plotly.graph_objects as go
import numpy as np

# Make "notebook" the default plotly renderer for this session.
pio.renderers.default = "notebook" 

# Get data

In [13]:
# Loader settings: CSV cache location and the season range passed as start/end year.
PROFILE_CACHE_PATH = "data/circuit_profiles.csv"
FIRST_SEASON = 2024
LAST_SEASON = 2025

# The helper returns two objects; the second is kept as `df_skipped`
# (presumably the sessions it could not profile — confirm in helpers).
df_profiles, df_skipped = load_or_build_profiles(
    cache_path=PROFILE_CACHE_PATH,
    start_year=FIRST_SEASON,
    end_year=LAST_SEASON,
)

🔁 Race weekend started — updating recent sessions only...
ℹ️ No new sessions to append.


In [14]:
# Show the loaded session profiles as this cell's output.
df_profiles

Unnamed: 0,year,event,location,session,real_altitude,lap_length,telemetry_source,num_drs_zones,drs_total_len_m,drs_pct_of_lap,avg_speed,top_speed,braking_events,low_pct,med_pct,high_pct,slow_corners,medium_corners,fast_corners,chicanes,air_temp_avg,track_temp_avg,rain_detected
0,2024,Bahrain Grand Prix,Sakhir,FP1,8.0,5357.197222,fastf1,3,3638.044722,0.679095,206.997167,310.0,11,0.124646,0.286119,0.589235,3,4,2,0,19.373171,34.215854,False
1,2024,Bahrain Grand Prix,Sakhir,FP2,8.0,5342.252778,fastf1,3,3627.975278,0.679110,213.737952,316.0,10,0.114458,0.268072,0.617470,2,0,0,0,17.247561,22.098780,False
2,2024,Bahrain Grand Prix,Sakhir,FP3,8.0,5390.449722,fastf1,3,3651.490278,0.677400,212.495652,316.0,10,0.124638,0.255072,0.620290,2,4,1,1,20.259756,32.013415,False
3,2024,Bahrain Grand Prix,Sakhir,Q,8.0,5369.465278,fastf1,3,3650.405278,0.679845,215.886567,319.0,15,0.119403,0.259701,0.620896,2,3,3,1,18.049351,21.514286,False
4,2024,Bahrain Grand Prix,Sakhir,R,8.0,5356.580278,fastf1,0,0.000000,,208.853868,301.0,7,0.120344,0.269341,0.610315,3,4,1,0,18.227389,23.652866,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,2025,Canadian Grand Prix,Montreal,FP3,11.0,4323.166111,fastf1,3,2555.865278,0.591202,220.413284,329.0,11,0.110701,0.276753,0.612546,2,3,0,0,20.402500,44.906250,False
168,2025,Canadian Grand Prix,Montreal,Q,11.0,4326.426111,fastf1,3,2576.998889,0.595641,219.858209,332.0,15,0.115672,0.276119,0.608209,1,4,0,0,21.300000,43.145882,False
169,2025,Canadian Grand Prix,Montreal,R,11.0,4283.939722,fastf1,0,0.000000,,205.055970,312.0,10,0.141791,0.309701,0.548507,3,3,2,1,24.036527,49.253293,True
170,2025,Austrian Grand Prix,Spielberg,FP1,681.0,4300.844722,fastf1,3,2418.924444,0.562430,238.206612,315.0,8,0.061983,0.165289,0.772727,1,1,4,1,24.010000,39.272500,False


# Track clustering

In [4]:
# Label every session row with "<event> <year>" so each race weekend has its own key.
event_name = df_profiles['event']
season_label = df_profiles['year'].astype(str)
df_profiles['track_id'] = event_name + ' ' + season_label

# Collapse the sessions sharing a track_id into a single row that holds the
# median of each numeric column.
track_profile = (
    df_profiles
    .groupby('track_id', as_index=False)
    .median(numeric_only=True)
)

In [5]:
# Feature frame for scaling / PCA / clustering: everything in track_profile except 'year'.
# Later cells write PC1, PC2 and cluster back onto track_profile; if this cell is
# re-run after them, those derived columns would leak into the features. They are
# dropped here as well, with errors='ignore' so a fresh kernel (where they do not
# exist yet) behaves exactly as before.
DERIVED_COLUMNS = ['PC1', 'PC2', 'cluster']
X = (
    track_profile
    .drop(columns=['year'])
    .drop(columns=DERIVED_COLUMNS, errors='ignore')
)

In [6]:
# Only numeric columns of X are standardised; non-numeric ones are left out of the matrix.
numeric_features = X.select_dtypes(include='number')

# Fit the scaler on those columns and transform them in one step.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(numeric_features)

In [7]:
# Project the scaled features onto two principal components.
pca = PCA(n_components=2)
components = pca.fit_transform(X_scaled)

# Store each component as its own column on track_profile (column 0 -> PC1, column 1 -> PC2).
for position, column in enumerate(('PC1', 'PC2')):
    track_profile[column] = components[:, position]

In [8]:
# Fit a 4-cluster k-means on the scaled features with a fixed random_state,
# then store each row's assigned label on track_profile.
# NOTE(review): n_init is left at the library default, which has differed between
# scikit-learn releases — consider pinning it if results must match across versions.
kmeans = KMeans(random_state=42, n_clusters=4)
kmeans.fit(X_scaled)
track_profile['cluster'] = kmeans.labels_

In [24]:
# Store the cluster label as a categorical column.
track_profile["cluster"] = track_profile["cluster"].astype("category")

# Ordering of cluster labels used by the plots. It is derived from the labels that are
# actually present instead of a hard-coded 0..8 list, so it stays in sync with however
# many clusters were fitted. Labels are converted to plain Python ints.
cluster_order = sorted(int(label) for label in track_profile["cluster"].cat.categories)

In [25]:
# Scatter of the tracks in PCA space (PC1 vs PC2), coloured by cluster label, with the
# track_id available on hover. Only rows whose year value is >= 2024 are plotted.
# No continuous colour scale is passed: the cluster column is categorical, so the
# colouring is discrete and a continuous scale would not be applied. The former
# textposition tweak was dropped too, since no text labels are mapped onto the points.
recent_tracks = track_profile.loc[track_profile['year'] >= 2024]

fig = px.scatter(
    recent_tracks,
    x='PC1',
    y='PC2',
    color='cluster',
    hover_data=['track_id'],
    title='Track Typology (Clustered Driving Styles)',
    category_orders={'cluster': cluster_order},
)
fig.show()


# 🎯 1. Understand the Driving Style Differences Between Clusters

In [26]:
# Mean of every numeric column per cluster.
# - observed=True reports only the cluster labels that actually occur and avoids the
#   pandas FutureWarning emitted when grouping on a categorical key without it.
# - The season and the PCA coordinates are bookkeeping columns, not driving features,
#   so they are removed before plotting; errors='ignore' tolerates any that are absent.
cluster_summary = (
    track_profile
    .groupby('cluster', observed=True)
    .mean(numeric_only=True)
    .drop(columns=['year', 'PC1', 'PC2'], errors='ignore')
)

# Min-max scale each feature (i.e. each column) across the clusters to the range [0, 1].
feature_min = cluster_summary.min()
feature_span = cluster_summary.max() - feature_min
# A feature whose mean is identical in every cluster has zero span; dividing by it
# would give 0/0 = NaN. Divide by 1 instead so that feature is plotted as 0.
feature_span[feature_span == 0] = 1.0
cluster_norm = (cluster_summary - feature_min) / feature_span

# Turn the cluster index back into a column, then reshape to long format
# (one row per cluster/feature pair) for the grouped bar chart.
cluster_norm = cluster_norm.reset_index()
cluster_melted = cluster_norm.melt(id_vars='cluster')

fig = px.bar(
    cluster_melted,
    x='variable',
    y='value',
    color='cluster',
    category_orders={'cluster': cluster_order},
    barmode='group',
    title='Normalized Cluster Driving Profiles',
)
fig.update_layout(xaxis_tickangle=45)
fig.show()






In [27]:
# Features drawn as the spokes of the radar chart, in display order.
categories = [
    'avg_speed', 'top_speed', 'braking_events',
    'slow_corners', 'medium_corners', 'fast_corners',
    'low_pct', 'med_pct', 'high_pct',
    'air_temp_avg', 'track_temp_avg',
]

# One filled polar trace per row of cluster_norm, named after that row's cluster label.
radar_traces = [
    go.Scatterpolar(
        r=cluster_row[categories].values,
        theta=categories,
        fill='toself',
        name=f'Cluster {cluster_row["cluster"]}',
    )
    for _, cluster_row in cluster_norm.iterrows()
]

fig = go.Figure(data=radar_traces)
fig.update_layout(
    title="Driving Style Radar per Cluster",
    polar={'radialaxis': {'visible': True}},
)
fig.show()


# 🧪 2. Predictive Power of Practice Sessions (FP1–FP3) for Qualifying (Q) & Race (R)

# 🔄 3. Heatmaps or Pairplots per Cluster

# 🔮 4. Generate Track Typology Labels
