### Topic: Statistical Analysis with SciPy

Objective: Learn how to apply statistics to racing data to answer meaningful performance questions using SciPy.

In [None]:
# Statistical Analysis with SciPy: F1 Race Data

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, linregress, ttest_ind

# Load the dataset
# NOTE(review): this is an absolute, machine-specific Windows path; the
# script only runs where this exact file location exists.
data_path = r"C:\Users\p.muthusenapathy\VSCode_Projects\Python_Training\datasets\F1 data.csv"
df = pd.read_csv(data_path)


# 1. Correlation Analysis: Tire Type vs Lap Time
# ----------------------------------------------
# Tire compounds are turned into integer codes so they can be fed to
# pearsonr; any compound not listed in the mapping becomes NaN and is
# dropped together with rows that have no lap time.
tire_codes = {'Soft': 0, 'Medium': 1, 'Hard': 2}
df['tire_type_encoded'] = df['tire_type'].map(tire_codes)

corr_columns = ['tire_type_encoded', 'lap_time']
df_corr = df.dropna(subset=corr_columns)

corr, p_corr = pearsonr(df_corr['tire_type_encoded'], df_corr['lap_time'])

print(
    "\n--- Correlation between Tire Type and Lap Time ---",
    f"Pearson Correlation: {corr:.2f}, P-Value: {p_corr:.4f}",
    sep="\n",
)

# 2. Regression Analysis: Track Temp vs Lap Time
# -----------------------------------------------
# Ordinary least-squares fit of lap_time on track_temp, using only rows
# where both values are present.
df_reg = df.dropna(subset=['track_temp', 'lap_time'])
fit = linregress(df_reg['track_temp'], df_reg['lap_time'])

# Expose the individual fit statistics under the names used for reporting.
slope = fit.slope
intercept = fit.intercept
r_val = fit.rvalue
p_reg = fit.pvalue
std_err = fit.stderr

print(
    "\n--- Regression: Track Temperature vs Lap Time ---",
    f"Lap Time = {slope:.2f} * Track Temp + {intercept:.2f}",
    f"R-squared: {r_val**2:.2f}, P-Value: {p_reg:.4f}",
    sep="\n",
)

# 3. Hypothesis Testing: Are McLaren Pit Stops Slower?
# -----------------------------------------------------
# Welch's t-test (equal_var=False) comparing McLaren pit stop times with
# those of every other team. The test is two-sided: a small p-value says
# the means differ, and the sign of the t-statistic gives the direction
# (positive means the McLaren mean is larger, i.e. slower stops).
mclaren_pit = df[df['team'] == 'McLaren']['pit_stop_time'].dropna()
other_pit = df[df['team'] != 'McLaren']['pit_stop_time'].dropna()

# Each group needs at least two observations for a variance estimate.
# Without this guard the test yields NaN, and `NaN < 0.05` being False
# would be misreported below as "no significant difference".
if len(mclaren_pit) >= 2 and len(other_pit) >= 2:
    t_stat, p_val = ttest_ind(mclaren_pit, other_pit, equal_var=False)
else:
    t_stat = p_val = float('nan')

print("\n--- Hypothesis Test: McLaren vs Other Teams (Pit Stop Time) ---")
print(f"T-statistic: {t_stat:.2f}, P-value: {p_val:.4f}")
if pd.isna(p_val):
    # Also reached when both groups have zero variance (undefined statistic).
    print("Conclusion: Test could not be evaluated (too few or non-varying pit stop times).")
elif p_val < 0.05:
    print("Conclusion: Significant difference in pit stop times.")
else:
    print("Conclusion: No significant difference in pit stop times.")

# 4. Visualization
# -----------------
# Box plot of pit stop times, restricted to the two teams named in the title.
teams_to_plot = ['McLaren', 'Mercedes']
plot_df = df[df['team'].isin(teams_to_plot)]

sns.boxplot(data=plot_df, x='team', y='pit_stop_time')
plt.title("Pit Stop Time Comparison: McLaren vs Mercedes")
plt.xticks(rotation=45)  # tilt the team labels on the x axis
plt.show()
