# Supercross Lap Time Analysis

In [26]:
import pandas as pd

# Load the lap data from the CSV export (the source is a CSV file, not an Excel workbook)
data = pd.read_csv('all_laps_SX_20240129.csv')

# Preview the first few rows of the dataframe to understand its structure.
# NOTE(review): this is not the last expression of the cell, so the notebook
# does not render it; only the nunique() result at the end is displayed.
data.head()
# Upper-case rider names so differently-cased spellings count as one rider
data['RIDER'] = data['RIDER'].str.upper()
# Number of distinct rider names after the case normalization
data['RIDER'].nunique()

1221

In [18]:
# Dimensions of the loaded frame as a (rows, columns) tuple
data.shape

(283274, 5)

# Laptime Outlier Detection

1. For each rider, calculate the Z-score of each lap time relative to that rider's own mean and standard deviation.
2. Consider lap times with an absolute Z-score greater than 3 as outliers.
3. Extract these outlier lap times for further inspection or analysis.

In [20]:
import pandas as pd
import numpy as np
from scipy import stats

# Float copy of the lap time; the outlier detection below reads this column.
# NOTE(review): astype(float) raises if any LAPTIME value cannot be parsed as a
# plain number -- confirm the column holds numeric seconds, not "m:ss" strings.
data['LAPTIME_SECONDS'] = data['LAPTIME'].astype(float)
# Convert 'RIDER' names to uppercase to ensure consistency
data['RIDER'] = data['RIDER'].str.upper()

def identify_outliers(data, threshold=3):
    """Return the laps whose time is an outlier relative to the same rider's laps.

    For every rider the z-score of each ``LAPTIME_SECONDS`` value is computed
    against that rider's own mean and (population, ``ddof=0``) standard
    deviation. A lap is flagged when the absolute z-score exceeds ``threshold``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'RIDER'`` and ``'LAPTIME_SECONDS'``.
    threshold : float, default 3
        Absolute z-score above which a lap is considered an outlier.

    Returns
    -------
    pandas.DataFrame
        The flagged rows of ``data`` (original index, columns and dtypes),
        grouped by rider in order of first appearance. Empty, with the same
        columns as ``data``, when nothing is flagged.

    Notes
    -----
    * Missing lap times are ignored when computing a rider's mean and standard
      deviation, so one NaN no longer hides all of that rider's outliers.
      NaN laps themselves are never flagged.
    * Riders whose laps are all identical (or who have a single lap) have an
      undefined z-score and contribute no outliers.
    * Rows with a missing ``RIDER`` are skipped.
    """
    flagged = []

    # One groupby pass replaces a full-frame boolean scan per rider;
    # sort=False keeps riders in first-appearance order, and groupby drops
    # rows whose RIDER is missing.
    for _, rider_data in data.groupby('RIDER', sort=False):
        lap_seconds = rider_data['LAPTIME_SECONDS'].to_numpy(dtype=float)

        # Zero variance yields 0/0 inside zscore; the resulting NaN simply
        # fails the comparison below, so the floating-point warning is noise.
        with np.errstate(invalid='ignore', divide='ignore'):
            z_scores = np.abs(stats.zscore(lap_seconds, nan_policy='omit'))

        rider_outliers = rider_data[z_scores > threshold]
        if not rider_outliers.empty:
            flagged.append(rider_outliers)

    if not flagged:
        # Empty slice keeps the input's columns and dtypes.
        return data.iloc[0:0].copy()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(flagged, axis=0)

# Identify outliers for all riders in the dataset
# (relies on the 'RIDER' and 'LAPTIME_SECONDS' columns prepared above)
outliers = identify_outliers(data)

# Write the flagged laps to an Excel workbook in the working directory;
# index=False leaves the DataFrame index out of the sheet
output_excel_path = 'outliers.xlsx'
outliers.to_excel(output_excel_path, index=False)
print(f"Outliers successfully saved to {output_excel_path}.")

Outliers successfully saved to outliers.xlsx.


In [24]:
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Tally the flagged laps per rider and shape the tally into a two-column table
outlier_counts = (
    outliers['RIDER']
    .value_counts()
    .rename_axis('RIDER')
    .reset_index(name='Outlier_Count')
)

# Export the per-rider tally as an Excel workbook (index column omitted)
frequency_table_path = 'outlier_frequency.xlsx'
outlier_counts.to_excel(frequency_table_path, index=False)

print(f"Frequency table of outliers per rider has been saved to {frequency_table_path}.")

Frequency table of outliers per rider has been saved to outlier_frequency.xlsx.
