ting

In [28]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the dataset from your file
file_path = '/Users/ngirmay/Documents/GitHub/ironman_retrospective/IronMan_2023/training_peaks/workouts.csv'  # Adjust this to the correct file path
df = pd.read_csv(file_path)

# Filter for Run workouts
run_df = df[df['WorkoutType'] == 'Run'].sort_values('WorkoutDay')

# Convert meters to miles and calculate pace
run_df['DistanceInMiles'] = run_df['DistanceInMeters'] / 1609.34
run_df['PaceMinPerMile'] = (run_df['TimeTotalInHours'] * 60) / run_df['DistanceInMiles']

# Convert 'WorkoutDay' to datetime
run_df['WorkoutDay'] = pd.to_datetime(run_df['WorkoutDay'])

# Calculate rolling averages
window = 14  # Slightly larger window for smoother graphs
run_df['PaceRolling'] = run_df['PaceMinPerMile'].rolling(window=window, min_periods=1).mean()
run_df['DistanceRolling'] = run_df['DistanceInMiles'].rolling(window=window, min_periods=1).mean()
run_df['HRRolling'] = run_df['HeartRateAverage'].rolling(window=window, min_periods=1).mean()
run_df['CadenceRolling'] = run_df['CadenceAverage'].rolling(window=window, min_periods=1).mean()

# Normalize all metrics to 0-100 scale
metrics = ['PaceRolling', 'DistanceRolling', 'HRRolling', 'CadenceRolling']
for metric in metrics:
    run_df[f'{metric}Normalized'] = (run_df[metric] - run_df[metric].min()) / (run_df[metric].max() - run_df[metric].min()) * 100

# Invert pace (lower is better)
run_df['PaceRollingNormalized'] = 100 - run_df['PaceRollingNormalized']

# Color scheme
colors = {
    'Pace': '#e63946',      # Red
    'Distance': '#457b9d',  # Blue
    'HeartRate': '#a8dadc', # Light blue
    'Cadence': '#1d3557',   # Dark blue
}

# Create the main figure
fig = make_subplots(rows=1, cols=1, shared_xaxes=True, vertical_spacing=0.1)

# Add traces for each normalized metric with thinner lines and gridlines
fig.add_trace(
    go.Scatter(
        x=run_df['WorkoutDay'],
        y=run_df['PaceRollingNormalized'],
        mode='lines',
        name='Pace',
        line=dict(color=colors['Pace'], width=1.5),
    )
)
fig.add_trace(
    go.Scatter(
        x=run_df['WorkoutDay'],
        y=run_df['DistanceRollingNormalized'],
        mode='lines',
        name='Distance',
        line=dict(color=colors['Distance'], width=1.5),
    )
)
fig.add_trace(
    go.Scatter(
        x=run_df['WorkoutDay'],
        y=run_df['HRRollingNormalized'],
        mode='lines',
        name='Heart Rate',
        line=dict(color=colors['HeartRate'], width=1.5),
    )
)
fig.add_trace(
    go.Scatter(
        x=run_df['WorkoutDay'],
        y=run_df['CadenceRollingNormalized'],
        mode='lines',
        name='Cadence',
        line=dict(color=colors['Cadence'], width=1.5),
    )
)

# Update layout for better readability
fig.update_layout(
    title="Normalized Metrics Over Time (Smoothed)",
    height=600,
    showlegend=True,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    plot_bgcolor='#f1faee',  # Light background color
    paper_bgcolor='#f1faee',
    xaxis=dict(showgrid=True, gridcolor='lightgrey'),
    yaxis=dict(showgrid=True, gridcolor='lightgrey')
)

# Update x and y axes
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Normalized Metrics (0-100)")

# Show the plot
fig.show()

# Summary statistics
print(f"Total number of runs: {len(run_df)}")
print(f"Total distance run: {run_df['DistanceInMiles'].sum():.2f} miles")
print(f"Average pace: {run_df['PaceMinPerMile'].mean():.2f} min/mile")
print(f"Average heart rate: {run_df['HeartRateAverage'].mean():.2f} bpm")
print(f"Average cadence: {run_df['CadenceAverage'].mean():.2f} spm")

# Calculate improvements
first_month = run_df.head(30)
last_month = run_df.tail(30)

pace_improvement = (first_month['PaceMinPerMile'].mean() - last_month['PaceMinPerMile'].mean()) / first_month['PaceMinPerMile'].mean() * 100
distance_improvement = (last_month['DistanceInMiles'].mean() - first_month['DistanceInMiles'].mean()) / first_month['DistanceInMiles'].mean() * 100
hr_improvement = (first_month['HeartRateAverage'].mean() - last_month['HeartRateAverage'].mean()) / first_month['HeartRateAverage'].mean() * 100
cadence_improvement = (last_month['CadenceAverage'].mean() - first_month['CadenceAverage'].mean()) / first_month['CadenceAverage'].mean() * 100

print(f"\nImprovements (comparing first and last 30 days):")
print(f"Pace improvement: {pace_improvement:.2f}%")
print(f"Average distance per run improvement: {distance_improvement:.2f}%")
print(f"Heart rate improvement: {hr_improvement:.2f}%")
print(f"Cadence improvement: {cadence_improvement:.2f}%")


Total number of runs: 88
Total distance run: 274.53 miles
Average pace: 10.03 min/mile
Average heart rate: 149.95 bpm
Average cadence: 69.08 spm

Improvements (comparing first and last 30 days):
Pace improvement: -0.22%
Average distance per run improvement: 23.45%
Heart rate improvement: 1.43%
Cadence improvement: 0.09%
