In [1]:
import pandas as pd
import altair as alt

In [2]:
# load the data
running_df = pd.read_csv('running_data.csv')

# get correct data types
running_df['time'] = running_df['time'].astype('datetime64[ns]')
running_df['date'] = running_df['date'].astype('datetime64[ns]')
print(running_df.dtypes)

duration                                 float64
distance                                 float64
avgHr                                    float64
elevationGain                            float64
elevationLoss                            float64
date                              datetime64[ns]
time                              datetime64[ns]
pace                                     float64
dailyTrainingLoadAcute                   float64
dailyTrainingLoadChronic                 float64
dailyAcuteChronicWorkloadRatio           float64
temp_avg                                 float64
dewpoint_avg                             float64
humidity_avg                             float64
wind_mph_avg                             float64
dtype: object


In [3]:
pace_iqr = running_df['pace'].quantile(0.75) - running_df['pace'].quantile(0.25)
lower_bound = running_df['pace'].quantile(0.25) - 1.5 * pace_iqr
upper_bound = running_df['pace'].quantile(0.75) + 1.5 * pace_iqr
print(f'Pace IQR: {pace_iqr}')
print(f'Lower bound for pace: {lower_bound}')
print(f'Upper bound for pace: {upper_bound}')

# find outliers
pace_outliers = running_df[(running_df['pace'] > upper_bound) | (running_df['pace'] < lower_bound)]

# remove outliers from original dataset
running_df = running_df.drop(pace_outliers.index)
avg_pace = running_df['pace'].mean()
pace_std = running_df['pace'].std()
num_runs = running_df.shape[0]
standard_error = pace_std / (num_runs ** 0.5)

# create a column for pace in quantiles
running_df['pace_normalized'] = ((running_df['pace'] - avg_pace) / standard_error) * -1 # normalize pace and flip so fastest is top
running_df['pace_quantile'] = running_df['pace_normalized'].rank(pct=True)
print(running_df[['pace', 'pace_normalized', 'pace_quantile']].head())


Pace IQR: 1.232380172171986
Lower bound for pace: 11.140892420953978
Upper bound for pace: 16.070413109641922
        pace  pace_normalized  pace_quantile
0  15.403827       -19.711879       0.043011
1  14.069090        -5.535016       0.279570
2  14.510904       -10.227725       0.150538
3  12.743073         8.549236       0.838710
4  14.024590        -5.062353       0.290323


In [6]:
# create a visualization for running parameters influencing pace
alt.Chart(running_df).mark_circle(size = 100).encode(alt.X('distance').title("Distance (miles)"),
                                           alt.Y('pace_quantile').title("Pace Quantile"),
                                           color = alt.Color('elevationGain',
                                                    scale=alt.Scale(scheme='tealblues')).title("Elevation Gain (ft)")).properties(
                                            title = "Running Parameter Influences on Pace")

In [7]:
# create a plot of exercise load influencing pace
alt.Chart(running_df).mark_circle(size = 100).encode(alt.X('dailyTrainingLoadChronic').title("Chronic Training Load"),
                                             alt.Y('pace_quantile').title("Pace Quantile"),
                                             color = alt.Color('dailyTrainingLoadAcute',
                                                     scale=alt.Scale(scheme='reds')).title("Acute Training Load")).properties(
                                              title = "Exercise Load Influences on Pace")

In [8]:
# create plot of weather conditions influencing pace
alt.Chart(running_df).mark_circle().encode(alt.X('humidity_avg').title('Humidity (%)'),
                                             alt.Y('pace_quantile').title("Pace Quantile"),
                                             size= alt.Size('wind_mph_avg').title("Wind Speed (mph)"),
                                             color = alt.Color('temp_avg',
                                                     scale=alt.Scale(scheme='blueorange')).title("Temperature (F)")).properties(
                                              title = "Environmental Influences on Pace")