In [158]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [159]:
# Activity types that the distance / elevation rankings and charts below filter out.
omit_types = [
    'AlpineSki',
    'WeightTraining',
    'Swim',
    'Workout',
    'RockClimbing',
    'Elliptical',
]
# Starts empty; nothing in this cell populates it.
aerobic_types = []

# Load the activities table from the processed CSV.
df = pd.read_csv('data/processed_activities.csv')

In [160]:
# Ten rows with the highest total_elevation_gain, skipping the types in omit_types
cols = ['name', 'type', 'start_date_local', 'total_elevation_gain']
(
    df.loc[~df['type'].isin(omit_types), cols]
    .sort_values(by='total_elevation_gain', ascending=False)
    .head(10)
)

Unnamed: 0,name,type,start_date_local,total_elevation_gain
1278,Grand Canyon R3,Run,2018-04-21,11538.71
606,Belford & Oxford,Run,2020-06-07,8881.23
500,Bear Peak x3,Run,2020-11-21,8215.22
1138,The Bear 50k!,Run,2018-09-28,7742.78
1840,Hyner 50k,Run,2016-04-23,7437.66
1488,Pikes Peak Ascent,Run,2017-08-19,7089.9
1292,Sanitas x5,Run,2018-04-01,6706.04
1565,Mudfest #2 on the GRT. Redeemed!,Run,2017-05-13,6574.8
612,Goat Trail via Skyline,Run,2020-05-25,6236.88
506,Skyline,Run,2020-11-06,6184.38


In [161]:
# Ten longest rows of type 'Run', ranked by the miles column
cols = ['name', 'type', 'start_date_local', 'miles']
(
    df.loc[df['type'] == 'Run', cols]
    .sort_values(by='miles', ascending=False)
    .head(10)
)

Unnamed: 0,name,type,start_date_local,miles
1565,Mudfest #2 on the GRT. Redeemed!,Run,2017-05-13,49.55
1278,Grand Canyon R3,Run,2018-04-21,47.09
1835,GRT 50Mile. DNF'D at mile 40 due to injury.,Run,2016-05-14,39.16
1474,Crested Butte Ultra. Body quit. Brain did not.,Run,2017-09-09,35.19
1138,The Bear 50k!,Run,2018-09-28,30.05
1840,Hyner 50k,Run,2016-04-23,29.66
1727,Moab 50K.,Run,2016-11-19,29.35
1853,Morning Run,Run,2016-03-26,24.0
1779,RRR pacing Tomas,Run,2016-09-16,21.28
1579,Gold Hill. In honor of Hyner.,Run,2017-04-22,21.02


In [162]:
# Ten longest rows of type 'Ride', ranked by the miles column
cols = ['name', 'type', 'start_date_local', 'miles']
(
    df.loc[df['type'] == 'Ride', cols]
    .sort_values(by='miles', ascending=False)
    .head(10)
)

Unnamed: 0,name,type,start_date_local,miles
426,Moab Cruisin,Ride,2021-03-27,86.67
1130,Lost in Denver with Eric,Ride,2018-10-13,52.53
807,I said hard times! They're just an old friend ...,Ride,2019-09-21,42.25
1347,Tour of Sufferlandria Stage 1: Who Dares + The...,Ride,2018-02-03,40.0
331,EVO: to & fro',Ride,2021-08-14,34.2
822,Black Project,Ride,2019-09-07,33.75
129,Hero Sand w/Jason,Ride,2022-05-14,32.05
128,Morning Ride,Ride,2022-05-14,32.03
1110,Afternoon Ride,Ride,2018-10-29,31.6
1606,29'n SoBo Creek->Dirty B->Doudy,Ride,2017-03-25,31.11


In [163]:
# Summed hours per week for the 12 most recent 'year-week' labels (newest first).
# The week number is zero-padded so the string labels sort in week order within a year.
df['year_week'] = df['year'].astype(str) + '-' + df['week'].astype(str).str.zfill(2)
(
    df.loc[~df['type'].isin(omit_types)]
    .groupby(['year_week'])
    .agg({'hours': 'sum'})
    .sort_values(by='year_week', ascending=False)
    .reset_index()
    .head(12)
)

Unnamed: 0,year_week,hours
0,2022-48,3.91
1,2022-47,3.76
2,2022-46,4.81
3,2022-45,5.37
4,2022-44,1.48
5,2022-43,2.52
6,2022-42,2.71
7,2022-41,6.43
8,2022-40,8.67
9,2022-39,3.81


In [164]:
# Bar chart of aerobic training time for the most recent N_WEEKS weeks.
# The window size is defined once so the row limit and the chart title cannot drift apart
# (the previous comment said 12 weeks while the code and title used 20).
N_WEEKS = 20

# Zero-pad the week number so the 'year-week' string labels sort in week order within a year.
df['year_week'] = df['year'].astype(str) + '-' + df['week'].astype(str).str.zfill(2)
grouped_yr_wk = (
    df.loc[~df.type.isin(omit_types)]
    .groupby(['year_week'])
    .agg({'elapsed_time': 'sum'})
    .sort_values(by='year_week', ascending=False)  # newest weeks first
    .reset_index()
    .head(N_WEEKS)                                 # keep only the latest N_WEEKS weeks
    .sort_values(by='year_week', ascending=True)   # back to chronological order for the x-axis
)
# elapsed_time is divided by 60 twice, i.e. treated as seconds — TODO confirm the unit
grouped_yr_wk['hours'] = grouped_yr_wk['elapsed_time'] / 60 / 60
time_x_week_bar = px.bar(grouped_yr_wk,
                          x="year_week",
                          y="hours",
                          title=f'Aerobic Training Time Last {N_WEEKS} Weeks'
                         )

time_x_week_bar.show()

In [171]:
# Most active days of the year: for each (day_of_month, month) pair take the maximum
# total_elevation_gain (marker colour) and the maximum hours (marker size) across rows.
month_names = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
peak_by_day = (
    df.groupby(['day_of_month', 'month'])
    .agg({'total_elevation_gain': 'max', 'hours': 'max'})
    .reset_index()
)
scatter = px.scatter(
    peak_by_day,
    x='month',
    y='day_of_month',
    color='total_elevation_gain',
    size='hours',
    # plot months in calendar order rather than in the order they appear in the data
    category_orders={"month": month_names},
)
scatter.show()

In [166]:
# Rows whose type is not in omit_types; reused by the elevation charts that follow.
elevation_filter = df.loc[~df['type'].isin(omit_types)]

# Total elevation gain for every (month, type) combination, flattened back to columns.
elevation_grouped = (
    elevation_filter
    .groupby(['month', 'type'])
    .agg({'total_elevation_gain': 'sum'})
    .reset_index()
)

In [167]:
# Elevation gain by month, stacked by activity type.
# elevation_grouped comes out of a groupby, so its month names are in alphabetical order
# (April, August, ...). Passing category_orders pins the x-axis to calendar order,
# matching the other month-based charts in this notebook.
gain_x_month_bar = px.bar(elevation_grouped, 
                          x="month", 
                          y="total_elevation_gain", 
                          color="type", 
                          title='All-Time Elevation Gain by Month',
                          category_orders={"month": ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]}
                         )

gain_x_month_bar.show()

In [168]:
# Running elevation total within each year:
#   1. sum total_elevation_gain per (year, day_of_year),
#   2. take the cumulative sum inside each year (index level 0),
#   3. flatten the index so plotly can use year / day_of_year as columns.
days = (
    elevation_filter
    .groupby(['year', 'day_of_year'])['total_elevation_gain']
    .sum()
    .groupby(level=0)
    .cumsum()
    .reset_index()
)

# One line per year, x = day of year, y = cumulative gain so far in that year.
gain_x_year_line = px.line(
    days,
    x="day_of_year",
    y="total_elevation_gain",
    title='Elevation Gain by Year',
    color='year',
)
gain_x_year_line.show()

In [169]:
# Total elevation gain for every (month, year) pair; preview the first rows.
elevation_trend_grouping = (
    elevation_filter
    .groupby(['month', 'year'])
    .agg({'total_elevation_gain': 'sum'})
    .reset_index()
)
elevation_trend_grouping.head()

Unnamed: 0,month,year,total_elevation_gain
0,April,2016,21430.44
1,April,2017,29289.33
2,April,2018,31459.97
3,April,2019,18840.55
4,April,2020,21832.7


In [170]:
# Elevation gain per month, coloured by year.
# Note: despite the `_area` suffix in the variable name, this figure is built with px.bar.
calendar_months = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
gain_x_month_area = px.bar(
    elevation_trend_grouping,
    x="month",
    y="total_elevation_gain",
    title='Total Elevation Gain by Month',
    color='year',
    # show months in calendar order instead of the alphabetical order the groupby produced
    category_orders={"month": calendar_months},
)

# Tilt the month labels on the x-axis by 45 degrees.
gain_x_month_area.update_layout(xaxis_tickangle=45)

gain_x_month_area.show()