In [248]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dash_table

In [198]:
df = pd.read_csv('data/processed_activities.csv')
# types to not want to include in rankings for distance and elevation
omit_types = ['AlpineSki', 'WeightTraining', 'Swim', 'Workout', 'RockClimbing', 'Elliptical']
aerobic_types = []

In [199]:
# Top 10 elevation overall
cols = ['name', 'type', 'start_date_local', 'total_elevation_gain']
df.loc[~df.type.isin(omit_types), cols].sort_values(by='total_elevation_gain', ascending=False).head(10)

Unnamed: 0,name,type,start_date_local,total_elevation_gain
1278,Grand Canyon R3,Run,2018-04-21,11538.71
606,Belford & Oxford,Run,2020-06-07,8881.23
500,Bear Peak x3,Run,2020-11-21,8215.22
1138,The Bear 50k!,Run,2018-09-28,7742.78
1840,Hyner 50k,Run,2016-04-23,7437.66
1488,Pikes Peak Ascent,Run,2017-08-19,7089.9
1292,Sanitas x5,Run,2018-04-01,6706.04
1565,Mudfest #2 on the GRT. Redeemed!,Run,2017-05-13,6574.8
612,Goat Trail via Skyline,Run,2020-05-25,6236.88
506,Skyline,Run,2020-11-06,6184.38


In [200]:
# Top 10 run distances
cols = ['name', 'type', 'start_date_local', 'miles']
df.loc[df.type=='Run', cols].sort_values(by='miles', ascending=False).head(10)

Unnamed: 0,name,type,start_date_local,miles
1565,Mudfest #2 on the GRT. Redeemed!,Run,2017-05-13,49.55
1278,Grand Canyon R3,Run,2018-04-21,47.09
1835,GRT 50Mile. DNF'D at mile 40 due to injury.,Run,2016-05-14,39.16
1474,Crested Butte Ultra. Body quit. Brain did not.,Run,2017-09-09,35.19
1138,The Bear 50k!,Run,2018-09-28,30.05
1840,Hyner 50k,Run,2016-04-23,29.66
1727,Moab 50K.,Run,2016-11-19,29.35
1853,Morning Run,Run,2016-03-26,24.0
1779,RRR pacing Tomas,Run,2016-09-16,21.28
1579,Gold Hill. In honor of Hyner.,Run,2017-04-22,21.02


In [201]:
# Top 10 bike distances
cols = ['name', 'type', 'start_date_local', 'miles']
df.loc[df.type=='Ride', cols].sort_values(by='miles', ascending=False).head(10)

Unnamed: 0,name,type,start_date_local,miles
426,Moab Cruisin,Ride,2021-03-27,86.67
1130,Lost in Denver with Eric,Ride,2018-10-13,52.53
807,I said hard times! They're just an old friend ...,Ride,2019-09-21,42.25
1347,Tour of Sufferlandria Stage 1: Who Dares + The...,Ride,2018-02-03,40.0
331,EVO: to & fro',Ride,2021-08-14,34.2
822,Black Project,Ride,2019-09-07,33.75
129,Hero Sand w/Jason,Ride,2022-05-14,32.05
128,Morning Ride,Ride,2022-05-14,32.03
1110,Afternoon Ride,Ride,2018-10-29,31.6
1606,29'n SoBo Creek->Dirty B->Doudy,Ride,2017-03-25,31.11


In [203]:
# Bar chart of aerobic time for last 20 weeks
df['year_week'] = df['year'].astype(str) + '-' + df['week'].astype(str).str.zfill(2)
grouped_yr_wk = df.loc[~df.type.isin(omit_types)].groupby(['year_week']).agg({'hours': 'sum'}).sort_values(by='year_week', ascending=False).reset_index().head(20).sort_values(by='year_week', ascending=True)
time_x_week_bar = px.bar(grouped_yr_wk,
                          x="year_week",
                          y="hours",
                          title='Aerobic Training Time Last 20 Weeks'
                         )

time_x_week_bar.show()

In [204]:
# Bar chart of aerobic time for last 12 weeks
df['year_week'] = df['year'].astype(str) + '-' + df['week'].astype(str).str.zfill(2)
grouped_yr_wk = df.loc[df.type=='WeightTraining'].groupby(['year_week']).agg({'hours': 'sum'}).sort_values(by='year_week', ascending=False).reset_index().head(20).sort_values(by='year_week', ascending=True)
time_x_week_bar = px.bar(grouped_yr_wk,
                          x="year_week",
                          y="hours",
                          title='Strength Duration Last 20 Weeks'
                         )

time_x_week_bar.show()

In [205]:
from functools import reduce
dfs = [
    df.groupby('year').agg({'bear_peak_count':'sum'}).reset_index(),
    df.groupby('year').agg({'sanitas_count':'sum'}).reset_index(),
    df.groupby('year').agg({'second_flatiron_count':'sum'}).reset_index()
]

df_merged = reduce(lambda  left,right: pd.merge(left,right, on=['year'],
                                            how='left'), dfs)

df_merged

Unnamed: 0,year,bear_peak_count,sanitas_count,second_flatiron_count
0,2015,0,0,0
1,2016,15,13,10
2,2017,59,24,5
3,2018,27,32,24
4,2019,13,16,11
5,2020,20,23,16
6,2021,13,12,16
7,2022,5,20,1


In [249]:
dash_table.DataTable(
    data=df_merged.to_dict('records'),
    columns=[df_merged.columns]
)

DataTable(data=[{'year': 2015, 'bear_peak_count': 0, 'sanitas_count': 0, 'second_flatiron_count': 0}, {'year': 2016, 'bear_peak_count': 15, 'sanitas_count': 13, 'second_flatiron_count': 10}, {'year': 2017, 'bear_peak_count': 59, 'sanitas_count': 24, 'second_flatiron_count': 5}, {'year': 2018, 'bear_peak_count': 27, 'sanitas_count': 32, 'second_flatiron_count': 24}, {'year': 2019, 'bear_peak_count': 13, 'sanitas_count': 16, 'second_flatiron_count': 11}, {'year': 2020, 'bear_peak_count': 20, 'sanitas_count': 23, 'second_flatiron_count': 16}, {'year': 2021, 'bear_peak_count': 13, 'sanitas_count': 12, 'second_flatiron_count': 16}, {'year': 2022, 'bear_peak_count': 5, 'sanitas_count': 20, 'second_flatiron_count': 1}], columns=[Index(['year', 'bear_peak_count', 'sanitas_count', 'second_flatiron_count'], dtype='object')])

In [214]:
# Count of bear peaks, sanitas, 2nd_flatiron by year
px.bar(
    data_frame = df_merged.loc[df_merged.year != 2015],
    x = 'year',
    y = ['bear_peak_count','sanitas_count','second_flatiron_count'],
    opacity = 0.9,
    orientation = "v",
    barmode = 'group',
    title='Most Common Routes Count by Year',
)

In [246]:
scatter = px.scatter(df.loc[~df['type'].isin(omit_types)].sort_values(by=['day_of_month', 'month', 'total_elevation_gain'], ascending=False).drop_duplicates(subset=['day_of_month', 'month'])[['month', 'day_of_month', 'year', 'name', 'type', 'hours', 'total_elevation_gain']], 
                 x='month', 
                 y='day_of_month', 
                 color='total_elevation_gain', 
                 size='hours',
                 custom_data=['year', 'name', 'total_elevation_gain', 'type'],
                 category_orders={"month": ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]})

scatter.update_traces(
    hovertemplate="<br>".join([
        "Name: %{customdata[1]}",
        "Type: %{customdata[3]}",
        "Date: %{x} %{y}, %{customdata[0]}",
        #"Month: %{x}",
        #"Day: %{y}",
        #"Year: %{customdata[0]}",
        "Elevation: %{customdata[2]}",
    ])
)

scatter.show()

In [208]:
elevation_filter = df[~df['type'].isin(omit_types)]
elevation_grouped = elevation_filter.groupby(['month', 'type']).agg({'total_elevation_gain': 'sum'}).reset_index()
# Elevation gain by month stacked bar
gain_x_month_bar = px.bar(elevation_grouped, 
                          x="month", 
                          y="total_elevation_gain", 
                          color="type", 
                          title='All-Time Elevation Gain by Month'
                         )

gain_x_month_bar.show()

In [291]:
def gain_by_year_line(dataframe):
    NON_AEROBIC = ['AlpineSki', 'WeightTraining', 'Workout', 'RockClimbing']
    data_filtered = df.loc[~df['type'].isin(NON_AEROBIC)]
    days = data_filtered.groupby(['year', 'day_of_year'])['total_elevation_gain'].sum().groupby(level=0).cumsum().reset_index()

    line = px.line(
        days, 
        x='day_of_year', 
        y='total_elevation_gain', 
        title='Elevation Gain by Year',
        custom_data=['year', 'total_elevation_gain'],
        color='year'
    )
    line.update_traces(
        hovertemplate='<br>'.join([
            '<extra></extra>',
            'Year: %{customdata[0]}',
            'Elevation: %{y:,.0f}'
        ])
    )
    return line.show()

gain_by_year_line(df)

In [260]:
elevation_trend_grouping = elevation_filter.groupby(['month', 'year']).agg({'total_elevation_gain': 'sum'}).astype(int).reset_index()
elevation_trend_grouping.head()

Unnamed: 0,month,year,total_elevation_gain
0,April,2016,21430
1,April,2017,29289
2,April,2018,31459
3,April,2019,18840
4,April,2020,21832


In [268]:
# Trending Elevation Gain
MONTHS = {"month": ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]}
def stacked_elevation_gain_x_month(dataframe):
    df = dataframe
    NON_AEROBIC = ['AlpineSki', 'WeightTraining', 'Workout', 'RockClimbing']
    data_filtered = df.loc[~df['type'].isin(NON_AEROBIC)]
    data_grouped = data_filtered.groupby(['month', 'year']).agg({'total_elevation_gain': 'sum'}).astype(int).reset_index()
    bar = px.bar(data_grouped, 
                            x="month", 
                            y="total_elevation_gain", 
                            title='Total Elevation Gain by Month', 
                            color='year',
                            custom_data=['year'],
                            category_orders=MONTHS
                            )
    bar.update_traces(
        hovertemplate="<br>".join([
            "Month: %{x}",
            "Year: %{customdata[0]}",
            "Elevation: %{y:,}"
        ])
    )
    return bar.show()

stacked_elevation_gain_x_month(df)

In [250]:
NON_AEROBIC = ['AlpineSki', 'WeightTraining', 'Workout', 'RockClimbing']

def max_vert_scatter_plot(dataframe):
    df = dataframe
    plot_cols = ['month', 'day_of_month', 'year', 'name', 'type', 'hours', 'total_elevation_gain']
    sort_cols = ['day_of_month', 'month', 'total_elevation_gain']
    dups_cols = ['day_of_month', 'month']
    plot_data = df.loc[~df['type'].isin(NON_AEROBIC)]
    month_dict = {
        "month": ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
    }
    
    scatter = px.scatter(
        plot_data.sort_values(by=sort_cols, ascending=False).drop_duplicates(subset=dups_cols)[plot_cols], 
        x='month', 
        y='day_of_month', 
        color='total_elevation_gain', 
        size='hours',
        custom_data=['year', 'name', 'total_elevation_gain', 'type'],
        category_orders=month_dict
    )

    scatter.update_traces(
        hovertemplate="<br>".join([
            "Name: %{customdata[1]}",
            "Type: %{customdata[3]}",
            "Date: %{x} %{y}, %{customdata[0]}",
            "Elevation: %{customdata[2]}",
        ])
    )
    return scatter.show()

In [251]:
max_vert_scatter_plot(df)

In [295]:
# Bar chart of aerobic time x week x year
grouped_yr = df.loc[(~df.type.isin(omit_types)) & (df.year == 2017)].groupby(['week']).agg({'hours': 'sum'}).sort_values(by='week').reset_index()
time_x_week_x_year_bar = px.bar(grouped_yr,
                          x="week",
                          y="hours",
                          title='Aerobic Training Time 2017'
                         )

time_x_week_x_year_bar.show()