In [2]:
#############
# LIBRARIES #
#############

from get_strava_data import my_data, process_data, bike_data, get_elevation # Functions to retrive data using strava api and process for visualizations

import pandas as pd
from pandas.api.types import CategoricalDtype
import numpy as np
import datetime as dt

import matplotlib.pyplot as plt
# import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Load the pre-processed activities table from disk
activities_csv_path = './data/processed_data.csv'
processed_data = pd.read_csv(activities_csv_path)

# Preview the first rows as a quick check of the loaded columns
processed_data.head()

Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,id,start_date_local,achievement_count,...,pr_count,total_photo_count,suffer_score,average_temp,year,month,day,weekday,hour,elev_gain_per_mile
0,Zwift - 2 by 2 in Watopia,7.5,0.5,0.5,324.8,VirtualRide,VirtualRide,8491181626,02-01-2023,1,...,0,0,23.0,,2023,2,1,Wednesday,20,43.3
1,Alameda & Bay Farm,26.4,1.5,1.6,157.5,Ride,Ride,8485072654,01-31-2023,10,...,3,2,140.0,16.0,2023,1,31,Tuesday,14,6.0
2,Evening Weight Training,0.0,0.3,0.3,0.0,WeightTraining,WeightTraining,8479862220,01-30-2023,0,...,0,0,3.0,,2023,1,30,Monday,19,
3,Zwift - Roger Lefort s Meetup on Wandering Fla...,18.5,1.0,1.1,505.2,VirtualRide,VirtualRide,8474322131,01-29-2023,1,...,0,0,9.0,,2023,1,29,Sunday,14,27.3
4,Grizzly Peak via Tunnel w/ TA,41.2,3.1,4.1,2582.0,Ride,Ride,8467691962,01-28-2023,38,...,0,0,77.0,13.0,2023,1,28,Saturday,8,62.7


In [4]:
# Keep only activities whose type is 'Ride'. The explicit copy makes the
# filtered frame independent of the frame it was sliced from, so later column
# assignments on it do not trigger pandas' SettingWithCopyWarning.
processed_data = processed_data[processed_data.type == 'Ride'].copy()

# Number of rides for every (weekday, hour) combination present in the data
grouped_by_day_hour = (
    processed_data
    .groupby(['weekday', 'hour'])
    .agg({'id': 'count'})
    .reset_index()
)

# Ordered categorical so weekdays sort Monday -> Sunday instead of alphabetically
day_order = CategoricalDtype(
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    ordered=True,
)
grouped_by_day_hour['weekday'] = grouped_by_day_hour['weekday'].astype(day_order)

# Complete 7 x 24 grid of (weekday, hour) pairs. It is built from the full list
# of weekdays rather than from the weekdays observed in the data, so a day
# without any ride still gets a row of zeros instead of disappearing.
mux = pd.MultiIndex.from_product(
    [pd.CategoricalIndex(day_order.categories, dtype=day_order), range(24)],
    names=['weekday', 'hour'],
)
grouped_by_day_hour = (
    grouped_by_day_hour
    .set_index(['weekday', 'hour'])
    .reindex(mux, fill_value=0)  # combinations with no rides get a count of 0
    .reset_index()
)

# Pivoting table to get the number of rides per weekday (rows) and hour (columns).
# observed=False is passed explicitly because the index column is categorical
# and the implicit default of this argument is deprecated in recent pandas.
pivot_data = grouped_by_day_hour.pivot_table(
    index='weekday',
    columns='hour',
    values='id',
    aggfunc='sum',
    observed=False,
).fillna(0)
pivot_data


hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Monday,0,0,0,0,0,0,0,0,2,0,...,2,4,8,5,20,3,3,0,0,0
Tuesday,0,0,0,0,0,0,0,0,0,0,...,7,12,9,12,10,7,0,0,0,0
Wednesday,0,0,0,0,0,0,0,0,0,0,...,3,1,4,21,13,7,1,1,2,0
Thursday,0,0,0,0,0,0,0,0,0,0,...,6,12,5,18,22,4,1,7,0,0
Friday,0,1,0,0,0,1,0,1,1,1,...,10,23,6,8,9,1,1,0,0,0
Saturday,0,0,0,0,1,1,2,5,24,17,...,21,15,12,6,7,1,3,1,1,0
Sunday,0,0,0,0,0,2,5,4,11,19,...,17,10,9,9,4,2,2,2,0,0


In [5]:
# Creating heatmap of workouts by weekday (rows) and hour of day (columns)

# One label per hour on a 12-hour clock: 12am, 1am, ..., 11am, 12pm, 1pm, ..., 11pm.
# Generated rather than typed out so every label uses the same am/pm suffix.
hours_of_day = [
    f"{(hour % 12) or 12}{'am' if hour < 12 else 'pm'}"
    for hour in range(24)
]

fig = px.imshow(
    pivot_data,
    # Empty axis titles; the colorbar is labelled like the month/day heatmap
    labels=dict(x="", y="", color="Activities"),
    color_continuous_scale='oranges',
    x=hours_of_day,
    aspect="auto",
)
fig.update_xaxes(side="top")  # hour labels above the grid
fig.update_layout(
    title='',
    xaxis_nticks=8,
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
)
fig.show()

In [6]:
# Keep only activities whose type is 'Ride'. The explicit copy makes the
# filtered frame independent of the frame it was sliced from, so later column
# assignments on it do not trigger pandas' SettingWithCopyWarning.
processed_data = processed_data[processed_data.type == 'Ride'].copy()

# Number of rides for every (month, day of month) combination present in the data
grouped_by_month_day = (
    processed_data
    .groupby(['month', 'day'])
    .agg({'id': 'count'})
    .reset_index()
)

months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
# Ordered categorical so months sort in calendar order instead of alphabetically
month_order = CategoricalDtype(months, ordered=True)

# Converting month number to month name (1 -> 'January', ..., 12 -> 'December')
grouped_by_month_day['month'] = (
    grouped_by_month_day['month']
    .apply(lambda month_number: months[month_number - 1])
    .astype(month_order)
)

# Complete 12 x 31 grid of (month, day) pairs so that months or days without
# any ride show up as zeros instead of being dropped from the table, in the
# same way the weekday/hour table fills hours with no data.
month_day_grid = pd.MultiIndex.from_product(
    [pd.CategoricalIndex(months, dtype=month_order), range(1, 32)],
    names=['month', 'day'],
)
grouped_by_month_day = (
    grouped_by_month_day
    .set_index(['month', 'day'])
    .reindex(month_day_grid, fill_value=0)  # combinations with no rides get a count of 0
    .reset_index()
)

# Pivoting table to get the number of rides per month (rows) and day of month (columns).
# observed=False is passed explicitly because the index column is categorical
# and the implicit default of this argument is deprecated in recent pandas.
pivot_data_month_day = grouped_by_month_day.pivot_table(
    index='month',
    columns='day',
    values='id',
    aggfunc='sum',
    observed=False,
).fillna(0)
pivot_data_month_day

day,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,31
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
January,1,0,0,1,1,0,1,2,1,0,...,0,1,0,0,1,1,1,0,0,2
February,0,0,0,0,1,1,0,0,0,0,...,0,1,0,0,0,2,0,0,0,0
March,1,1,0,0,1,1,0,0,0,0,...,2,1,0,0,3,2,1,1,2,2
April,0,3,3,1,1,2,2,1,4,2,...,1,2,3,2,3,2,1,1,1,0
May,4,2,3,3,2,3,5,2,2,2,...,3,2,5,4,5,3,2,3,5,3
June,6,5,4,4,1,3,5,1,3,4,...,2,4,5,2,2,3,2,1,1,0
July,2,3,2,4,3,2,1,6,3,4,...,5,2,5,4,6,4,4,3,5,4
August,5,8,1,3,3,4,3,2,4,2,...,2,2,1,1,4,5,3,4,4,1
September,2,2,3,3,3,3,0,3,5,1,...,1,2,3,2,4,2,1,1,5,0
October,5,1,2,4,4,4,0,3,3,0,...,3,1,4,3,0,1,1,2,2,1


In [7]:
# Heatmap of the month/day pivot table: one row per month, one column per day

fig = px.imshow(
    pivot_data_month_day,
    aspect="auto",
    color_continuous_scale='oranges',
    labels={"x": "", "y": "", "color": "Activities"},
)

# Layout tweaks: no title, at most 8 x ticks, transparent plot background
fig.update_layout(title='', xaxis_nticks=8, showlegend=False, plot_bgcolor='rgba(0,0,0,0)')

# Put the day labels above the grid
fig.update_xaxes(side="top")

fig.show()

In [None]:
# Display the column labels of processed_data
processed_data.columns

In [None]:
#######################
# PERFORMANCE METRICS #
#######################

# Columns plotted on the x and y axes.
# NOTE(review): both metrics were previously 'total_elevation_gain', which plots
# the column against itself (every point on the diagonal). 'distance' is used
# for x because it is a column shown in the data preview; confirm the intended pair.
metric_1 = 'distance'
metric_2 = 'total_elevation_gain'

# Year as a string so it is used as a discrete colour group in the scatter plot.
# assign() builds a new frame instead of writing a column into a filtered slice.
processed_data = processed_data.assign(year=processed_data['year'].astype(str))

# Scatterplot of the two metrics: marker size follows distance, one colour and
# one OLS trendline per year. The title is derived from the metric names so it
# cannot drift out of sync with what is actually plotted.
fig = px.scatter(
    processed_data,
    x=metric_1,
    y=metric_2,
    size='distance',
    color='year',
    title=f'{metric_1} vs {metric_2}',
    trendline="ols",
)

fig.update_layout(
    xaxis_title=f'{metric_1}',
    yaxis_title=f'{metric_2}',
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
)
fig.show()
