In [1]:
#############
# LIBRARIES #
#############

from get_strava_data import my_data, athlete_data, bike_data, process_data # Functions to retrive data using strava api and process for visualizations
# from fit2gpx import StravaConverter

import pandas as pd
import numpy as np
import datetime as dt

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import plotly.io as pio
# Make 'notebook_connected' the default Plotly renderer for every fig.show() call in this notebook.
pio.renderers.default = 'notebook_connected'

import os

import streamlit as st


In [2]:
#####################################
# Fetching data and exporting to CSV #
#####################################
# Pulls activities, bikes and athlete info through the get_strava_data helpers
# and writes each result to ./data so later cells can reload it from disk.

# DataFrame.to_csv does not create missing parent folders, so make sure the
# export directory exists before the first write (fresh checkout / new machine).
os.makedirs('./data', exist_ok=True)

##########################
# Getting all activities #
##########################

my_data_df = my_data()
processed_data = process_data(my_data_df)

# Saving data to csv
processed_data.to_csv('./data/processed_data.csv', index=False)

#####################
# Getting bike data #
#####################

bike_data_df = bike_data()
bike_data_df.to_csv('./data/bike_data.csv', index=False)


########################
# Getting athlete data #
########################

athlete_df = athlete_data()
athlete_df.to_csv('./data/athlete_data.csv', index=False)

Getting data...
Requesting page 1...
<Response [200]>
Requesting page 2...
<Response [200]>
Requesting page 3...
<Response [200]>
Requesting page 4...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!
Requesting data...
<Response [200]>
Data retrieved successfully!


In [None]:
# Load the three CSV exports from ./data back into DataFrames
athlete_df = pd.read_csv('./data/athlete_data.csv')
bike_data_df = pd.read_csv('./data/bike_data.csv')
processed_data = pd.read_csv('./data/processed_data.csv')

In [None]:
# Preview the first rows of the processed activities table.
processed_data.head()

In [None]:
# Preview the first rows of the bike table.
bike_data_df.head()

In [None]:
# Display the athlete table.
athlete_df

In [None]:
# Distinct gear ids that appear in the activities.
processed_data.gear_id.unique()

In [None]:
# Gear ids of the bikes. The original note named: Giant TCR, Pure Fixie, Marin Headlands,
# Storck Scenero, Cannondale Slate, Marin Hawkhill (six names for nine ids; mapping TODO confirm).
bike_ids = [
    'b8099416',
    'b4196400',
    'b8615449',
    'b4073790',
    'b5245627',
    'b8029179',
    'b326351',
    'b804798',
    'b232108',
]

# converted_distance of the first bike row whose model_name is 'TCR'
is_tcr = bike_data_df['model_name'] == 'TCR'
tcr_odometer = bike_data_df.loc[is_tcr, 'converted_distance'].values[0]
tcr_odometer

In [None]:
# List the columns available in the processed activities table.
processed_data.columns

In [None]:
# Independent copy of the columns shown in the activity table, with the local
# start date reformatted as MM-DD-YYYY text.
streamlit_df = processed_data.loc[:, [
    'start_date_local',
    'name',
    'type',
    'moving_time',
    'distance',
    'total_elevation_gain',
    'average_speed',
    'average_cadence',
    'average_watts',
    'average_heartrate',
    'suffer_score',
]].copy()
streamlit_df['start_date_local'] = pd.to_datetime(streamlit_df['start_date_local']).dt.strftime('%m-%d-%Y')
streamlit_df

In [None]:
# Activity counts per month / weekday / time of day (input for a heatmap)

time_windows = ['month', 'week', 'weekday']
months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
time_of_day = ['morning', 'afternoon', 'evening']
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']


def _part_of_day(hour):
    """Bucket an hour value: below 12 -> 'morning', below 18 -> 'afternoon', otherwise 'evening'."""
    if hour < 12:
        return 'morning'
    if hour < 18:
        return 'afternoon'
    return 'evening'


# Adds a time_of_day column to processed_data itself (visible to later cells).
processed_data['time_of_day'] = processed_data['hour'].apply(_part_of_day)

count_keys = [time_windows[0], 'weekday', 'time_of_day']
monthly_activity_counts = processed_data.groupby(count_keys).agg({'id': 'count'}).reset_index()
# Swap the month number for its English name (number - 1 indexes into `months`).
monthly_activity_counts['month'] = monthly_activity_counts[time_windows[0]].apply(lambda number: months[number - 1])

monthly_activity_counts

# Heatmap rendering is still disabled:
# fig = px.imshow(monthly_activity_counts[['id']])
# fig.show()

In [None]:
# The activity counts on their own, as an array.
monthly_activity_counts['id'].values

In [None]:
# Activity table rendered with Plotly.
# Display titles for the table columns, in the order they are shown.
column_labels = {
    'start_date_local': 'Date',
    'name': 'Name',
    'type': 'Type',
    'moving_time': 'Moving Time (hours)',
    'distance': 'Distance (miles)',
    'total_elevation_gain': 'Elevation Gain (ft)',
    'average_speed': 'Average Speed (mph)',
    'average_cadence': 'Average Cadence (rpm)',
    'average_watts': 'Average Watts',
    'average_heartrate': 'Average Heartrate',
    'suffer_score': 'Suffer Score',
}
# Renames in place, so streamlit_df keeps the display titles in later cells.
streamlit_df.rename(columns=column_labels, inplace=True)

headerColor = '#45738F'
rowEvenColor = 'lightgrey'
rowOddColor = 'white'

table_header = dict(
    values=list(streamlit_df.columns),
    line_color='darkslategray',
    fill_color=headerColor,
    align=['left', 'center'],
    font=dict(color='white', size=12),
)
table_cells = dict(
    values=[streamlit_df[label] for label in column_labels.values()],
    # Alternating odd/even row colours, repeated once per row of the frame.
    fill_color=[[rowOddColor, rowEvenColor] * len(streamlit_df.index)],
    font=dict(color='black', size=12),
    height=45,
    align=['left', 'center'],
)

fig = go.Figure(data=[go.Table(header=table_header, cells=table_cells)])
fig.show()

In [None]:
# Distinct activity types in the table (uses the renamed 'Type' column).
streamlit_df.Type.unique()

In [None]:
# Monthly and cumulative distance / elevation per year and activity type.
today = dt.datetime.today()
this_month = today.month
this_year = today.year

months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
months_sofar = months[:this_month]  # months elapsed this year, current one included
months_left = months[this_month:]   # months of this year that have not started yet

# Sum distance and elevation gain per (year, month, type)
grouped_by_year_and_month = (
    processed_data
    .groupby(['year', 'month', 'type'])
    .agg({'distance': 'sum', 'total_elevation_gain': 'sum'})
    .reset_index()
)

# Since not all months have data, create entries for the missing (year, type, month)
# combinations with distance and elevation gain set to 0, so the cumulative sums
# below have a value for every month.
mux = pd.MultiIndex.from_product(
    [grouped_by_year_and_month.year.unique(), grouped_by_year_and_month.type.unique(), range(1, 13)],
    names=['year', 'type', 'month'],
)
grouped_by_year_and_month = (
    grouped_by_year_and_month
    .set_index(['year', 'type', 'month'])
    .reindex(mux, fill_value=0)
    .reset_index()
)
grouped_by_year_and_month['Cumulative Distance'] = grouped_by_year_and_month.groupby(['year', 'type'])['distance'].cumsum()
grouped_by_year_and_month['Cumulative Elevation'] = grouped_by_year_and_month.groupby(['year', 'type'])['total_elevation_gain'].cumsum()

# Replace the month number with its name (after the cumsum, which relies on numeric order)
grouped_by_year_and_month['month'] = grouped_by_year_and_month['month'].apply(lambda x: months[x - 1])

# Rows for months of the current year that have not started yet: they only hold
# the zero-filled placeholders created above.
is_upcoming = (grouped_by_year_and_month.year == this_year) & grouped_by_year_and_month.month.isin(months_left)
no_data_yet = grouped_by_year_and_month[is_upcoming]

# Drop those rows with a plain boolean mask. The previous DataFrame.isin(DataFrame) +
# NaN-masking + dropna round trip removed the same rows but upcast the numeric
# columns to float and depended on index alignment.
grouped_by_year_and_month = grouped_by_year_and_month[~is_upcoming].copy()

grouped_by_year_and_month['year'] = grouped_by_year_and_month['year'].astype(int)

grouped_by_year_and_month.tail(50)


In [None]:
# Sanity check: list any rows whose year is missing (NaN).
grouped_by_year_and_month[grouped_by_year_and_month.year.isna()]

In [None]:
# Names of the months elapsed so far this year (current month included).
# Relies on `today` set by an earlier cell.
months_in_year = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]

this_month, this_year = today.month, today.year
months_sofar = months_in_year[:this_month]
months_sofar

In [None]:
# Rows for the current year (this_year), then restricted to the months elapsed so far.
# NOTE(review): the name `year_2022` is misleading; the filter uses this_year, not a fixed 2022.
year_2022 = grouped_by_year_and_month[grouped_by_year_and_month.year == this_year]
year_2022[year_2022.month.isin(months_sofar)]

In [None]:
#########
# GOALS #
#########
# Compares the year-to-date distance with the pace needed to reach the yearly goal.

# Read the clock once so every derived value refers to the same instant.
today = dt.datetime.today()
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
this_month = today.month
today_year = today.year

# Days elapsed since 1 January of the *current* year. The start used to be
# hard-coded to 2022, which inflated the target in every later year.
d0 = dt.datetime(today_year, 1, 1)
d1 = today
delta = d1 - d0

days_gone_by = delta.days

distance_goal = 2500               # yearly distance goal (miles, per the message below)
monthly_goal = distance_goal/12
daily_goals = distance_goal/365

# Distance per calendar day
grouped_by_day = processed_data.groupby(['year', 'month', 'day']).agg({'distance': 'sum'}).reset_index()
# Running total within each year (rows are sorted by year, month, day by the groupby)
grouped_by_day['Cumulative Distance'] = grouped_by_day.groupby(['year'])['distance'].cumsum()

# Distance that should have been covered by now at a constant daily pace
should_be_reached = daily_goals*days_gone_by

# Ordinal suffix for a month number (1-12), so the message reads 1st/2nd/3rd, not 1th/2th/3th
ordinal_suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(this_month, 'th')
print(f"Today's month is the {this_month}{ordinal_suffix} month and year is {today_year}")

# Year-to-date total = highest running total of the current year. Filtering on
# the current month as well returned NaN whenever that month had no activity yet.
year_to_date = grouped_by_day.loc[grouped_by_day.year == today_year, 'Cumulative Distance']
where_i_am = 0 if year_to_date.empty else year_to_date.max()
print(f"I should have reached {should_be_reached} miles. I've done {where_i_am} miles")





In [None]:
# Display the per-day distances together with their yearly running total.
grouped_by_day

In [None]:
# Yearly progression: one line per year for the selected cumulative metric, rides only

metrics = ['Cumulative Distance', 'Cumulative Elevation']
selected_metric = metrics[0]

is_ride = grouped_by_year_and_month['type'] == 'Ride'
cycling = grouped_by_year_and_month[is_ride]

fig = px.line(cycling, x='month', y=selected_metric, color='year')
fig.show()

In [None]:
# Plotting cumulative distance by month, one hue per year
# NOTE(review): no activity-type filter is applied here, so every (year, month) has one
# row per type in grouped_by_year_and_month. Confirm that mixing types is intended.
sns.lineplot(x='month', y='Cumulative Distance', data=grouped_by_year_and_month, hue='year')

In [None]:
# Parse start_date_local as datetime, then store it back as MM-DD-YYYY text
parsed_start = pd.to_datetime(processed_data['start_date_local'])
processed_data['start_date_local'] = parsed_start.dt.strftime('%m-%d-%Y')
processed_data

In [None]:
# List the columns of the processed activities table again.
processed_data.columns

In [None]:
###########################
# STRAVA LIFETIME SUMMARY #
###########################
# Headline totals over every recorded activity, printed as a short report.

start_date = processed_data.year.min()                 # earliest year present in the data
burger_calories = 354
total_activities = processed_data.id.count()
num_rides = (processed_data.type == 'Ride').sum()
# Runs carry the activity type 'Run'. This line used to count 'Workout', so the
# "Number of Runs" line below reported workouts instead.
num_runs = (processed_data.type == 'Run').sum()
# int() works whether the sum comes back as a NumPy scalar or a plain Python
# float. .astype(int) only exists on the former.
distance_traveled = int(processed_data.distance.sum())
earth_circumference = 24901 # earth circumference in miles
perc_around_the_earth = (distance_traveled / earth_circumference)
total_kudos = processed_data.kudos_count.sum()
total_climbing = int(processed_data.total_elevation_gain.sum())
power_output = processed_data.average_watts.sum()      # sum of the per-activity average watts


print(f'Strava user since: {start_date}')
print(f'Total number of activities: {total_activities}')
print(f'Total distance traveled: {distance_traveled:,} miles or {perc_around_the_earth:.0%} of the earth circumference')
print(f'Total climbing: {total_climbing:,} feet')


print(f'Number of Rides: {num_rides}')
print(f'Number of Runs: {num_runs}')


# Ratio of activities by type (kept in a name; a bare expression mid-cell is discarded)
activity_ratio_by_type = processed_data['type'].value_counts(normalize=True)

# Breakdown of activities by type
activity_count_by_type = processed_data['type'].value_counts()

# Total distance by activity type
total_by_activity = processed_data.groupby('type')['distance'].sum().sort_values(ascending=False)

In [None]:
# Activities donut chart (matplotlib)

# Number of activities per type, largest first
grouped_by_type = (
    processed_data
    .groupby('type')
    .agg({'type': 'count'})
    .rename(columns={'type': 'total'})
    .sort_values('total', ascending=False)
    .reset_index()
)
# Types with fewer than 20 activities are merged into a single 'Other' slice
grouped_by_type.loc[grouped_by_type.total < 20, 'type'] = 'Other'

pie_df = grouped_by_type.groupby('type').agg({'total': 'sum'}).reset_index()

# Per-type counts in ascending order
breakdown_by_type = processed_data['type'].value_counts().sort_values(ascending=True)

fig, ax = plt.subplots()

strava_color_palette = ['#45738F', '#FC4C02', '#3A18B0', '#FFAA06', '#F5674E']
# Applied to both the slice labels and the percentage texts
textprops = {"fontsize":12, 'fontweight':'bold', 'color':'grey'}

ax.pie(
    pie_df['total'],
    labels=pie_df['type'],
    # Show the percentage only for slices above 1 %, with an empty label otherwise
    autopct=lambda p: format(p, '.1f') if p > 1 else '',
    pctdistance=0.8,
    colors=strava_color_palette,
    textprops=textprops,
    startangle=30,
)
ax.axis('equal')
# A white disc over the centre turns the pie into a donut
ax.add_artist(plt.Circle((0, 0), 0.6, color='white'))

# Figure.show() warns under a non-GUI (inline) backend, so render through pyplot.
plt.show()

In [None]:
# Display the per-type totals that feed the donut chart.
pie_df

In [None]:
# Activities donut chart (Plotly)

# Number of activities per type, largest first
grouped_by_type = (
    processed_data
    .groupby('type')
    .agg({'type': 'count'})
    .rename(columns={'type': 'total'})
    .sort_values('total', ascending=False)
    .reset_index()
)
# Types with fewer than 20 activities are merged into a single 'Other' slice
grouped_by_type.loc[grouped_by_type.total < 20, 'type'] = 'Other'

pie_df = grouped_by_type.groupby('type').agg({'total': 'sum'}).reset_index()

# Labels and values come from the same frame, so they stay aligned row by row
activities = pie_df.type
breakdown_by_type = pie_df.total

# `go` is plotly.graph_objects, imported once in the first cell
fig = go.Figure(data=[go.Pie(labels=activities, values=breakdown_by_type, hole=.6)])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Activity Breakdown by type
# Chart of all activities by type

# Compute the counts here instead of reusing whatever `breakdown_by_type` holds:
# the Plotly donut cell rebinds that name to pie_df.total, whose index is 0..n-1,
# so on a top-to-bottom run the bars were labelled with numbers instead of
# activity types.
breakdown_by_type = processed_data['type'].value_counts().sort_values(ascending=True)

fig = px.bar(breakdown_by_type, y=breakdown_by_type.index, x=breakdown_by_type.values, text_auto='', orientation='h') # Plotly Express
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
    marker_color='#FC4C02',
)
fig.update_layout(
    yaxis_title="",
    xaxis_title="Total Activities",
    font=dict(
        family="Arial",
        size=14,
    ),
    plot_bgcolor='white',
    paper_bgcolor='#FFFFFF',
    margin_l=0,
    margin_r=10,
    margin_t=0,
    margin_b=50,
    width=500,
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    hovermode=False,
)

# Keep the activity names on the y axis and hide the x tick labels
fig.update_yaxes(showticklabels=True)
fig.update_xaxes(showticklabels=False)
fig.show()

In [None]:
# Activity types and their counts as two parallel lists.
# Both are taken from the same value_counts() result so position i of `activities`
# matches position i of `breakdown_by_type`. value_counts() orders by frequency
# while unique() orders by first appearance, so pairing those two mislabels the counts.
type_counts = processed_data['type'].value_counts()
breakdown_by_type = list(type_counts)
activities = list(type_counts.index)
activities