## 2020 Strava Data

## Install dependencies

In [None]:
# !pip install pandas
# !pip install matplotlib

In [2]:
# import time
# time.sleep(10)

In [None]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import datetime
import time
import math
import matplotlib.dates as mdates
from matplotlib import (pyplot as plt, figure)
from matplotlib.dates import DateFormatter

In [None]:
# Load the raw Strava activity export; the file is expected to sit next to
# this notebook.
csv_path = 'strava_activities.csv'
data = pd.read_csv(csv_path)

# Preview the first rows to confirm the file parsed as expected.
data.head()

## Clean Up Data

In [None]:
# Build `data_filtered`: the subset of the raw export used by the rest of the
# notebook, with empty rides dropped and units converted.

# Divisor used for the distance conversion below (kilometres per mile).
KM_PER_MILE = 1.609344

# select columns
# Each column is listed exactly once, so no duplicate-column clean-up is needed
# after the selection. `.copy()` detaches the result from `data` so the
# in-place edits below do not touch (or warn about) the original frame.
selected_columns = ['Activity Date', 'Elapsed Time', 'Distance', 'Relative Effort']
data_filtered = data[selected_columns].copy()

# remove empty rides
data_filtered.query('Distance > 0', inplace=True)

# convert time to minutes
# NOTE(review): assumes 'Elapsed Time' is in seconds -- confirm against export.
# astype(int) truncates to whole minutes and raises if a value is missing.
data_filtered['Elapsed Time'] = data_filtered['Elapsed Time'].div(60).astype(int)

# convert distance to miles, rounded to one decimal place
# NOTE(review): assumes 'Distance' is in kilometres -- confirm against export.
data_filtered['Distance'] = data_filtered['Distance'].div(KM_PER_MILE).round(1)

# 'Relative Effort' is intentionally left exactly as read from the CSV.

data_filtered.tail()

## Filter Data to 2020

In [None]:
# Parse the activity timestamps so the `.dt` accessor can be used on them,
# and store the parsed values back on the frame.
activity_dates = pd.to_datetime(data_filtered['Activity Date'])
data_filtered['Activity Date'] = activity_dates

# Keep only the activities whose date falls in calendar year 2020.
in_2020 = activity_dates.dt.year == 2020
df = data_filtered[in_2020]

# Preview the last ten rows of the filtered frame.
df.tail(10)

## Plot data

In [None]:
# Draw two figures from `df`: one scatter plot per metric with a point for
# every activity, and one bar chart per metric with monthly totals.

# setup summary
# `df` holds a single calendar year (filtered to 2020 in an earlier cell), so
# grouping by month number gives the monthly totals. Only the numeric metric
# columns are summed: recent pandas versions no longer silently drop the
# datetime 'Activity Date' column when summing. Reindexing to months 1..12
# guarantees exactly one row per label in `summary_x`, with 0 for any month
# that has no activities (including a missing January or December).
metric_columns = ['Distance', 'Relative Effort', 'Elapsed Time']
activity_month = pd.to_datetime(df['Activity Date']).dt.month
summary = (
    df[metric_columns]
    .groupby(activity_month)
    .sum()
    .reindex(range(1, 13), fill_value=0)
)

# daily
daily_x = df['Activity Date']
daily_y1 = df['Distance']
daily_y2 = df['Relative Effort']
daily_y3 = df['Elapsed Time']

# summary
summary_x = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'July', 'Aug', 'Sept', 'Oct', 'Nov', 'Dec']
summary_y1 = summary['Distance']
summary_y2 = summary['Relative Effort']
summary_y3 = summary['Elapsed Time']

# figure
fig, (daily_ax1, daily_ax2, daily_ax3) = plt.subplots(nrows=1, ncols=3, figsize=(8.5, 3))
fig2, (summary_ax1, summary_ax2, summary_ax3) = plt.subplots(nrows=1, ncols=3, figsize=(8.5, 3))
fig.suptitle('2020 Strava Workouts')

# daily scatter (8-digit hex colours carry an alpha channel)
daily_ax1.scatter(daily_x, daily_y1, s=8, c='#99999950', label='Distance')
daily_ax2.scatter(daily_x, daily_y2, s=8, c='#007acc50', label='Relative Effort')
daily_ax3.scatter(daily_x, daily_y3, s=8, c='#f9826c50', label='Elapsed Time')
daily_ax1.set_title('Distance')
daily_ax2.set_title('Effort')
daily_ax3.set_title('Duration')

# summary bars
summary_ax1.bar(summary_x, summary_y1, width=0.4, color='#999999')
summary_ax2.bar(summary_x, summary_y2, width=0.4, color='#007acc')
summary_ax3.bar(summary_x, summary_y3, width=0.4, color='#f9826c')
summary_ax1.set_title('Monthly Distance')
summary_ax2.set_title('Monthly Effort')
summary_ax3.set_title('Monthly Duration')

# Label the daily x axes as month-day, with a tick every sixth week.
# Each axis gets its own locator instance.
date_form = DateFormatter("%m-%d")
for daily_ax in (daily_ax1, daily_ax2, daily_ax3):
    daily_ax.xaxis.set_major_formatter(date_form)
    daily_ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=6))

# Fit the layouts last, once titles and tick labels exist, so the spacing
# accounts for them. The rect leaves the top 16% of each figure free.
fig.tight_layout(rect=[0, 0, 1, 0.84])
fig2.tight_layout(rect=[0, 0, 1, 0.84])

In [None]:
# Rank the 2020 activities from longest to shortest elapsed time and show them.
sort_column = 'Elapsed Time'
highest = df.sort_values(sort_column, ascending=False)
highest