# Strava Analysis

I had never used Python to create a dashboard, so I wanted to give it a go and decided to analyse my own Strava data.

[You can request a bulk export of your data here.](https://support.strava.com/hc/en-us/articles/216918437-Exporting-your-Data-and-Bulk-Export)

Below you can find the notebook code I used for the initial analysis.

In [106]:
# set up and libraries 
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import calendar

# display all the columns in the dataset print outs
pd.options.display.max_columns = None

# read the data in
act_df = pd.read_csv('data/activities.csv')

# do some data cleaning of the column names: lower-case, spaces -> underscores
act_df.columns = act_df.columns.str.lower()
act_df.columns = act_df.columns.str.replace(' ', '_')

# only select the columns I want, then derive the date / duration helpers.
# The derived columns are given as callables: assign() passes each callable
# the frame being built, so nothing refers to df2 before it exists, and
# later keywords (month_label) can read columns created by earlier ones (month).
df2 = (
    act_df
    .filter(regex='activity|distance|commute|time|speed|elevation|grade|temperature')
    .copy()
    .assign(year=lambda d: pd.DatetimeIndex(d['activity_date']).year,
            month=lambda d: pd.DatetimeIndex(d['activity_date']).month,
            month_label=lambda d: d['month'].apply(lambda x: calendar.month_abbr[x]),
            time_minutes=lambda d: d['moving_time'] / 60,
            time_hours=lambda d: d['moving_time'] / 3600)
)

# drop every column whose name contains 'span_class'
df2 = df2.drop(columns=list(df2.filter(regex='span_class')))

In [None]:
# Scratch cell: try out the date-derived columns on df2.
# assign() returns a new frame, so df2 itself is not modified by this check.
test = df2
test.assign(test=pd.DatetimeIndex(df2['activity_date']).year,
            montht=pd.DatetimeIndex(df2['activity_date']).month)

# Rebuild df2 from act_df. Callables are used inside assign() so each derived
# column is computed from the frame being built rather than from whatever df2
# held before this cell ran; `month_label` can then read the new `month`.
df2 = (
    act_df
    .filter(regex='activity|distance|commute|time|speed|elevation|grade|temperature')
    .copy()
    .assign(year=lambda d: pd.DatetimeIndex(d['activity_date']).year,
            month=lambda d: pd.DatetimeIndex(d['activity_date']).month,
            month_label=lambda d: d['month'].apply(lambda x: calendar.month_abbr[x]),
            time_minutes=lambda d: d['moving_time'] / 60,
            time_hours=lambda d: d['moving_time'] / 3600)
    # same 'span_class' clean-up as the set-up cell, so re-running this cell
    # leaves df2 with the same columns
    .pipe(lambda d: d.drop(columns=list(d.filter(regex='span_class'))))
)

In [None]:
# Number of activities of each type over all years, largest count first.
ids_by_type = df2.groupby(['activity_type'])['activity_id']
type_counts = ids_by_type.count().reset_index(name="count")
df3 = type_counts.sort_values('count', ascending=False)

# Disabled bar chart of the counts above:
# sns.set(font='Helvetica')
# sns.set_style('white')

# sns.barplot(x = 'activity_type', y = 'count', data = df3, palette= 'rocket'); 
# plt.xlabel(' ')
# plt.ylabel(' ')
# plt.title('All time activity counts since 2014\n');
# sns.despine()

In [None]:
# most popular activity each year
# Count activities per (activity_type, year), then order the rows by year with
# the highest count first, so the first row of each year is that year's most
# popular activity. activity_type is the final key to keep ties deterministic.
all_years = (
    df2
    .groupby(['activity_type', 'year'])['activity_id']
    .count()
    .reset_index(name="count")
    .sort_values(['year', 'count', 'activity_type'], ascending=[True, False, True])
)

In [None]:
# Per-year totals for rows whose activity_type is 'Ride'.
is_ride = df2['activity_type'] == 'Ride'

# output column -> (source column, aggregation)
yearly_aggregations = {
    'distance_sum': ('distance', 'sum'),
    'total_rides': ('activity_id', 'count'),
    'time_spent': ('elapsed_time', 'sum'),
    'time_spent_moving': ('moving_time', 'sum'),
    'mtime_hours': ('time_hours', 'sum'),
    'total_elevation': ('elevation_gain', 'sum'),
    'total_commutes': ('commute', 'sum'),
}

rides_by_year = df2[is_ride].groupby('year')
year_data = rides_by_year.agg(**yearly_aggregations).reset_index()

In [None]:
# Plot styling: Helvetica font, then set_style() called with no arguments.
sns.set(font='Helvetica')
sns.set_style()

# Bar chart of year_data['distance_sum'] by year; axis labels are a single space.
sns.barplot(data=year_data, x='year', y='distance_sum', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total distance (km) per year\n',
);

In [None]:
# Bar chart of year_data['total_rides'] by year; axis labels are a single space.
sns.barplot(data=year_data, x='year', y='total_rides', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total number of rides per year\n',
);

In [None]:
# Bar chart of year_data['mtime_hours'] by year; axis labels are a single space.
sns.barplot(data=year_data, x='year', y='mtime_hours', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total time spent riding (hours) per year\n',
);

In [None]:
# Bar chart of year_data['total_elevation'] by year; axis labels are a single space.
sns.barplot(data=year_data, x='year', y='total_elevation', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total elevation per year (ft)\n',
);

In [None]:
# Bar chart of year_data['total_commutes'] by year; axis labels are a single space.
sns.barplot(data=year_data, x='year', y='total_commutes', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total number of commutes per year\n',
);

In [None]:
# Reshape year_data to long form: one row per (year, category) pair, with the
# former column name in `category` and its number in `value`.
dfm = pd.melt(year_data, id_vars='year', var_name='category', value_name='value')

In [None]:
# g = sns.FacetGrid(dfm, col="category", col_wrap=3)
# g.map_dataframe(sns.barplot, x="year", y = 'value')

In [None]:
# Per-month totals for rows whose activity_type is 'Ride', ordered by month number.
is_ride = df2['activity_type'] == 'Ride'

# output column -> (source column, aggregation)
monthly_aggregations = {
    'distance_sum': ('distance', 'sum'),
    'total_rides': ('activity_id', 'count'),
    'time_spent': ('elapsed_time', 'sum'),
    'time_spent_moving': ('moving_time', 'sum'),
    'total_elevation': ('elevation_gain', 'sum'),
    'total_commutes': ('commute', 'sum'),
    'mtime_hours': ('time_hours', 'sum'),
}

rides_by_month = df2[is_ride].groupby(['month', 'month_label'])
monthly_totals = rides_by_month.agg(**monthly_aggregations)
month_data = monthly_totals.sort_values('month', ascending=True).reset_index()

month_data

In [None]:
# Bar chart of month_data['distance_sum'] by month_label; axis labels are a single space.
sns.barplot(data=month_data, x='month_label', y='distance_sum', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total distance per month (2014-2021)\n',
);

In [None]:
# Bar chart of month_data['total_rides'] by month_label; axis labels are a single space.
sns.barplot(data=month_data, x='month_label', y='total_rides', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total rides per month (2014-2021)\n',
);

In [None]:
# Bar chart of month_data['total_elevation'] by month_label; axis labels are a single space.
sns.barplot(data=month_data, x='month_label', y='total_elevation', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total elevation (ft) per month (2014-2021)\n',
);


In [None]:
# Bar chart of month_data['total_commutes'] by month_label; axis labels are a single space.
sns.barplot(data=month_data, x='month_label', y='total_commutes', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total commutes per month (2014-2021)\n',
);


In [None]:
# Bar chart of month_data['mtime_hours'] by month_label; axis labels are a single space.
sns.barplot(data=month_data, x='month_label', y='mtime_hours', palette='rocket').set(
    xlabel=' ',
    ylabel=' ',
    title='total time spent (hours) riding per month (2014-2021)\n',
);

In [None]:
# Shell escape: run nbconvert on strava_analysis.ipynb to produce HTML with the "classic" template.
!jupyter nbconvert strava_analysis.ipynb --to html --template classic