## STRAVA DATA WITH PYTHON

The following is an excercise to practice python, and use my own running data logged in Strava to do some visualization tests. Huge thanks to Fran Polignano for the code below.

In [2]:
#the code below allow us to get data from the Strava API, with tokens that won't expire.

import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

auth_url = "https://www.strava.com/oauth/token"
activities_url = "https://www.strava.com/api/v3/athlete/activities"


# the request library has a resource to pass the parameters of the request with a dictionary.
# payload is the way to do it. Its a clean dictionary that avoids having to write the var1=value&var2=value2

payload = {
    'client_id': 'xxxx',
    'client_secret': 'xxxx',
    'refresh_token': 'xxxxxxxxxxxxxxxxxxxx',
    'grant_type': "refresh_token",
    'f': 'json'
}

# this part of the code is to get the access token (which is temporary) by using the permanent refresh tok
print("Requesting Token...\n")

# res will use the requests.post with the authorization url, payload data and grab the access token.
res = requests.post(auth_url, data=payload, verify=False)

# this variable is the access token which we grabbed from the step above
access_token = res.json()['access_token']
print("Access Token = {}... \n".format(access_token))

# header and params are inputs required for the GET command, a standard API command.

header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}
param2 = {'per_page': 200, 'page': 2}
param3 = {'per_page': 180, 'page': 3}
my_dataset = requests.get(activities_url, headers=header, params=param).json()
my_dataset2 = requests.get(activities_url, headers=header, params=param2).json()
my_dataset3 = requests.get(activities_url, headers=header, params=param3).json()

type(my_dataset)

Requesting Token...

Access Token = ac50316c59adc455d1da15880fbaed8c969f5fa2... 



list

In [3]:
my_dataset_total = my_dataset + my_dataset2 + my_dataset3

In [4]:
    # test for polyline maps
    
    # print(my_dataset[0]['map']['summary_polyline'])

In [5]:
## This piece of code is to find all the variables we can potentially use

sample = list(my_dataset_total)[0]

dict(sample)

{'resource_state': 2,
 'athlete': {'id': 23238804, 'resource_state': 1},
 'name': '5K + bonus (got lost 😅)',
 'distance': 5612.9,
 'moving_time': 1515,
 'elapsed_time': 1742,
 'total_elevation_gain': 15.8,
 'type': 'Run',
 'workout_type': 3,
 'id': 3305913528,
 'external_id': 'BD78A927-4C4C-4CFA-9720-69E1BB120CF3-activity.fit',
 'upload_id': 3533642334,
 'start_date': '2020-04-16T08:08:38Z',
 'start_date_local': '2020-04-16T18:08:38Z',
 'timezone': '(GMT+10:00) Australia/Melbourne',
 'utc_offset': 36000.0,
 'start_latlng': [-37.796297, 144.985275],
 'end_latlng': [-37.795985, 144.984761],
 'location_city': None,
 'location_state': None,
 'location_country': None,
 'start_latitude': -37.796297,
 'start_longitude': 144.985275,
 'achievement_count': 3,
 'kudos_count': 4,
 'comment_count': 0,
 'athlete_count': 1,
 'photo_count': 0,
 'map': {'id': 'a3305913528',
  'summary_polyline': 'zaueF}l|sZGHAF?bAIJELSTMr@IVAJDl@Qn@StA?hACtAETKHYDi@Ik@Ey@Mc@QUGI?GDELMt@CV?nACTIBk@MWSG?OJOb@EBS@iAQwAMmC

In [6]:
# Lets select the attributes I'm interested in:

my_cols =['name', 
          'average_speed',
          'distance',
          'elapsed_time',
          'total_elevation_gain',
          'type', 
          'start_date_local',
          'moving_time',
          'kudos_count',
          'total_elevation_gain',
          'achievement_count',
          'start_date',
          'id'
          
         ]

print(my_cols)

['name', 'average_speed', 'distance', 'elapsed_time', 'total_elevation_gain', 'type', 'start_date_local', 'moving_time', 'kudos_count', 'total_elevation_gain', 'achievement_count', 'start_date', 'id']


In [7]:
# now lets create an iterator over all the activities and save the values we are interested in:

# create an empty list call data #  

final_data = []    

# create a for loop to append all the activity from "my_dataset" to this final data var

for activity in my_dataset_total:
    #transform each activity into a dictionary format
    my_dict = dict(activity)
    #this is a list crompreshension that will append each element in my_dict, for keys in my_cols only
    final_data.append([my_dict.get(x) for x in my_cols])



In [8]:
# now we can import pandas, with columns equal to my_cols

import pandas as pd

df = pd.DataFrame(final_data, columns=my_cols)

## lets take a peek at the top entries

df.head(10)

Unnamed: 0,name,average_speed,distance,elapsed_time,total_elevation_gain,type,start_date_local,moving_time,kudos_count,total_elevation_gain.1,achievement_count,start_date,id
0,5K + bonus (got lost 😅),3.705,5612.9,1742,15.8,Run,2020-04-16T18:08:38Z,1515,4,15.8,3,2020-04-16T08:08:38Z,3305913528
1,Morning Run,3.66,5080.7,1637,14.9,Run,2020-04-15T08:30:48Z,1388,2,14.9,0,2020-04-14T22:30:48Z,3300003250
2,Easter 5K 💪🏽,3.697,5020.4,1684,32.9,Run,2020-04-13T10:17:08Z,1358,3,32.9,0,2020-04-13T00:17:08Z,3291570405
3,Morning Run,3.744,4796.6,1418,32.4,Run,2020-04-11T10:32:23Z,1281,4,32.4,0,2020-04-11T00:32:23Z,3282179069
4,Sunny 7K 🌞,3.662,7148.9,2522,88.3,Run,2020-04-09T12:58:08Z,1952,4,88.3,2,2020-04-09T02:58:08Z,3273881162
5,Morning Run,3.527,3791.2,1791,33.2,Run,2020-04-08T08:20:26Z,1075,1,33.2,0,2020-04-07T22:20:26Z,3268890838
6,Afternoon Run,3.498,5800.4,1754,62.9,Run,2020-04-06T17:41:22Z,1658,2,62.9,0,2020-04-06T07:41:22Z,3261372898
7,Morning Run,3.504,6114.3,1995,58.5,Run,2020-04-05T10:42:11Z,1745,4,58.5,0,2020-04-05T00:42:11Z,3256228900
8,5K 👍🏽,3.756,5006.3,1658,47.5,Run,2020-04-03T11:44:21Z,1333,5,47.5,0,2020-04-03T00:44:21Z,3248358106
9,Morning Run,3.635,5005.9,1516,44.6,Run,2020-04-02T10:21:11Z,1377,4,44.6,0,2020-04-01T23:21:11Z,3244367332


## Conversions

In [9]:
# convert distance in meters to kilometers
df.loc[:,'distance'] = df.distance / 1000

# convert moving in seconds time to minutes
df.loc[:,'moving_time'] = df.moving_time / 60

# convert average speed in meters per second to minutes per kilometer
df.loc[:,'average_speed_mpk'] = 16.666 / df.average_speed

# create new column average speed in kilometers per hour
df.loc[:,'average_speed_kph'] = 60 / df.average_speed_mpk

df.head()

Unnamed: 0,name,average_speed,distance,elapsed_time,total_elevation_gain,type,start_date_local,moving_time,kudos_count,total_elevation_gain.1,achievement_count,start_date,id,average_speed_mpk,average_speed_kph
0,5K + bonus (got lost 😅),3.705,5.6129,1742,15.8,Run,2020-04-16T18:08:38Z,25.25,4,15.8,3,2020-04-16T08:08:38Z,3305913528,4.498246,13.338534
1,Morning Run,3.66,5.0807,1637,14.9,Run,2020-04-15T08:30:48Z,23.133333,2,14.9,0,2020-04-14T22:30:48Z,3300003250,4.553552,13.176527
2,Easter 5K 💪🏽,3.697,5.0204,1684,32.9,Run,2020-04-13T10:17:08Z,22.633333,3,32.9,0,2020-04-13T00:17:08Z,3291570405,4.507979,13.309732
3,Morning Run,3.744,4.7966,1418,32.4,Run,2020-04-11T10:32:23Z,21.35,4,32.4,0,2020-04-11T00:32:23Z,3282179069,4.451389,13.478939
4,Sunny 7K 🌞,3.662,7.1489,2522,88.3,Run,2020-04-09T12:58:08Z,32.533333,4,88.3,2,2020-04-09T02:58:08Z,3273881162,4.551065,13.183727


In [10]:
# Correct the datetime format, refer to this link for more details:
# https://stackoverflow.com/questions/33365055/attributeerror-can-only-use-dt-accessor-with-datetimelike-values

# I am converting start_date to pandas datetime format, so I can use it in plots later.

df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')

#format date to be used in plots later

df.loc[:,'start_date_formatted'] = df.start_date.dt.strftime('%b %Y')
df.loc[:,'start_date_year'] = df.start_date.dt.year
df.loc[:,'start_date_month'] = df.start_date.dt.month
df.loc[:,'start_date_weekday'] = df.start_date.dt.weekday


## Describe

In [11]:
df.groupby('type')['distance','moving_time','average_speed_kph'].describe().T

  """Entry point for launching an IPython kernel.


Unnamed: 0,type,Ride,Run,Workout
distance,count,77.0,273.0,12.0
distance,mean,5.911553,6.370428,0.0
distance,std,2.198835,2.781474,0.0
distance,min,2.6246,0.0076,0.0
distance,25%,5.3227,4.7966,0.0
distance,50%,5.5339,5.6285,0.0
distance,75%,5.6784,7.1489,0.0
distance,max,15.6946,22.0153,0.0
moving_time,count,77.0,273.0,12.0
moving_time,mean,21.013853,30.520696,47.705556


## Correlations

In [12]:
df.corr()

Unnamed: 0,average_speed,distance,elapsed_time,total_elevation_gain,moving_time,kudos_count,total_elevation_gain.1,achievement_count,id,average_speed_mpk,average_speed_kph,start_date_year,start_date_month,start_date_weekday
average_speed,1.0,0.351762,-0.006755,0.437587,-0.336966,-0.012077,0.437587,0.123932,0.368216,-0.920133,1.0,0.295253,0.067849,-0.144801
distance,0.351762,1.0,0.082173,0.786909,0.710913,0.287184,0.786909,0.409034,0.170798,-0.084342,0.351762,0.061834,-0.066009,0.173653
elapsed_time,-0.006755,0.082173,1.0,0.052811,0.085822,0.075226,0.052811,0.035851,0.044532,0.010644,-0.006755,0.052533,-0.039948,-0.003788
total_elevation_gain,0.437587,0.786909,0.052811,1.0,0.46966,0.242535,1.0,0.286353,0.296826,-0.255605,0.437587,0.213418,-0.033275,0.048611
moving_time,-0.336966,0.710913,0.085822,0.46966,1.0,0.258599,0.46966,0.307656,-0.008442,0.227816,-0.336966,-0.039555,-0.148575,0.248537
kudos_count,-0.012077,0.287184,0.075226,0.242535,0.258599,1.0,0.242535,0.176512,0.19953,0.060434,-0.012077,0.231612,-0.183396,0.162416
total_elevation_gain,0.437587,0.786909,0.052811,1.0,0.46966,0.242535,1.0,0.286353,0.296826,-0.255605,0.437587,0.213418,-0.033275,0.048611
achievement_count,0.123932,0.409034,0.035851,0.286353,0.307656,0.176512,0.286353,1.0,-0.111871,-0.034017,0.123932,-0.081812,-0.030966,0.046333
id,0.368216,0.170798,0.044532,0.296826,-0.008442,0.19953,0.296826,-0.111871,1.0,-0.343686,0.368216,0.90761,-0.310458,-0.099162
average_speed_mpk,-0.920133,-0.084342,0.010644,-0.255605,0.227816,0.060434,-0.255605,-0.034017,-0.343686,1.0,-0.920133,-0.307781,0.073292,0.156322


In [13]:
df

Unnamed: 0,name,average_speed,distance,elapsed_time,total_elevation_gain,type,start_date_local,moving_time,kudos_count,total_elevation_gain.1,achievement_count,start_date,id,average_speed_mpk,average_speed_kph,start_date_formatted,start_date_year,start_date_month,start_date_weekday
0,5K + bonus (got lost 😅),3.705,5.6129,1742,15.8,Run,2020-04-16T18:08:38Z,25.250000,4,15.8,3,2020-04-16 08:08:38+00:00,3305913528,4.498246,13.338534,Apr 2020,2020,4,3
1,Morning Run,3.660,5.0807,1637,14.9,Run,2020-04-15T08:30:48Z,23.133333,2,14.9,0,2020-04-14 22:30:48+00:00,3300003250,4.553552,13.176527,Apr 2020,2020,4,1
2,Easter 5K 💪🏽,3.697,5.0204,1684,32.9,Run,2020-04-13T10:17:08Z,22.633333,3,32.9,0,2020-04-13 00:17:08+00:00,3291570405,4.507979,13.309732,Apr 2020,2020,4,0
3,Morning Run,3.744,4.7966,1418,32.4,Run,2020-04-11T10:32:23Z,21.350000,4,32.4,0,2020-04-11 00:32:23+00:00,3282179069,4.451389,13.478939,Apr 2020,2020,4,5
4,Sunny 7K 🌞,3.662,7.1489,2522,88.3,Run,2020-04-09T12:58:08Z,32.533333,4,88.3,2,2020-04-09 02:58:08+00:00,3273881162,4.551065,13.183727,Apr 2020,2020,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357,Carrera de noche,3.363,5.6631,1789,43.3,Run,2016-11-02T19:04:48Z,28.066667,0,43.3,1,2016-11-02 08:04:48+00:00,1106602454,4.955694,12.107284,Nov 2016,2016,11,2
358,Carrera de noche,3.293,8.1100,2592,63.2,Run,2016-10-24T18:30:28Z,41.050000,0,63.2,0,2016-10-24 07:30:28+00:00,1106602462,5.061039,11.855274,Oct 2016,2016,10,0
359,Carrera de mañana,0.000,0.0076,0,0.0,Run,2013-04-07T08:47:03Z,0.000000,0,0.0,0,2013-04-07 11:47:03+00:00,1106602432,inf,0.000000,Apr 2013,2013,4,6
360,Carrera de mañana,3.805,21.3313,5606,160.8,Run,2013-04-07T06:04:23Z,93.433333,0,160.8,8,2013-04-07 09:04:23+00:00,1106602567,4.380026,13.698548,Apr 2013,2013,4,6


## Plots

In [14]:
# import altair to graph

import altair as alt
print(alt.__version__)
alt.renderers.enable('default')
print(alt.renderers.active)


4.1.0
default


In [15]:
data = df.loc[df.type=='Run',:][['id','start_date_formatted','average_speed_mpk']]
data.loc[:,'average_speed_mpk'] = data.average_speed_mpk.apply(lambda d: round(d,1))

bar = alt.Chart(data).mark_bar().encode(
    alt.X('average_speed_mpk:Q', bin=alt.Bin(step=.25)),
    alt.Y('count()'),
    tooltip=['average_speed_mpk:Q','count()']
)

bar.title = 'Average Speed (min. per km.) by Count of Runs'

rule = alt.Chart(data).mark_rule(color='orange').encode(
    x='mean(average_speed_mpk):Q',
    size=alt.value(2)
)

alt.layer(
    bar,
    rule
)

In [16]:
data = df.loc[df.type=='Run',:][['id','start_date_formatted','distance']]
data.loc[:,'distance'] = data.distance.apply(lambda d: round(d))

bar = alt.Chart(data).mark_bar().encode(
    alt.X('distance:Q', bin=alt.Bin(step=1)),
    alt.Y('count()'),
    tooltip=['distance:Q','count()']
)

bar.title = 'Distance (km.) by Count of Runs'

rule = alt.Chart(data).mark_rule(color='orange').encode(
    x='mean(distance):Q',
    size=alt.value(2)
)

alt.layer(
    bar,
    rule
)

In [17]:
data = df.loc[df['type'].isin(['Run','Ride']),:]
data = data[['id','start_date_formatted','type','distance','achievement_count']]

chart = alt.Chart(data).transform_calculate(
    url='https://www.strava.com/activities/' + alt.datum.id
).mark_circle().encode(
    x='distance',
    y='achievement_count',
    color='type',
    href='url:N',
    tooltip=['start_date_formatted','distance','achievement_count','url:N']
).facet(
    column='type'
).resolve_scale(
    x='independent',
    y='independent'
)

chart.title = 'Activities by Acheivement Count and Distance (click circle to go to activity)'

chart


In [18]:
data = df.loc[df['type'].isin(['Run','Ride']),:]
data = data[['id','start_date_formatted','type','distance','achievement_count']]

chart = alt.Chart(data).transform_calculate(
    url='https://www.strava.com/activities/' + alt.datum.id
).mark_circle().encode(
    x='distance',
    y='achievement_count',
    color='type',
    href='url:N',
    tooltip=['start_date_formatted','distance','achievement_count','url:N']
).facet(
    column='type'
).resolve_scale(
    x='independent',
    y='independent'
)

chart.title = 'Activities by Acheivement Count and Distance (click circle to go to activity)'

chart
data_run = df.loc[df['type'] == 'Run',:]
data_run = data_run[['id','start_date_formatted','type','distance','average_speed_kph']]

chart_run = alt.Chart(data_run).transform_calculate(
    url='https://www.strava.com/activities/' + alt.datum.id
).mark_circle().encode(
    x='distance',
    y='average_speed_kph',
    color='type',
    href='url:N',
    tooltip=['start_date_formatted','distance','average_speed_kph','url:N']
).interactive()

chart_run.title = 'Runs'

data_ride = df.loc[df['type'] == 'Ride',:]
data_ride = data_ride[['id','start_date_formatted','type','distance','average_speed_kph']]

chart_ride = alt.Chart(data_ride).transform_calculate(
    url='https://www.strava.com/activities/' + alt.datum.id
).mark_circle().encode(
    x='distance',
    y='average_speed_kph',
    color='type',
    href='url:N',
    tooltip=['start_date_formatted','distance','average_speed_kph','url:N']
).interactive()

chart_ride.title = 'Rides'

chart = alt.hconcat(chart_run, chart_ride)

chart.title = 'Activity Type by Average Speed and Distance (click circle to go to activity)'

chart

In [19]:
data = df[['id','type','start_date','distance','moving_time']].copy()
data.loc[:,'month'] = data.loc[:,'start_date'].dt.strftime('%Y-%m-01')

chart = alt.Chart(data).mark_bar().encode(
    x='month',
    y='count(id)',
    color='type',
    tooltip=['month','type','count(id)']
).interactive()

chart.title = 'Activities over Time'

chart

In [20]:
data = df[['id','type','start_date_weekday','moving_time','distance']].copy()

chart_activities = alt.Chart(data).mark_bar().encode(
    x='start_date_weekday',
    y='count(id)',
    color='type',
    tooltip=['start_date_weekday','type','count(id)']
).interactive()

chart_time = alt.Chart(data).mark_bar().encode(
    x='start_date_weekday',
    y='sum(moving_time)',
    color='type',
    tooltip=['start_date_weekday','type','sum(moving_time)']
).interactive()

chart_distance = alt.Chart(data).mark_bar().encode(
    x='start_date_weekday',
    y='sum(distance)',
    color='type',
    tooltip=['start_date_weekday','type','sum(distance)']
).interactive()

chart_activities.title = 'Number of Activities by Weekday'
chart_time.title = 'Total Moving Time by Weekday'
chart_distance.title = 'Total Distance by Weekday'

chart = alt.hconcat(chart_activities, chart_time, chart_distance)

chart