# Strava Activities Downloader

Download all of your Strava activities and export them to CSV.

-----

## References: 

* https://developers.strava.com/docs/
* Alternative Library: https://github.com/hozn/stravalib/

------

## Dependencies and Libraries

In [1]:
import collections
import datetime
import json
import os
import time
from datetime import date

import requests

In [2]:
import numpy as np, string, re, pytz
import pandas as pd

import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline

------

## Authorization / Credentials

In [3]:
import json

# Load the Strava API token from the local credentials file.
# Only the JSON parse needs the open file handle; the lookups happen afterwards.
with open("credentials.json", "r") as file:
    credentials = json.load(file)

strava_cr = credentials['strava']
ACCESS_TOKEN = strava_cr['ACCESS_TOKEN']

-----

# Athlete Info

SEE: https://developers.strava.com/docs/reference/#api-Athletes

In [4]:
def get_strava_athlete():
    """Fetch the authenticated athlete's profile from the Strava API.

    Returns:
        dict: The decoded JSON body of the ``/api/v3/athlete/`` response.

    Raises:
        requests.HTTPError: If Strava answers with a 4xx/5xx status (for
            example when ``ACCESS_TOKEN`` is expired), instead of returning
            the error payload as if it were a profile.
    """
    url = 'https://www.strava.com/api/v3/athlete/'
    header = {'Authorization': 'Bearer '+ACCESS_TOKEN}
    # The endpoint returns a single object, so no paging parameters are sent.
    response = requests.get(url, headers=header, timeout=30)
    # Fail loudly here rather than letting callers read fields off an error body.
    response.raise_for_status()
    return response.json()

In [5]:
# Fetch the profile once; later cells read `firstname` and `id` from it.
athlete = get_strava_athlete()

In [6]:
# Pull out the ID (used for the stats request) and the name, then greet the user.
athlete_id = athlete.get('id')
firstname = athlete.get('firstname')
print(f"Hi, {firstname} (ID: {athlete_id})")

Hi, Mark (ID: 13740897)


----

# Athlete Stats

SEE: https://developers.strava.com/docs/reference/#api-Athletes-getStats

In [7]:
def get_strava_athlete_stats(athlete_id):
    """Fetch the activity statistics of one athlete from the Strava API.

    Args:
        athlete_id: Strava athlete identifier; it is converted with ``str()``
            and placed in the request path.

    Returns:
        dict: The decoded JSON body of the ``/athletes/{id}/stats/`` response.

    Raises:
        requests.HTTPError: If Strava answers with a 4xx/5xx status.
    """
    url = 'https://www.strava.com/api/v3/athletes/' + str(athlete_id) + '/stats/'
    header = {'Authorization': 'Bearer '+ACCESS_TOKEN}
    # The endpoint returns a single object, so no paging parameters are sent.
    response = requests.get(url, headers=header, timeout=30)
    # Surface auth / rate-limit errors instead of returning the error payload.
    response.raise_for_status()
    return response.json()

In [8]:
# Request the stats for the athlete ID read from the profile above.
athlete_stats_dict = get_strava_athlete_stats(athlete_id)

In [19]:
# Show the raw stats payload as returned by the API.
print(athlete_stats_dict)

----

# Get Basic Activities from Strava

Get basic activity summaries from Strava and export them to CSV.

SEE: https://developers.strava.com/docs/reference/#api-Activities

In [10]:
def convert_timestamp(i):
    """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` string into a naive ``datetime``.

    The trailing ``Z`` is matched as a literal character, so the result
    carries no timezone information. Raises ``ValueError`` when the string
    does not match the format.
    """
    # Relies on `datetime` being the class (from datetime import datetime).
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    parsed = datetime.strptime(i, timestamp_format)
    return parsed

def event_timestamp(i):
    """Return an activity's ``start_date_local`` value parsed by ``convert_timestamp``.

    ``i`` is a mapping with a ``'start_date_local'`` key; a missing key
    raises ``KeyError``.
    """
    local_start = i['start_date_local']
    return convert_timestamp(local_start)

In [11]:
def get_strava_activites():
    """Download every activity summary of the authenticated athlete.

    Requests ``/api/v3/athlete/activities`` page by page, 200 records per
    page, and stops at the first page that comes back with fewer than 200.

    Returns:
        dict: Maps ``event_timestamp(activity)`` to the raw activity dict.
        Activities that share the same ``start_date_local`` collapse into
        one entry (the later one in the listing wins).

    Raises:
        requests.HTTPError: If any page request answers with a 4xx/5xx
            status. Without this check an error payload (a dict, not a list)
            would be counted and concatenated as if it were activity data.
    """
    url = 'https://www.strava.com/api/v3/athlete/activities'
    header = {'Authorization': 'Bearer '+ACCESS_TOKEN}
    per_page = 200
    dataset = []
    page = 1
    while True:
        param = {'per_page': per_page, 'page': page}
        response = requests.get(url, headers=header, params=param, timeout=30)
        response.raise_for_status()
        batch = response.json()
        dataset.extend(batch)
        # A short (or empty) page means there is nothing left to fetch.
        if len(batch) < per_page:
            break
        page += 1
    return {event_timestamp(i): i for i in dataset}

In [12]:
# Get activities data from Strava (one entry per distinct local start time)
strava_dict = get_strava_activites()
print(f"Extracted {len(strava_dict)} activities from Strava.")

Extracted 743 activities from Strava.


In [13]:
# Create DF (one row per activity, indexed by its start timestamp) and export to CSV
strava_activites = pd.DataFrame.from_dict(strava_dict, orient='index')
# to_csv does not create missing parent folders, so make sure data/ exists first.
os.makedirs('data', exist_ok=True)
strava_activites.to_csv('data/strava-activities-raw.csv')

In [14]:
# List the fields available in the activity summaries.
strava_activites.columns

Index(['resource_state', 'athlete', 'name', 'distance', 'moving_time',
       'elapsed_time', 'total_elevation_gain', 'type', 'workout_type', 'id',
       'external_id', 'upload_id', 'start_date', 'start_date_local',
       'timezone', 'utc_offset', 'start_latlng', 'end_latlng', 'location_city',
       'location_state', 'location_country', 'start_latitude',
       'start_longitude', 'achievement_count', 'kudos_count', 'comment_count',
       'athlete_count', 'photo_count', 'map', 'trainer', 'commute', 'manual',
       'private', 'flagged', 'gear_id', 'from_accepted_tag', 'average_speed',
       'max_speed', 'has_heartrate', 'average_heartrate', 'max_heartrate',
       'elev_high', 'elev_low', 'pr_count', 'total_photo_count', 'has_kudoed',
       'average_watts', 'kilojoules', 'device_watts', 'average_cadence'],
      dtype='object')

-----

# Get Detailed Activity from Strava

SEE: https://developers.strava.com/docs/reference/#api-Activities-getActivityById

In [15]:
# Use the ID of the last row of the activities table as a sample activity.
test_activity_id = strava_activites.iloc[-1]['id']

In [16]:
def get_strava_activity(id):
    """Fetch the detailed record of a single activity from the Strava API.

    Args:
        id: Strava activity identifier; it is converted with ``str()`` and
            appended to the request path. (The name shadows the ``id``
            builtin inside this function; it is kept so existing keyword
            callers continue to work.)

    Returns:
        dict: The decoded JSON body of the ``/activities/{id}`` response.

    Raises:
        requests.HTTPError: If Strava answers with a 4xx/5xx status, e.g.
            when the activity does not exist or is not visible to this token.
    """
    url = 'https://www.strava.com/api/v3/activities/' + str(id)
    header = {'Authorization': 'Bearer '+ACCESS_TOKEN}
    # The endpoint returns a single object, so no paging parameters are sent.
    response = requests.get(url, headers=header, timeout=30)
    response.raise_for_status()
    return response.json()

In [17]:
# Fetch the sample activity selected from the downloaded table instead of a
# hard-coded ID, which only exists in the original author's account.
activity = get_strava_activity(test_activity_id)

In [18]:
# activity

{'achievement_count': 0,
 'athlete': {'id': 13740897, 'resource_state': 1},
 'athlete_count': 1,
 'available_zones': [],
 'average_heartrate': 149.2,
 'average_speed': 2.648,
 'best_efforts': [{'achievements': [],
   'activity': {'id': 1701792001, 'resource_state': 1},
   'athlete': {'id': 13740897, 'resource_state': 1},
   'distance': 400,
   'elapsed_time': 128,
   'end_index': 177,
   'id': 3652211227,
   'moving_time': 129,
   'name': '400m',
   'pr_rank': None,
   'resource_state': 2,
   'start_date': '2018-07-14T11:11:41Z',
   'start_date_local': '2018-07-14T19:11:41Z',
   'start_index': 125},
  {'achievements': [],
   'activity': {'id': 1701792001, 'resource_state': 1},
   'athlete': {'id': 13740897, 'resource_state': 1},
   'distance': 805,
   'elapsed_time': 262,
   'end_index': 219,
   'id': 3652211230,
   'moving_time': 263,
   'name': '1/2 mile',
   'pr_rank': None,
   'resource_state': 2,
   'start_date': '2018-07-14T11:11:11Z',
   'start_date_local': '2018-07-14T19:11:11Z