<a href="https://colab.research.google.com/github/ncourtel1/strava-urban-mobility-pipeline/blob/main/notebook/01_pipeline_strava.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install stravalib pandas polyline requests folium python-dotenv stravaio wearipedia

Collecting stravalib
  Downloading stravalib-2.4-py3-none-any.whl.metadata (12 kB)
Collecting polyline
  Downloading polyline-2.0.3-py3-none-any.whl.metadata (6.5 kB)
Collecting stravaio
  Downloading stravaio-0.0.9-py2.py3-none-any.whl.metadata (5.1 kB)
Collecting wearipedia
  Downloading wearipedia-0.1.7-py3-none-any.whl.metadata (5.3 kB)
Collecting pint (from stravalib)
  Downloading pint-0.25-py3-none-any.whl.metadata (10 kB)
Collecting maya (from stravaio)
  Downloading maya-0.6.1-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting loguru (from stravaio)
  Downloading loguru-0.7.3-py3-none-any.whl.metadata (22 kB)
Collecting fastapi==0.101 (from wearipedia)
  Downloading fastapi-0.101.0-py3-none-any.whl.metadata (23 kB)
Collecting fbm>=0.3.0 (from wearipedia)
  Downloading fbm-0.3.0-py3-none-any.whl.metadata (6.4 kB)
Collecting garminconnect>=0.2.25 (from wearipedia)
  Downloading garminconnect-0.2.30-py3-none-any.whl.metadata (14 kB)
Collecting garth>=0.5.2 (from wearipedia)
  Down

In [2]:
import pandas as pd
from stravalib import Client
import datetime
import polyline
import urllib
import requests
import json
from stravaio import StravaIO, strava_oauth2
import folium
import logging
import wearipedia

  return datetime.utcnow().replace(tzinfo=utc)

  from scipy.ndimage.filters import uniform_filter1d



## 2. Authentication and Authorization
To obtain access to data, authorization is required. Put in your client id, client secret token and refresh token for your Strava account. We'll use this to extract the data in the sections below.

In [3]:
from dotenv import load_dotenv
import os

# Charger le fichier .env
load_dotenv('.env')

# Récupérer les variables
client_id = os.getenv('STRAVA_CLIENT_ID')
client_secret = os.getenv('STRAVA_CLIENT_SECRET')

In [4]:
params_oauth = {
    "client_id": client_id,
    "response_type": "code",
    "redirect_uri": f"http://localhost:8000/authorization_successful",
    "scope": "read,profile:read_all,activity:read",
    "approval_prompt": "force"
}
values_url = urllib.parse.urlencode(params_oauth)
base_url = 'https://www.strava.com/oauth/authorize'
authorize_url = base_url + '?' + values_url
print(authorize_url)

https://www.strava.com/oauth/authorize?client_id=181205&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8000%2Fauthorization_successful&scope=read%2Cprofile%3Aread_all%2Cactivity%3Aread&approval_prompt=force


In [5]:
AUTHORIZATION_CODE = "a56a2ed538dcacdfab7bf50178ca8619516c766b" #@param {type: "string"}

In [6]:
def refresh_strava_access_token(client_id, client_secret, AUTHORIZATION_CODE):
    """
    Refresh the Strava access token using the refresh token.

    Args:
        client_id (str): Your Strava API client ID
        client_secret (str): Your Strava API client secret
        refresh_token (str): The refresh token

    Returns:
        str: The new access token
    """
    url = "https://www.strava.com/api/v3/oauth/token"
    payload = {
        'client_id': client_id,
        'client_secret': client_secret,
        'code': AUTHORIZATION_CODE,
        'grant_type': 'authorization_code'
    }

    try:
        response = requests.post(url, data=payload)
        response.raise_for_status()
        data = response.json()

        new_access_token = data.get('access_token')
        new_refresh_token = data.get('refresh_token') # Get the new refresh token as well

        if not new_access_token:
            print("Warning: No new access token found in response")

        if not new_refresh_token:
            print("Warning: No new refresh token found in response")

        global refresh_token
        refresh_token = new_refresh_token

        return new_access_token

    except requests.exceptions.RequestException as e:
        print(f"Error making request to refresh token: {e}")
        return None

# Refresh the access token
global access_token
access_token = refresh_strava_access_token(client_id, client_secret, AUTHORIZATION_CODE)

if access_token:
    print("New access token obtained.")
    print("New access token:", access_token)
    print("New refresh token:", refresh_token)
else:
    print("Failed to obtain a new access token.")

New access token obtained.
New access token: 009adbd512b0952c222a1aa1f918bbbbf1537aa7
New refresh token: 91bf1b3654e117efc7b7bfb3d4fc9c97fd174b26


## 3. Data Extraction

### En utilisant Stravalib

In [10]:
from stravalib.client import Client
import pandas as pd
import time

client = Client(access_token=access_token)
client.refresh_token = refresh_token

activities = client.get_activities(limit=50)
segments_data = []

for activity in activities:
    try:
        detailed_activity = client.get_activity(activity.id)

        if hasattr(detailed_activity, "segment_efforts") and detailed_activity.segment_efforts:
            for effort in detailed_activity.segment_efforts:
                seg = effort.segment

                # conversion sécurisée pour les floats
                distance = None
                try:
                    distance = float(seg.distance)
                except Exception:
                    distance = None

                segments_data.append({
                    "activity_id": activity.id,
                    "activity_name": activity.name,
                    "activity_type": activity.type,
                    "segment_id": getattr(seg, "id", None),
                    "segment_name": getattr(seg, "name", None),
                    "distance_m": distance,
                    "avg_grade": getattr(seg, "average_grade", None),
                    "elev_high": getattr(seg, "elevation_high", None),
                    "elev_low": getattr(seg, "elevation_low", None),
                    "start_lat": getattr(seg.start_latlng, "lat", None) if seg.start_latlng else None,
                    "start_lng": getattr(seg.start_latlng, "lon", None) if seg.start_latlng else None,
                    "end_lat": getattr(seg.end_latlng, "lat", None) if seg.end_latlng else None,
                    "end_lng": getattr(seg.end_latlng, "lon", None) if seg.end_latlng else None,
                    "starred": getattr(seg, "starred", None),
                })

        time.sleep(1)  # limite API

    except Exception as e:
        print(f"Erreur sur l’activité {activity.id}: {e}")
        continue

df_segments = pd.DataFrame(segments_data)
print(f"{len(df_segments)} segments extraits.")
df_segments.head()


  return datetime.utcnow().replace(tzinfo=utc)



272 segments extraits.


Unnamed: 0,activity_id,activity_name,activity_type,segment_id,segment_name,distance_m,avg_grade,elev_high,elev_low,start_lat,start_lng,end_lat,end_lng,starred
0,16150214950,Prépa Trail 42km W2S2,root='Run',33911037,Robec jusqu'au parc St Paul,915.1,0.2,12.2,9.0,49.441453,1.115217,49.439174,1.126928,
1,16150214950,Prépa Trail 42km W2S2,root='Run',13262340,rue des petites eaux,1612.6,-0.1,17.8,10.0,49.438435,1.133888,49.441569,1.115287,
2,16150214950,Prépa Trail 42km W2S2,root='Run',17423013,robec sprint,342.1,-0.3,11.0,10.0,49.439213,1.126969,49.440369,1.122659,
3,16150214950,Prépa Trail 42km W2S2,root='Run',19200614,Try to catch the big fish,839.9,-0.1,17.7,14.1,49.439263,1.12683,49.441727,1.116123,
4,16150214950,Prépa Trail 42km W2S2,root='Run',9916665,Rue des sapins,970.2,11.4,247.6,137.2,49.44243,1.113076,49.448991,1.119666,


In [13]:
df_segments.to_csv("data/strava_segments.csv", index=False)
df_segments.to_excel("data/strava_segments.xlsx", index=False)

### En utilisant wearipedia

In [14]:
#set start and end dates
start_date='2021-09-01' #@param {type:"string"}
end_date='2025-10-31' #@param {type:"string"}
synthetic = False #@param {type:"boolean"}

In [77]:
device = wearipedia.get_device("strava/strava")

if not synthetic:
    device.authenticate({
    'client_id':client_id,
    'client_secret':client_secret,
    'refresh_token':refresh_token
    })

params = {"start_date": start_date, "end_date": end_date}

distance = device.get_data("distance", params=params)
moving_time = device.get_data("moving_time", params=params)
elapsed_time = device.get_data("elapsed_time", params=params)
total_elevation_gain = device.get_data("total_elevation_gain", params=params)
average_speed = device.get_data("average_speed", params=params)
max_speed = device.get_data("max_speed", params=params)
average_heartrate = device.get_data("average_heartrate", params=params)
max_heartrate = device.get_data("max_heartrate", params=params)
map_summary_polyline = device.get_data("map_summary_polyline", params=params)
elev_high = device.get_data("elev_high", params=params)
elev_low = device.get_data("elev_low", params=params)
average_cadence = device.get_data("average_cadence", params=params)
average_watts = device.get_data("average_watts", params=params)
kilojoules = device.get_data("kilojoules", params=params)


  return datetime.utcnow().replace(tzinfo=utc)



Requesting Token...

Access Token = 8dd63b88fd47b1cadf92f824cfaac5191a95fda9



  return datetime.utcnow().replace(tzinfo=utc)



## 4. Data Exporting

Exporting to CSV/XLSX

We will thus export steps, heart rates, and breath rates all as separate files.

In [78]:
complete = {
    "distance": distance,
    'moving_time':moving_time,
    'elapsed_time':elapsed_time,
    'total_elevation_gain':total_elevation_gain,
    'average_speed':average_speed,
    'max_speed':max_speed,
    'average_heartrate':average_heartrate,
    'max_heartrate':max_heartrate,
    'map_summary_polyline':map_summary_polyline,
    'elev_high':elev_high,
    'elev_low':elev_low,
    'average_cadence':average_cadence,
    'average_watts':average_watts,
    'kilojoules':kilojoules
}

In [79]:
# distance_df = pd.DataFrame.from_dict(complete['distance'])
# distance_df.to_csv('distance.csv')
# distance_df.to_excel('distance.xlsx')

# moving_time_df = pd.DataFrame.from_dict(complete['moving_time'])
# moving_time_df.to_csv('moving_time.csv')
# moving_time_df.to_excel('moving_time.xlsx')

# elapsed_time_df = pd.DataFrame.from_dict(complete['elapsed_time'])
# elapsed_time_df.to_csv('elapsed_time.csv')
# elapsed_time_df.to_excel('elapsed_time.xlsx')

# total_elevation_gain_df = pd.DataFrame.from_dict(complete['total_elevation_gain'])
# total_elevation_gain_df.to_csv('total_elevation_gain.csv')
# total_elevation_gain_df.to_excel('total_elevation_gain.xlsx')

# average_speed_df = pd.DataFrame.from_dict(complete['average_speed'])
# average_speed_df.to_csv('average_speed.csv')
# average_speed_df.to_excel('average_speed.xlsx')

# max_speed_df = pd.DataFrame.from_dict(complete['max_speed'])
# max_speed_df.to_csv('max_speed.csv')
# max_speed_df.to_excel('max_speed.xlsx')

# average_heartrate_df = pd.DataFrame.from_dict(complete['average_heartrate'])
# average_heartrate_df.to_csv('average_heartrate.csv')
# average_heartrate_df.to_excel('average_heartrate.xlsx')

# max_heartrate_df = pd.DataFrame.from_dict(complete['max_heartrate'])
# max_heartrate_df.to_csv('max_heartrate.csv')
# max_heartrate_df.to_excel('max_heartrate.xlsx')

# map_summary_polyline_df = pd.DataFrame.from_dict(complete['map_summary_polyline'])
# map_summary_polyline_df.to_csv('map_summary_polyline.csv')
# map_summary_polyline_df.to_excel('map_summary_polyline.xlsx')

# elev_high_df = pd.DataFrame.from_dict(complete['elev_high'])
# elev_high_df.to_csv('elev_high.csv')
# elev_high_df.to_excel('elev_high.xlsx')

# elev_low_df = pd.DataFrame.from_dict(complete['elev_low'])
# elev_low_df.to_csv('elev_low.csv')
# elev_low_df.to_excel('elev_low.xlsx')

# average_cadence_df = pd.DataFrame.from_dict(complete['average_cadence'])
# average_cadence_df.to_csv('average_cadence.csv')
# average_cadence_df.to_excel('average_cadence.xlsx')

# average_watts_df = pd.DataFrame.from_dict(complete['average_watts'])
# average_watts_df.to_csv('average_watts.csv')
# average_watts_df.to_excel('average_watts.xlsx')

# kilojoules_df = pd.DataFrame.from_dict(complete['kilojoules'])
# kilojoules_df.to_csv('kilojoules.csv')
# kilojoules_df.to_excel('kilojoules.xlsx')

complete_df = pd.DataFrame.from_dict(complete)
complete_df.to_csv('data/complete.csv')
complete_df.to_excel('data/complete.xlsx')


## Visualization

In [80]:
client = StravaIO(access_token)
athlete = client.get_logged_in_athlete()
activities = client.get_logged_in_athlete_activities(after=20170101)

Fetched 30, the latests is on 2025-05-07 13:32:10+00:00
Fetched 30, the latests is on 2025-08-09 10:43:33+00:00
Fetched 26, the latests is on 2025-10-15 14:59:57+00:00


In [81]:
m = folium.Map(
    tiles="cartodbpositron",
    location=[49.4435, 1.0984],
    zoom_start=11.5,
    control_scale=True
)
folium.TileLayer("cartodbpositron").add_to(m)
folium.TileLayer("cartodbdark_matter").add_to(m)
folium.LayerControl().add_to(m)

<folium.map.LayerControl at 0x7a0fecca9a30>

In [82]:
def downsample(l, n):
    """Returns every nth element from list l. Returns the
    original list if n is set to 1.
    Used to reduce the number of GPS points per activity,
    to improve performance of the website.
    """

    return l[0::n]

def map_activities(activities, folium_map, opacity, weight):
    if len(activities) == 0:
        logging.info("No activities found, returning empty folium map.")
        return folium_map

    for a in activities:
        if a.type == "Workout":
            continue
        streams = client.get_activity_streams(a.id, athlete.id)
        try:
            points = list(zip(streams.lat, streams.lng))
            points = downsample(l=points, n=2)
            if a.type == "Run":
                folium.PolyLine(
                    locations=points, color="#ff9933", opacity=opacity, weight=weight
                ).add_to(folium_map)
            elif a.type == "Ride":
                folium.PolyLine(
                    locations=points, color="#0066ff", opacity=opacity, weight=weight
                ).add_to(folium_map)
            else:
                folium.PolyLine(
                    locations=points, color="#cc00ff", opacity=opacity, weight=weight
                ).add_to(folium_map)
            logging.critical("Mapped activity with id: {}".format(a.id))
        except Exception:
            logging.error("Could not map activity with id: {}".format(a.id))

    return folium_map

In [83]:
m = map_activities(
    activities=activities,
    folium_map=m,
    opacity=0.5,
    weight=1
)

CRITICAL:root:Mapped activity with id: 13901459544
CRITICAL:root:Mapped activity with id: 13906587182
CRITICAL:root:Mapped activity with id: 13927346809
CRITICAL:root:Mapped activity with id: 13943708973
CRITICAL:root:Mapped activity with id: 13954304186
CRITICAL:root:Mapped activity with id: 13972077955
CRITICAL:root:Mapped activity with id: 13993907029
CRITICAL:root:Mapped activity with id: 14002587041
CRITICAL:root:Mapped activity with id: 14020823219
CRITICAL:root:Mapped activity with id: 14038383362
CRITICAL:root:Mapped activity with id: 14061678650
CRITICAL:root:Mapped activity with id: 14068494201
CRITICAL:root:Mapped activity with id: 14078305148
CRITICAL:root:Mapped activity with id: 14088066118
CRITICAL:root:Mapped activity with id: 14106868843
CRITICAL:root:Mapped activity with id: 14147587813
CRITICAL:root:Mapped activity with id: 14164796198
CRITICAL:root:Mapped activity with id: 14175557185
CRITICAL:root:Mapped activity with id: 14194133150
CRITICAL:root:Mapped activity w

In [84]:
m