In [None]:
# Safety switch for the updater below.
#   True  -> only print what WOULD be changed (no writes to Strava).
#   False -> really update activities on Strava.
# Keep the committed default at True so that "Restart & Run All" can never
# modify real activities by accident; flip it to False deliberately for a real run.
DRY_RUN = True

# Optional: limit how many activities to process in one run (None = no limit)
MAX_UPDATES = 100

In [8]:
import os
from dotenv import load_dotenv

# Load variables from .env into the process environment
load_dotenv()

# Read the Strava OAuth credentials into module-level constants
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
REFRESH_TOKEN = os.getenv("REFRESH_TOKEN")

# Fail fast when a credential is absent or empty, and say WHICH one so the
# .env file can be fixed without guessing. Only variable names are reported,
# never their values.
_missing_credentials = [
    name
    for name, value in (
        ("CLIENT_ID", CLIENT_ID),
        ("CLIENT_SECRET", CLIENT_SECRET),
        ("REFRESH_TOKEN", REFRESH_TOKEN),
    )
    if not value
]
if _missing_credentials:
    raise RuntimeError(
        "Missing one or more Strava credentials in .env: "
        + ", ".join(_missing_credentials)
    )

In [1]:
import requests
import time

In [None]:
def get_access_token(client_id, client_secret, refresh_token, timeout=30):
    """
    Use the refresh token to get a fresh access token from Strava.

    Parameters
    ----------
    client_id, client_secret, refresh_token
        Strava OAuth credentials, sent form-encoded to the token endpoint.
    timeout
        Seconds to wait for the HTTP request before giving up. Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    tuple
        ``(access_token, token_payload)`` where ``token_payload`` is the full
        decoded JSON response, so the caller can inspect it.

    Raises
    ------
    requests.HTTPError
        If Strava answers with a non-success status code.
    KeyError
        If the response JSON has no ``access_token`` field.
    """
    url = "https://www.strava.com/oauth/token"
    payload = {
        "client_id": client_id,
        "client_secret": client_secret,
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
    }
    resp = requests.post(url, data=payload, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    scopes = data.get("scope")
    if scopes is None:
        # The response does not always carry a 'scope' field. A missing field
        # says nothing about the granted scopes, so do not raise a false alarm.
        print("Token scopes: not reported in the token response; cannot verify 'activity:write' here.")
    else:
        print(f"Token scopes: {scopes}")
        if "activity:write" not in str(scopes):
            print("⚠️ WARNING: token does NOT include 'activity:write' – updates will fail.")
    return data["access_token"], data


def fetch_all_activities(access_token, per_page=200, timeout=30):
    """
    Fetch all activities for the authenticated athlete.

    Strava paginates results; pages are requested one after another until an
    empty page is returned.

    Parameters
    ----------
    access_token
        Bearer token sent in the Authorization header.
    per_page
        Number of activities requested per page.
    timeout
        Seconds to wait for each HTTP request. Without a timeout a single
        stalled request would hang the whole pagination loop.

    Returns
    -------
    list
        The decoded JSON items of every page, concatenated in page order.

    Raises
    ------
    requests.HTTPError
        If any page request returns a non-success status code.
    """
    url = "https://www.strava.com/api/v3/athlete/activities"
    headers = {"Authorization": f"Bearer {access_token}"}

    page = 1
    all_activities = []

    while True:
        params = {"page": page, "per_page": per_page}
        resp = requests.get(url, headers=headers, params=params, timeout=timeout)
        resp.raise_for_status()
        activities = resp.json()
        # An empty page is the end-of-data signal; a short page is not treated as one.
        if not activities:
            break

        all_activities.extend(activities)
        print(f"Fetched page {page}, activities this page: {len(activities)}")
        page += 1

    print(f"Total activities fetched: {len(all_activities)}")
    return all_activities


def update_activity_sport_type(access_token, activity_id, new_sport_type="Canoeing", timeout=30):
    """
    Update a single activity's sport_type.

    This definition is required by main(), which calls it for every matching
    activity when DRY_RUN is False; with the function commented out, a real
    run fails with NameError.

    Parameters
    ----------
    access_token
        Bearer token sent in the Authorization header.
    activity_id
        Identifier of the activity to update; interpolated into the URL.
    new_sport_type
        Value written to the activity's ``sport_type`` field. Per the original
        author's note it must be one of Strava's enum values, e.g. 'Canoeing',
        'Kayaking', 'Workout', 'Tennis', 'HighIntensityIntervalTraining'.
    timeout
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    bool
        True when Strava answered with HTTP 200, False otherwise. Failures are
        reported on stdout rather than raised, so a batch run can continue.
    """
    url = f"https://www.strava.com/api/v3/activities/{activity_id}"
    headers = {"Authorization": f"Bearer {access_token}"}

    # Sent as form-encoded data (data=...), not JSON, as the original
    # implementation insisted.
    payload = {
        "sport_type": new_sport_type
        # Optional: also set deprecated 'type' to keep it consistent:
        # "type": new_sport_type
    }

    resp = requests.put(url, headers=headers, data=payload, timeout=timeout)

    updated = resp.status_code == 200
    if updated:
        print(f"✅ Updated activity {activity_id} to sport_type={new_sport_type}")
    else:
        print(f"⚠️ Failed to update activity {activity_id}: {resp.status_code} {resp.text}")
    return updated


def main():
    """
    Re-tag generic Strava 'Workout' activities as 'Canoeing'.

    Steps:
      1. Exchange the refresh token for an access token.
      2. Fetch every activity of the authenticated athlete.
      3. Keep only activities whose sport_type is exactly "Workout".
      4. For each one, print a summary and either report what would change
         (DRY_RUN) or call update_activity_sport_type, sleeping 1 s after
         every real update.

    Reads the module-level CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN, DRY_RUN
    and MAX_UPDATES. MAX_UPDATES caps the number of activities processed in
    one run: None means no limit, and 0 (or a negative value) processes
    nothing at all.
    """
    # 1) Get access token
    print("Getting access token...")
    token_result = get_access_token(CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN)
    # A later cell in this notebook redefines get_access_token to return the
    # bare token string instead of (token, payload). Accept both shapes so this
    # cell still works whichever definition is live in the kernel.
    access_token = token_result[0] if isinstance(token_result, tuple) else token_result
    print("Access token obtained.")

    # 2) Fetch all activities
    activities = fetch_all_activities(access_token)

    # 3) Filter activities that are generic 'Workout'
    #    We ONLY touch those where sport_type is exactly "Workout".
    #    This avoids changing Tennis, HIIT, etc. that happen to have type="Workout".
    workout_activities = [
        a for a in activities
        if a.get("sport_type") == "Workout"
    ]

    print(f"Found {len(workout_activities)} activities with sport_type == 'Workout'.")

    # Apply the cap BEFORE touching anything: checking it only after an update
    # would let MAX_UPDATES=0 still modify one activity.
    if MAX_UPDATES is None:
        to_process = workout_activities
    else:
        to_process = workout_activities[:max(MAX_UPDATES, 0)]

    # 4) Update each to 'Canoeing'
    updates_done = 0

    for a in to_process:
        activity_id = a["id"]
        name = a.get("name", "")
        start_date = a.get("start_date_local", "")
        current_sport = a.get("sport_type")
        current_type = a.get("type")

        print(f"\nActivity ID: {activity_id} | Name: {name} | Date: {start_date}")
        print(f"Current sport_type: {current_sport} (type: {current_type})")

        if DRY_RUN:
            print("DRY RUN → would update sport_type to 'Canoeing'")
        else:
            update_activity_sport_type(access_token, activity_id, new_sport_type="Canoeing")
            # Be polite with rate limits; 1 req/sec = 60/min, well under 200 / 15min default.
            time.sleep(1)

        updates_done += 1

    if MAX_UPDATES is not None and updates_done >= MAX_UPDATES:
        print(f"\nReached MAX_UPDATES={MAX_UPDATES}, stopping.")

    print(f"\nFinished. Activities processed: {updates_done}. DRY_RUN={DRY_RUN}")


# Script-style entry point: run the updater defined above.
# NOTE(review): inside a notebook kernel __name__ is normally "__main__", so
# executing this cell presumably runs main() right away, honoring the current
# DRY_RUN / MAX_UPDATES settings — confirm that is intended before Run All.
if __name__ == "__main__":
    main()

In [2]:
import os
import requests
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

In [3]:
# Where to store / read your cached activities (a single Parquet file).
# The location can be overridden without editing the notebook by setting the
# STRAVA_DATA_PATH environment variable (e.g. in .env); otherwise the original
# machine-specific path is used, so existing caches keep working.
DATA_PATH = os.getenv(
    "STRAVA_DATA_PATH",
    r"C:\Users\RuiCoelho\Desktop\Rui Coelho\Me\Strava_Analysis\data\strava_activities.parquet",
)

In [None]:
# ============================================================
# API LAYER
# ============================================================

def get_access_token(
    client_id: str,
    client_secret: str,
    refresh_token: str,
    timeout: float = 30,
) -> str:
    """
    Use the refresh token to get a fresh access token from Strava.
    Requires that the refresh token was created with at least:
    - read, activity:read_all (to fetch all activities)

    Note: an earlier cell of this notebook defines a function with the same
    name that returns ``(access_token, payload)``. Running this cell replaces
    it in the kernel with this version, which returns only the token string.

    Parameters
    ----------
    client_id, client_secret, refresh_token
        Strava OAuth credentials, sent form-encoded to the token endpoint.
    timeout
        Seconds to wait for the HTTP request before giving up. Without a
        timeout a stalled connection would block the notebook indefinitely.

    Returns
    -------
    str
        The ``access_token`` field of the JSON response.

    Raises
    ------
    requests.HTTPError
        If Strava answers with a non-success status code.
    KeyError
        If the response JSON has no ``access_token`` field.
    """
    url = "https://www.strava.com/oauth/token"
    payload = {
        "client_id": client_id,
        "client_secret": client_secret,
        "grant_type": "refresh_token",
        "refresh_token": refresh_token,
    }
    resp = requests.post(url, data=payload, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    # Strava often omits 'scope' here; that's OK
    print("Access token obtained.")
    return data["access_token"]


def fetch_all_activities_from_strava(
    access_token: str,
    per_page: int = 200,
    timeout: float = 30,
) -> list:
    """
    Fetch all activities for the authenticated athlete.

    Strava paginates results; pages are requested one after another until an
    empty page is returned.

    Parameters
    ----------
    access_token
        Bearer token sent in the Authorization header.
    per_page
        Number of activities requested per page.
    timeout
        Seconds to wait for each HTTP request. Without a timeout a single
        stalled request would hang the whole pagination loop.

    Returns
    -------
    list
        Activity dicts (raw JSON from Strava), concatenated in page order.

    Raises
    ------
    requests.HTTPError
        If any page request returns a non-success status code.
    """
    url = "https://www.strava.com/api/v3/athlete/activities"
    headers = {"Authorization": f"Bearer {access_token}"}

    page = 1
    all_activities = []

    while True:
        params = {"page": page, "per_page": per_page}
        resp = requests.get(url, headers=headers, params=params, timeout=timeout)
        resp.raise_for_status()
        activities = resp.json()

        # An empty page is the end-of-data signal; a short page is not treated as one.
        if not activities:
            break

        all_activities.extend(activities)
        print(f"Fetched page {page}, activities this page: {len(activities)}")
        page += 1

    print(f"Total activities fetched: {len(all_activities)}")
    return all_activities

# ============================================================
# STORAGE LAYER
# ============================================================

def save_activities_to_parquet(activities: list, path: str) -> None:
    """
    Save raw activity JSON list to a Parquet file via pandas.

    The list is flattened with ``pd.json_normalize`` so every field Strava
    returned is kept as a column. The parent directory of ``path`` is created
    when it does not exist yet, so a first run on a fresh checkout does not
    fail after all activities have already been downloaded.

    Parameters
    ----------
    activities
        List of activity dicts (raw JSON).
    path
        Destination file path of the Parquet file.
    """
    df = pd.json_normalize(activities)

    # Make sure start_date_local is always present
    if "start_date_local" not in df.columns:
        df["start_date_local"] = pd.NaT

    # A bare file name has no directory part; only create one when present.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df.to_parquet(path, index=False)
    print(f"Saved {len(df)} activities to {path}")

def main():
    """
    Make sure the local activity cache exists.

    - If the Parquet file at DATA_PATH is missing → EXTRACT: obtain an access
      token, download every activity from Strava and write the cache file.
    - If it is already there → nothing is fetched.

    The ANALYSIS stage is currently disabled (see the commented lines at the
    end of the function), so no analysis runs in either case.
    """
    cache_missing = not os.path.exists(DATA_PATH)

    if cache_missing:
        print("Parquet file not found. Switching to EXTRACT mode...")

        token = get_access_token(CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN)
        save_activities_to_parquet(
            fetch_all_activities_from_strava(token),
            DATA_PATH,
        )

    # Analysis stage, disabled for now:
    # print("Running in ANALYSIS mode...")
    # df = load_activities_from_parquet(DATA_PATH)
    # df = prepare_dataframe(df)

# Script-style entry point for the extract step.
# NOTE(review): this notebook defines main() twice; the name resolves to
# whichever definition was executed last in the kernel, so run the cell above
# before this one to be sure the extract version is the one being called.
if __name__ == "__main__":
    main()

Parquet file not found. Switching to EXTRACT mode...
Access token obtained.
Fetched page 1, activities this page: 200
Fetched page 2, activities this page: 200
Fetched page 3, activities this page: 200
Fetched page 4, activities this page: 200
Fetched page 5, activities this page: 200
Fetched page 6, activities this page: 200
Fetched page 7, activities this page: 200
Fetched page 8, activities this page: 200
Fetched page 9, activities this page: 200
Fetched page 10, activities this page: 200
Fetched page 11, activities this page: 46
Total activities fetched: 2046
Saved 2046 activities to C:\Users\RuiCoelho\Desktop\Rui Coelho\Me\Strava_Analysis\data\strava_activities.parquet
Running in ANALYSIS mode...
Loaded 2046 activities from C:\Users\RuiCoelho\Desktop\Rui Coelho\Me\Strava_Analysis\data\strava_activities.parquet


NameError: name 'prepare_dataframe' is not defined