## Strava GPX Analysis
### Manny Lazalde
### August 25, 2024

I want to perform analysis on my Bayshore Marathon 2024 Strava GPX Data. 

Inspired by https://www.youtube.com/watch?v=F6GZdVemCxw&list=PLQ5j-FTc2VhDj93jQas0a8AvNfSMEiDxz

#### Imports

In [15]:
#checking python version
import sys
print(sys.version)

#web url libraries
import requests
import urllib3
import json
from urllib.request import urlopen
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

#for getting info from lat/long
#from geopy.geocoders import Nominatim

import pandas as pd
#Datetime will allow Python to recognize dates as dates, not strings.
from datetime import datetime

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time

import gpxpy
import gpxpy.gpx
import matplotlib.pyplot as plt

import folium
import xyzservices.providers as xyz

3.10.15 | packaged by conda-forge | (main, Sep 20 2024, 16:26:40) [MSC v.1941 64 bit (AMD64)]


#### GPX Extraction

In [2]:
# Parse the Strava export into a gpxpy GPX object.
# GPX is XML and Strava writes it as UTF-8; pass the encoding explicitly so the
# file is not decoded with the platform's locale codec (cp1252 on Windows).
with open('Bayshore_Marathon.gpx', 'r', encoding='utf-8') as gpx_file:
    gpx = gpxpy.parse(gpx_file)


In [3]:
# Flatten the parsed GPX (tracks -> segments -> points) into one dict per point.
route_info = []

for track in gpx.tracks:
    for segment in track.segments:
        for point in segment.points:
            route_info.append({
                'latitude': point.latitude,
                'longitude': point.longitude,
                'elevation': point.elevation,
            })

# Convert to DataFrame. The columns are named explicitly so that a GPX file
# without any points still produces a frame (and CSV header) with the expected
# schema instead of a column-less one.
route_df = pd.DataFrame(route_info, columns=['latitude', 'longitude', 'elevation'])

# Save to .csv with latitude, longitude, and elevation (no index column).
route_df.to_csv('Bayshore_Marathon.csv', index=False)

In [4]:
route_df

Unnamed: 0,latitude,longitude,elevation
0,44.766742,-85.584262,185.6
1,44.766742,-85.584262,185.6
2,44.766742,-85.584262,185.6
3,44.766742,-85.584262,185.6
4,44.766742,-85.584262,185.6
...,...,...,...
10141,44.767309,-85.585904,187.8
10142,44.767309,-85.585904,187.8
10143,44.767309,-85.585904,187.8
10144,44.767309,-85.585904,187.8


#### Folium Map

In [None]:
# Base map centred on the first recorded GPX point, drawn with Stadia's
# Stamen Watercolor raster tiles.
# NOTE(security): the Stadia API key is embedded in the tile URL below, so it is
# visible to anyone who can read this notebook; consider rotating it and
# loading it from outside the source.
route_map = folium.Map(
    location=[44.766742, -85.584262],
    zoom_start=11,
    # Alternative basemap (CartoDB light, no labels):
    # attr = (
    # '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> '
    # 'contributors, &copy; <a href="https://cartodb.com/attributions">CartoDB</a>'
    # ),
    # tiles = "https://{s}.basemaps.cartocdn.com/light_nolabels/{z}/{x}/{y}.png"

    # Adjacent string literals are concatenated, so each fragment ends with a
    # space to keep the credits from running together on the rendered map.
    attr=(
        '&copy; <a href="https://stadiamaps.com/" target="_blank">Stadia Maps</a> '
        '&copy; <a href="https://stamen.com/" target="_blank">Stamen Design</a> '
        '&copy; <a href="https://openmaptiles.org/" target="_blank">OpenMapTiles</a> '
        '&copy; <a href="https://www.openstreetmap.org/copyright" target="_blank">OpenStreetMap</a>'
    ),
    tiles="https://tiles.stadiamaps.com/tiles/stamen_watercolor/{z}/{x}/{y}.jpg?api_key=79d21fa8-7dc9-4a9e-887b-4ad3d9df072e",
)


# Optional: plot every GPX point as a marker on this map.
# for _, row in route_df.iterrows():
#     folium.CircleMarker(
#         location=[row['latitude'], row['longitude']],
#         radius=3,
#     ).add_to(route_map)

display(route_map)

In [33]:
# Map centred on the first recorded GPX point.
route_map = folium.Map(
    location=[44.766742, -85.584262],
    zoom_start=11,
)

# Stadia Maps "Alidade Smooth" provider details via xyzservices.
base_provider = xyz.Stadia.AlidadeSmooth

# Build a private copy of the provider whose URL carries the API-key
# placeholder. xyz.Stadia.AlidadeSmooth is a shared object: appending to its
# "url" in place would add another "?api_key=..." suffix every time this cell
# (or any other cell doing the same) is re-run. Calling a provider returns a
# copy with the given attributes overridden, and the guard keeps the URL valid
# even if the shared object was already modified earlier in the session.
tile_url = base_provider["url"]
if "{api_key}" not in tile_url:
    tile_url += "?api_key={api_key}"
tile_provider = base_provider(url=tile_url)

# Create the folium TileLayer, specifying the API key.
# NOTE(security): the key is hard-coded here; consider rotating it and loading
# it from outside the source.
folium.TileLayer(
    tiles=tile_provider.build_url(api_key='79d21fa8-7dc9-4a9e-887b-4ad3d9df072e'),
    attr=tile_provider.attribution,
    name=tile_provider.name,
    max_zoom=tile_provider.max_zoom,
    detect_retina=True
).add_to(route_map)

folium.LayerControl().add_to(route_map)
#route_map
# Display the provider definition as the cell output.
tile_provider


In [36]:
# Map centred on the first recorded GPX point.
route_map = folium.Map(
    location=[44.766742, -85.584262],
    zoom_start=11,
)

# Stadia Maps "Alidade Smooth" provider details via xyzservices.
base_provider = xyz.Stadia.AlidadeSmooth

# Build a private copy of the provider whose URL carries the API-key
# placeholder. xyz.Stadia.AlidadeSmooth is a shared object: appending to its
# "url" in place would add another "?api_key=..." suffix every time this cell
# (or any other cell doing the same) is re-run. Calling a provider returns a
# copy with the given attributes overridden, and the guard keeps the URL valid
# even if the shared object was already modified earlier in the session.
tile_url = base_provider["url"]
if "{api_key}" not in tile_url:
    tile_url += "?api_key={api_key}"
tile_provider = base_provider(url=tile_url)

# Create the folium TileLayer, specifying the API key.
# NOTE(security): the key is hard-coded here; consider rotating it and loading
# it from outside the source.
folium.TileLayer(
    tiles=tile_provider.build_url(api_key='79d21fa8-7dc9-4a9e-887b-4ad3d9df072e'),
    attr=tile_provider.attribution,
    name=tile_provider.name,
    max_zoom=tile_provider.max_zoom,
    detect_retina=True
).add_to(route_map)

folium.LayerControl().add_to(route_map)

# Draw one small circle per GPX point. Zipping the two columns avoids building
# a Series object per row, which iterrows() does for every one of the points.
for lat, lon in zip(route_df['latitude'], route_df['longitude']):
    folium.CircleMarker(
        location=[lat, lon],
        radius=1,
    ).add_to(route_map)

route_map


In [6]:
# Ask xyzservices whether the MapBox provider requires a token; the result is
# shown as the cell output.
xyz.MapBox.requires_token()

True

#### Code for Grabbing Strava Data

In [None]:
#Taken from https://github.com/fpolignano/Code_From_Tutorials/blob/master/Strava_Api/strava_api.py
#Code to grab current Strava data

auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

# NOTE(security): the client secret and refresh token are committed in this
# notebook; consider rotating them and loading them from outside the source.
payload = {
    'client_id': "91559",
    'client_secret': '0b4ba39eb19596b212db839859d53038ee0ef8cb',
    'refresh_token': 'fa0ed46c3baec8feeac22cf4c3819a3758439b4a',
    'grant_type': "refresh_token",
    'f': 'json'
}

# Exchange the refresh token for a short-lived access token.
# TLS verification is left at its default (enabled): this request carries the
# client secret, and the activity requests below already verify certificates.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces a rejected credential as an HTTP error instead of
# a KeyError on 'access_token'.
print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, timeout=30)
res.raise_for_status()
access_token = res.json()['access_token']
print("Access Token = {}\n".format(access_token))

header = {'Authorization': 'Bearer ' + access_token}

## Updating this portion to grab all of the runs from Strava in total, instead of just the last 200

#param = {'per_page': 200, 'page': 1}
#my_dataset = requests.get(activites_url, headers=header, params=param).json()
#print(my_dataset[0]["name"])
#print(my_dataset[0]["map"]["summary_polyline"])

param = {'per_page': 200, 'page': 1}
# One DataFrame per page; they are merged into a single frame in a later cell.
df_list = []

while True:

    # Grab one page of activities from Strava through an HTTP request.
    response = requests.get(activites_url, headers=header, params=param, timeout=30)

    # Stop on any HTTP error (rate limit, expired token, ...). Without this an
    # error body, which is a non-empty JSON object, would be treated as a page
    # of data and the loop would keep requesting pages forever.
    response.raise_for_status()
    r = response.json()

    # An empty page means every activity has been collected.
    if not r:
        break

    # Store this page's activities as a flattened DataFrame.
    df_list.append(pd.json_normalize(r))

    # Keep iterating till all data is collected.
    param['page'] += 1

print("Success! Got it all without error!")

#### Code for performing data analysis on Strava Data

In [3]:
# Merge the per-page DataFrames into a single DataFrame in one concat call.
# ignore_index=True gives every activity a unique row label; without it each
# page restarts its labels at 0, leaving duplicates in the merged frame and in
# the exported index column. pd.concat raises ValueError if df_list is empty
# (i.e. the download cell collected nothing).
activities = pd.concat(df_list, ignore_index=True)

# Export the full, unfiltered data to csv for visualization.
activities.to_csv("Activies.csv")

# Keep only the columns used in the analysis. The explicit .copy() makes this an
# independent frame, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
activities = activities[['resource_state', 'name', 'distance', 'elapsed_time',
       'total_elevation_gain', 'type', 'workout_type', 'id',
       'start_date_local', 'timezone',
       'start_latlng', 'end_latlng', 'average_speed',
       'max_speed', 'elev_high', 'elev_low', 'athlete.id']].copy()


# Perform data manipulation.
# Split the local start timestamp into separate time and date columns.
activities['start_date_local'] = pd.to_datetime(activities['start_date_local'])
activities['start_time'] = activities['start_date_local'].dt.time
activities['start_date_local'] = activities['start_date_local'].dt.date
# Unit conversions; presumably 'distance' is in meters and 'elapsed_time' in
# seconds -- confirm against the Strava API documentation.
activities['distance_miles'] = (activities['distance'] / 1609.34).round(2)
activities['time_minutes'] = (activities['elapsed_time'] / 60).round()

#grab city from start_lat/long - Have to be careful with this if desired
# https://operations.osmfoundation.org/policies/nominatim/
#geolocator = Nominatim(user_agent="test")
#location = geolocator.reverse("42.3513587, -83.0653791")

# Export the trimmed, augmented data to excel for visualization.
activities.to_excel("Activies.xlsx")

#activities