# Collecting Segment's Data from Strava

Exploiting the Strava API, data regarding segments of Italy's top climbs is collected.

Italy territory is ideally divided in squares of 0.1 degrees of latitude and 0.1 degrees of longitute: for each of these squares, via exploitation of Strava's API segments explorer, the best 10 segments are requested. 

In [2]:
import requests
import time
import pandas as pd
import numpy as np
import os

# Configure OAuth2 access token for authorization: strava_oauth
access_token = os.environ.get('STRAVA_API_KEY', '') #please configure this env var with your STRAVA API KEY

# Strava base url
base_url = 'https://www.strava.com/api/v3'

#headers
headers = {'Authorization': 'Bearer ' + access_token}

#short term limit min * sec (num sec)
short_term_limit = 15 * 60
max_no_req_short = 600
delay_short = short_term_limit / max_no_req_short

In [6]:
#no of segments to loads (batch of 10)
num_batches = 1000

In [7]:
#Explore segments on a given geographical area
endpoint = '/segments/explore'

# Italy Latitude and longitude
* Northernmost point — Testa Gemella Occidentale/Westliches Zwillingsköpfl, Prettau (Predoi), South Tyrol at 47°5′N 12°11′E
* Southernmost point — Punta Pesce Spada, Lampedusa, Sicily at 35°29′N 12°36′E (whole territory); Capo Spartivento, Palizzi, Calabria at 37°55′N 15°59′E (mainland)
* Westernmost point — Rocca Bernauda, Bardonecchia, Piedmont at 45°6′N 6°37′E
* Easternmost point — Capo d'Otranto, Otranto, Apulia at 40°6′N 18°31′E

In [8]:
def extract_segment_id(segments):
    ids = []
    for s in segments:
        ids.append(s["id"])
    return ids

In [3]:
min_lat = 35.48333333
max_lat = 47.5
min_lng = 6.37
max_lng = 18.31
lats = np.arange(min_lat, max_lat + 0.1, 0.1)
lngs = np.arange(min_lng, max_lng + 0.1, 0.1)

In [9]:
#bounds: ‘sw.lat,sw.lng,ne.lat,ne.lng’ ‘south,west,north,east’
payload = {'activity_type': 'riding', 'bounds': "", "min_cat": 1, "max_cat": 5}
print(base_url + endpoint)

df_segments = pd.DataFrame()
seg_indexes = []
no_found = 0
batch_no = 11
for lat1, lat2 in zip(lats[::2], lats[1::2]):
    for lng1, lng2 in zip(lngs[::2], lngs[1::2]):
        time.sleep(delay_short + 1)
        boundaries = "{},{},{},{}".format(lat1, lng1, lat2, lng2)
        print("boundaries " + boundaries)
        payload["bounds"] = boundaries
        r = requests.get(base_url + endpoint, data = payload, headers=headers)
        segments = r.json()["segments"]
        seg_indexes = extract_segment_id(segments)
        no_found += len(seg_indexes)
        print("Found segments (current/total): {}/{}".format(len(seg_indexes), no_found))
        df = pd.DataFrame(segments, index=seg_indexes)
        df_segments = df_segments.append(df, verify_integrity=True)
        #saving partial results 
        if (no_found >= num_batches):
            no_found = 0
            df_segments.to_csv(path_or_buf="strava-segments-detailed-updated-{}.csv".format(batch_no),index=True)
            batch_no += 1
            df_segments = pd.DataFrame()
#save segments
df_segments.to_csv(path_or_buf="strava-segments-detailed-updated-{}.csv".format(batch_no),index=True)    

https://www.strava.com/api/v3/segments/explore
boundaries 47.28333333000017,18.16999999999996,47.38333333000017,18.26999999999996
Found segments (current/total): 1/1


KeyboardInterrupt: 

In [16]:
segments_def = pd.DataFrame()
for i in range(0, batch_no):
    segments_def = segments_def.append(pd.read_csv("strava-segments-detailed-updated-{}.csv".format(i), index_col=0))

In [19]:
#remove possibile duplicates even though unlikely
segments_def = segments_def[~segments_def.index.duplicated(keep='first')]

In [23]:
segments_def.to_csv(path_or_buf="strava-segments-italy-full.csv".format(batch_no),index=True)    