In [None]:
# <------ press the play button in the upper left corner when finished
CLIENT_ID = 'YOUR CLIENT ID'
CLIENT_SECRET = 'YOUR CLIENT SECRET'
REFRESH_TOKEN = 'YOUR REFRESH TOKEN'


CLIENT_ID = '92250'
# de9fe4848de472b31e678e456920aee3101993dc
REFRESH_TOKEN = '1a0dbcab4b5a442d4a022b900b2786500ef43c12'
CLIENT_SECRET = 'd9907c8a346944640c736f3a1c1ccda429a731df'
# REFRESH_TOKEN = '84211bfa5481f36a36ab3aeaf54b16ec43c6c083'

In [2]:
import pandas as pd
import numpy as np
import scipy as sp
from scipy import stats
from numpy.polynomial import polynomial
import requests
import urllib3
import matplotlib.pyplot as plt
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


# coeffs is a list of the coefficients for the line of best fit (i.e. [4, -.5, 19]
# for the function "4x^2 - .5x + 19" or [3, 28] for "3x + 28". x and y are the
# column names for the data that is being graphed, and activity df is the df
# with all of the data that is to be graphed (rows without data for x and y
# must be excluded).
def calc_r_sq(coeffs, x, y, activity_df):
    x = list(activity_df[x])
    y = list(activity_df[y])

    expected_values = []
    for x_val in x:
        # In english: expected_values.append(f(x)) where x=x_val and f(x) is an
        # nth order polynomial where n=len(coeffs)
        expected_values.append(sum([coeffs[coeff_num] * x_val**coeff_num for coeff_num in range(len(coeffs))]))
    
    r = stats.pearsonr(x=expected_values, y=y)[0]
    r_sq = r**2

    return r_sq


# Plot the line of best fit for a line and 2nd-order polynomial. x and y are
# the names of the columns to be used, and activity_df is the dataframe with
# all the data
def plot_best_fit(x, y, activity_df):
    y_vs_x_text = f"{y} vs {x}" # Name of graph
    print(f"\n{y_vs_x_text}")

    # If there are no data points for specified x and y, remove those rows. 
    # Then sort and plot the data.
    activity_df = activity_df.dropna(subset=[x, y])
    activity_df = activity_df.sort_values(by=x)
    activity_df.plot(x=x, y=y, kind='scatter', color='black', figsize=(20,10))

    # Calculate 2nd-degree polynomial and r^2 for line of best fit
    poly_coeff = polynomial.polyfit(x=activity_df[x], y=activity_df[y], deg=2)
    poly_r_sq = calc_r_sq(poly_coeff, x, y, activity_df)
    poly_equation_text = "y = %.3gx^2 + %.3gx + %.3g   (r^2 = %.2g)" % (poly_coeff[2], poly_coeff[1], poly_coeff[0], poly_r_sq)

    # Calculate linear equation and r^2 for line of best fit
    linear_coeff = polynomial.polyfit(x=activity_df[x], y=activity_df[y], deg=1)
    linear_r_sq = calc_r_sq(linear_coeff, x, y, activity_df)
    linear_equation_text = "y = %.3gx + %.3g   (r^2 = %.2g)" % (linear_coeff[1], linear_coeff[0], linear_r_sq)

    print(poly_equation_text)
    print(linear_equation_text)

    # Plot lines of best fit
    plt.plot(activity_df[x], linear_coeff[1] * activity_df[x] + linear_coeff[0], color='green')
    plt.plot(activity_df[x], poly_coeff[2] * (activity_df[x]**2) + poly_coeff[1] * activity_df[x] + poly_coeff[0], color='blue')

    # Display equations in upper-right-hand corner
    x_range = max(activity_df[x]) - min(activity_df[x])
    y_range = max(activity_df[y]) - min(activity_df[y])
    plt.text(max(activity_df[x]) - .25*x_range, max(activity_df[y]) + .125*y_range, poly_equation_text, size=14)
    plt.text(max(activity_df[x]) - .25*x_range, max(activity_df[y]) + .075*y_range, linear_equation_text, size=14)

    # Save and show image
    plt.savefig(f"{y_vs_x_text}.png")
    plt.show()


auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

payload = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'refresh_token': REFRESH_TOKEN,
    'grant_type': "refresh_token",
    'f': 'json'
}

print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, verify=False)
print(res.json())
access_token = res.json()['access_token']
print("Access Token = {}\n".format(access_token))

downloaded_df_columns = [
    'start_date_local',
    'name',
    'distance', # meters
    'moving_time',  # seconds
    'elapsed_time', # seconds
    'total_elevation_gain', # meters
    'elev_high',
    'elev_low',
    'trainer',
    'commute',
    'manual',
    'sport_type',
    'gear_id',
    'average_speed',
    'max_speed',
    'average_cadence',
    'average_heartrate',
    'max_heartrate',
    'weighted_average_watts',
    'average_watts',
    'kilojoules',
    'device_watts',
    'average_temp',
    'suffer_score'
]

activity_df_columns = [
    'start_date_local',
    'name',
    'distance', # meters
    'moving_time',  # seconds
    'elapsed_time', # seconds
    'total_elevation_gain', # meters
    'elev_high',
    'elev_low',
    'trainer',
    'commute',
    'manual',
    'sport_type',
    'gear_id',
    'average_speed',
    'max_speed',
    'average_cadence',
    'average_heartrate',
    'max_heartrate',
    'est_avg_watts',
    'weighted_average_watts',
    'average_watts',
    'kilojoules',
    'device_watts',
    'average_temp',
    'suffer_score'
]

activity_df = pd.DataFrame(columns=activity_df_columns)
print("Loading data... Shouldn't take more than 10 seconds per page.")
for page_num in range(1, 3):
    print()
    print(f"---------------------PAGE: {page_num}----------------------")
    print(f"Downloading Page {page_num}...")
    header = {'Authorization': 'Bearer ' + access_token}
    param = {'per_page': 200, 'page': page_num}
    initial_json = requests.get(activites_url, headers=header, params=param).json()
    downloaded_df = pd.json_normalize(initial_json)

    activity_id_list = downloaded_df['id']
    print(activity_id_list)

    for activity_id in activity_id_list:
        # {https://www.strava.com/api/v3/activities}/{7623728460}/streams?keys=distance&key_by_type=true
        activity_link = f"{activites_url}/{activity_id}/streams?keys=distance&key_by_type=true"
        individual_json = requests.get(activity_link, headers=header)
        print(f"Activity: {activity_id}\n{individual_json}")


    downloaded_df = downloaded_df[downloaded_df_columns]

    downloaded_df = downloaded_df.copy()
    downloaded_df['est_avg_watts'] = downloaded_df['average_watts'].loc[downloaded_df['device_watts'] == False]
    downloaded_df['average_watts'] = downloaded_df['average_watts'].loc[downloaded_df['device_watts'] == True]

    activity_df = pd.concat([activity_df, downloaded_df], ignore_index=True)

activity_df['start_date_local'] = pd.to_datetime(activity_df['start_date_local'], infer_datetime_format=True)

print(f"Bottom 1% weighted avg power: {activity_df['weighted_average_watts'].quantile(.01)}")
activity_df = activity_df.loc[activity_df['weighted_average_watts'] > activity_df['weighted_average_watts'].quantile(.01)]  # crop out noodle rides
activity_df['watts_per_bpm'] = activity_df['average_watts'] / activity_df['average_heartrate']
activity_df['weighted_watts_per_bpm'] = activity_df['weighted_average_watts'] / activity_df['average_heartrate']
activity_df['w_per_bpm_per_distance'] = activity_df['average_watts'] / activity_df['average_heartrate'] / activity_df['distance']
# print(f"Available data: {activity_df.columns}")
print(activity_df.head(3))

Activity: 10350797642
<Response [404]>
Activity: 10349621232
<Response [404]>


In [None]:
payload = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'refresh_token': REFRESH_TOKEN,
    'grant_type': "refresh_token",
    'f': 'json'
}

print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, verify=False)
print(res.json())
access_token = res.json()['access_token']
print("Access Token = {}\n".format(access_token))

stream_keys = [
    'time',
    'distance',
    'altitude',
    'velocity_smooth',
    'heartrate',
    'cadence',
    'watts',
    'temp',
    'grade_smooth'
]
activities = [7623728460, 7618415912]
current_activity_df = pd.DataFrame(columns=[)
for activity in activities:
    for stream_key in stream_keys:
        data = requests.get(f"https://www.strava.com/api/v3/activities/{activity}/streams?keys={stream_key}&key_by_type=true", params=param, headers=header)
        print(data.json())