In [None]:
import os
import time
from datetime import datetime

import pandas as pd
import plotly.express as px
import seaborn as sns
from stravalib.client import Client
sns.set(font_scale=2)
sns.set_style("whitegrid")

import matplotlib.pyplot as plt
%matplotlib inline

## Load Data From Strava

## Web access POST request demo (https://developers.strava.com/docs/getting-started/#oauth)
curl -X POST https://www.strava.com/oauth/token \
    -F client_id=40761 \
    -F client_secret=YOUR_CLIENT_SECRET \
    -F code=YOUR_AUTHORIZATION_CODE \
    -F grant_type=authorization_code

In [None]:
# Strava application credentials. The client ID is only an identifier, but the
# client secret and the one-time authorization code are sensitive, so they are
# read from the environment instead of being committed with the notebook.
client_id = 40761
client_secret = os.environ['STRAVA_CLIENT_SECRET']

client = Client()
scope = ['read', 'profile:read_all', 'activity:read']
authorize_url = client.authorization_url(client_id=client_id, redirect_uri='http://localhost:8282/authorized', scope=scope)
print(f'Auth url: {authorize_url}')
# Have the user click the authorization URL, a 'code' param will be added to the redirect_uri
# .....

# Extract the code from the redirect and expose it to the kernel, e.g. with
# `%env STRAVA_AUTH_CODE=<code>`, before running the rest of this cell.
code = os.environ['STRAVA_AUTH_CODE']

# code = request.get('code') # or whatever your framework does
token_response = client.exchange_code_for_token(client_id=client_id, client_secret=client_secret, code=code)
access_token = token_response['access_token']
refresh_token = token_response['refresh_token']
expires_at = token_response['expires_at']

# Now store that short-lived access token somewhere (a database?)
client.access_token = access_token
# You must also store the refresh token to be used later on to obtain another valid access token
# in case the current is already expired
client.refresh_token = refresh_token

# An access_token is only valid for 6 hours, store expires_at somewhere and
# check it before making an API call.
client.token_expires_at = expires_at
print(f'expires at: {expires_at}')

# Refresh with the same application credentials used for the exchange, and push
# the new tokens back onto the client so later API calls actually use them.
if time.time() > client.token_expires_at:
    refresh_response = client.refresh_access_token(client_id=client_id, client_secret=client_secret,
        refresh_token=client.refresh_token)
    access_token = refresh_response['access_token']
    refresh_token = refresh_response['refresh_token']
    expires_at = refresh_response['expires_at']
    client.access_token = access_token
    client.refresh_token = refresh_token
    client.token_expires_at = expires_at

athlete = client.get_athlete()
# The token itself is deliberately not printed: cell output is saved with the notebook.
print("For {id}, I now have an access token".format(id=athlete.id))

## Get some stats on current athlete (me)

In [None]:
client =  Client(access_token=access_token)

In [None]:
print("Hello, {}".format(athlete.firstname))

In [None]:
for activity in client.get_activities(after = "2010-01-01T00:00:00Z",  limit=5):
    print("{0.name} {0.moving_time}".format(activity))

In [None]:
activity_raw = client.get_activities(limit=1000)

In [None]:
activities = list(activity_raw)

In [None]:
print(len(activities))

### Filter On PCT

In [None]:
# Keep only activities whose name begins with "Day ", then flip the order
# (presumably to get oldest-first; the order returned by the API is not visible here).
# A regex on the name would make this match more precise.
pct_raw = [entry for entry in activities if entry.name.startswith('Day ')][::-1]
print([entry.name for entry in pct_raw[:5]])

## Data Test

In [None]:
# One row per PCT activity; rows with any missing value are dropped straight away.
pct = pd.DataFrame(
    {
        'elev_high': [act.elev_high for act in pct_raw],
        'elev_low': [act.elev_low for act in pct_raw],
        # distance divided by 1000 to express it in km (assumes the source unit is metres)
        'km': [float(act.distance / 1000) for act in pct_raw],
        'label': [act.name for act in pct_raw],
        'total_elevation_gain': [float(act.total_elevation_gain) for act in pct_raw],
        'activity_id': [act.id for act in pct_raw],
        # keep only the calendar date of the start timestamp
        'date': [act.start_date.date() for act in pct_raw],
    }
).dropna()
pct.head(20)

### Deal With Dirty data (is it just elevation on day 63?)

In [None]:
# Rows whose high/low elevation spread is above 5000 are treated as corrupt recordings.
bad_data_filter = (pct['elev_high'] - pct['elev_low']).gt(5000)
print(f'Dropping Bad Data: {pct[bad_data_filter]}')
# Keep everything that was not flagged.
pct = pct.loc[~bad_data_filter]

In [None]:
# Pair each activity's lowest and highest elevation as a (low, high) tuple for plotting.
y_plot_data = list(zip(pct.elev_low, pct.elev_high))

In [None]:
# Elevation range (low to high) for every activity, drawn on an explicit axes.
f, axarr = plt.subplots(figsize=(10, 5), dpi=200)
sns.boxplot(x=pct.index, y=y_plot_data, ax=axarr)
axarr.set_title('Elevation Range Over Strava Activity #')
axarr.set_ylabel('Meters')
# 'Day' belongs on the x axis; setting it as a second y label would overwrite 'Meters'.
axarr.set_xlabel('Day')
plt.show()

In [None]:
pct.km.mean()

In [None]:
f, axarr = plt.subplots(figsize=(6, 4), dpi=100)
plt.title('Daily Distance Histogram')
sns.distplot(pct.km, bins=20)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter

def build_pct_historgram(base_data=None, default_dpi=500, lower_percentile=25, upper_percentile=75):
    """Plot, save and show an annotated histogram of per-activity distances.

    Bars that lie entirely below the lower percentile are coloured green, bars
    that lie entirely above the upper percentile are coloured red, and the
    remaining bars stay yellow. Beneath the axis every bar is labelled with its
    count and with its share of all samples.

    Parameters
    ----------
    base_data : array-like of numbers, optional
        Values to histogram. When omitted, ``pct.km`` is looked up at call time
        so the plot reflects the current contents of ``pct``.
    default_dpi : int
        Resolution of the created figure.
    lower_percentile, upper_percentile : float
        Percentiles (0-100) of the data that delimit the green and red bars.

    Side effects
    ------------
    Writes ``pct_distance_per_activity_loggged.png`` to the working directory
    and displays the figure.
    """
    # Resolve the default lazily: a default of `pct.km` in the signature would be
    # frozen at definition time and miss later filtering of `pct`.
    data = pct.km if base_data is None else base_data
    fig, ax = plt.subplots(figsize=(20, 8), dpi=default_dpi)
    counts, bins, patches = ax.hist(data, facecolor='yellow', edgecolor='gray')

    # Put a tick on every bin edge.
    ax.set_xticks(bins)
    ax.xaxis.set_major_formatter(FormatStrFormatter('%0.1f'))

    lower_color_change_percentile, upper_color_change_percentile = np.percentile(data, [lower_percentile, upper_percentile])
    for patch, right_side, left_side in zip(patches, bins[1:], bins[:-1]):
        if right_side < lower_color_change_percentile:
            patch.set_facecolor('green')
        elif left_side > upper_color_change_percentile:
            patch.set_facecolor('red')

    total_count = counts.sum()
    bin_centers = 0.5 * np.diff(bins) + bins[:-1]
    for count, x in zip(counts, bin_centers):
        # Label the raw counts below the x axis
        ax.annotate(str(count), xy=(x, 0), xycoords=('data', 'axes fraction'),
            xytext=(0, -18), textcoords='offset points', va='top', ha='center')

        # Label the percentages
        percent = '%0.0f%%' % (100 * float(count) / total_count)
        ax.annotate(percent, xy=(x, 0), xycoords=('data', 'axes fraction'),
            xytext=(0, -40), textcoords='offset points', va='top', ha='center')

    # Leave room under the axis for the two rows of annotations.
    plt.subplots_adjust(bottom=0.15)
    plt.xlabel('Kilometers', labelpad=50)
    plt.ylabel('Bin Count')
    # Seems backwards but it's how they do it
    # The statistics in the titles describe the data actually plotted, not the global frame.
    plt.title('Median Distance: ' + str(np.median(data)), fontsize=16)
    plt.suptitle('Narner 2019 PCT Per-Actvity Distance Histogram: ' + "Mean Diatance = " + str(np.mean(data)), fontsize=24)
    # Save before showing: with inline backends the figure is finished by show(),
    # so saving afterwards writes an empty image.
    fig.savefig('pct_distance_per_activity_loggged.png')
    plt.show()

In [None]:
build_pct_historgram()

In [None]:
f, axarr = plt.subplots(figsize=(6, 4), dpi=100)
plt.title(f'Daily Elevation Change Histogram\nMean: {pct.total_elevation_gain.mean():.2f}m')
sns.distplot(pct.total_elevation_gain, bins=20)
plt.show()

In [None]:
# Kilometre-to-mile factor, and the trail length used below as the "real" distance.
km_to_mi = 0.621371
total_pct_miles = 2663

# Use the named factor everywhere instead of repeating the literal.
mpd = pct.km.mean() * km_to_mi
total_distance_mi = pct.km.sum() * km_to_mi
# Fraction of the real trail distance that was actually recorded.
bias = total_distance_mi / total_pct_miles

print(f'Miles Per Day: {mpd:.2f}')
print(f'Total Measured Distance: {total_distance_mi:g} mi')
print(f'Real Distance: {total_pct_miles} mi')
print(f'Recording Bias: {bias*100:0.2f}% of total mileage recorded')


### Zeros/Neros
Task: extract the day number from each activity name with a regex, then add rows with distance = 0 for the days that have no recorded activity (zero days).

### High Fidelity Activity Data

In [None]:
# Longest activities first.
pct.sort_values(by='km', ascending=False)

In [None]:
sample_id = 2747816656 # Day 1
# 'distance' is requested explicitly because a later cell reads sample_stream['distance'].
types = ['time', 'distance', 'latlng', 'altitude', 'heartrate', 'temp', 'velocity_smooth']
sample_stream = client.get_activity_streams(activity_id=sample_id, types=types)

In [None]:
# Display the stream collection returned for the sample activity (indexed by stream type name below).
sample_stream

In [None]:
# Raw samples of the heart-rate stream.
sample_stream['heartrate'].data

In [None]:
# Last sample of the time stream; presumably the elapsed seconds at the end of the recording — TODO confirm units.
sample_stream['time'].data[-1]

In [None]:
meters_per_second_to_miles_per_hour = 

In [None]:
sample_stream['distance'].data[-1] / sample_stream['time'].data[-1] * 

In [None]:
stream_data