In [None]:
import requests
import urllib3
import json
import csv
import os
import datetime

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Suppress urllib3's InsecureRequestWarning so that HTTPS requests made with
# certificate verification disabled do not clutter the cell output.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
# Notebook-wide pandas display defaults, applied in one pass.
for option, value in {
    'display.max_columns': 83,
    'display.max_colwidth': 25,
    'display.max_rows': None,   # None -> never truncate the rows of a displayed frame
    'display.precision': 2,
}.items():
    pd.set_option(option, value)

# Default figure size (in inches) for every seaborn/matplotlib plot below.
sns.set(rc={'figure.figsize': (12.7, 8.27)})

In [None]:
# Absolute path of the directory the notebook is run from; later cells build
# their file paths from it.
cd = os.path.abspath(os.getcwd())

# config.json holds the settings for the API cells; its 'payload' entry is
# what gets posted to the token endpoint.
with open(f'{cd}/config.json') as f:
    data = json.loads(f.read())

In [None]:
auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"
payload = data['payload']

# Exchange the credentials from config.json for an access token.
# Certificate verification is left at its default (enabled): this request
# carries the client secret, and the activity requests below already verify
# the same host successfully, so disabling it bought nothing.
res = requests.post(auth_url, data=payload, timeout=30)
res.raise_for_status()  # fail here rather than with a KeyError on 'access_token'
header = {'Authorization': 'Bearer ' + res.json()['access_token']}

# Download the activity list page by page (200 per page, at most 98 pages).
# Stop at the first empty page instead of always issuing all 98 requests,
# which wastes API quota and risks error payloads being normalised into rows.
pages = []
for page in range(1, 99):
    response = requests.get(activites_url, headers=header,
                            params={'per_page': 200, 'page': page}, timeout=30)
    response.raise_for_status()
    my_dataset = response.json()
    if not my_dataset:
        break
    pages.append(pd.json_normalize(my_dataset))

# Concatenate once at the end rather than re-copying the frame on every page.
my_activities = pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()

In [None]:
# Work on an independent copy (DataFrame.copy is deep by default) so the raw
# download in `my_activities` stays untouched by the cleaning steps.
my_acts = my_activities.copy()

In [None]:
def _format_hms(seconds):
    """Format a duration given in seconds as an 'HH:MM:SS' string.

    The hour field is not wrapped at 24, so a 25-hour activity is rendered as
    '25:00:00' rather than rolling over to '01:00:00'.
    """
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return f'{hours:02d}:{minutes:02d}:{secs:02d}'


# Rename straight to the final, unit-suffixed column names; the conversions
# below bring the values in line with the new suffixes.
my_acts = my_acts.rename(columns={'average_speed': 'average_speed_kmh',
                                  'max_speed': 'max_speed_kmh',
                                  'moving_time': 'moving_time_s',
                                  'elapsed_time': 'elapsed_time_s',
                                  'distance': 'distance_km'})

# Split 'start_date_local' into a time-of-day column ('start_time') and a
# dd/mm/yy date string.
start_local = pd.to_datetime(my_acts['start_date_local'])
my_acts['start_time'] = start_local.dt.time
my_acts['start_date_local'] = start_local.dt.strftime("%d/%m/%y")

# Moving time as an 'HH:MM:SS' string. The column keeps its '(min)' name
# because the column selection in a later cell refers to it by that name.
my_acts['moving_time(min)'] = my_acts['moving_time_s'].map(_format_hms)

# Metres to kilometres for every activity type. Dividing only a Run/Walk
# subset and assigning it back left NaN in all other rows (index alignment),
# silently discarding their distances.
my_acts['distance_km'] = my_acts['distance_km'] / 1000

# Metres per second to kilometres per hour (x 3.6).
my_acts['average_speed_kmh'] = my_acts['average_speed_kmh'] * (18/5)
my_acts['max_speed_kmh'] = my_acts['max_speed_kmh'] * (18/5)

# '(GMT+01:00) Europe/Berlin' -> 'Europe/Berlin'
my_acts['timezone'] = my_acts['timezone'].str.split(' ').str[-1]

In [None]:
# Columns kept for the analysis, in display order.
cols = [
    'upload_id', 'name', 'type', 'distance_km', 'moving_time(min)', 'start_time',
    'start_date_local', 'timezone', 'average_speed_kmh', 'max_speed_kmh',
    'total_elevation_gain', 'average_heartrate', 'max_heartrate',
    'achievement_count', 'kudos_count', 'visibility',
]

# Keep only those columns and store 'visibility' and 'timezone' as
# categoricals (the author measured memory usage going from 25.1 KB to 22.6 KB).
my_acts = my_acts[cols].astype({'visibility': 'category', 'timezone': 'category'})

In [None]:
# Display the cleaned activities table.
my_acts

In [None]:
# Save the cleaned activities to csvs/all_activities.csv.
# newline='' leaves line endings to the CSV writer, which pandas asks for when
# `to_csv` is handed an open file object; without it, output written on
# Windows ends every row with '\r\r\n' and shows up with blank lines.
with open(f'{cd}/csvs/all_activities.csv', 'w', newline='') as activs_f:
    my_acts.to_csv(activs_f)

In [None]:
# Load a previously saved activities table; its first column becomes the index.
# NOTE(review): this reads from 'not_needed/', not the 'csvs/' folder the
# export cell writes to - confirm which file the analysis should use.
# NOTE(review): the handle is opened in text mode without an encoding, so the
# `encoding` argument given to read_csv may have no effect here - confirm.
activities_csv = f'{cd}/not_needed/all_activities.csv'
with open(activities_csv, 'r') as activs_f:
    activities = pd.read_csv(activs_f, sep=',', encoding='iso-8859-1', index_col=[0])

In [None]:
# Display the activities table loaded from the CSV file.
activities

 IDEAS
 Check each activity's name: if it is a 'run' -> move it to a new df['running'];
                 anything 'other' -> move it to df['rest_sessions']
activities

In [None]:
# Bar chart of how many activities were recorded per activity type.
# (Title spelling corrected: "occurences" -> "occurrences".)
(
    activities['type']
    .value_counts()
    .plot(kind='bar', rot=30)
    .set_title("Each activity's # of occurrences")
)

In [None]:
# Per-type activity counts, drawn with seaborn.
sns.countplot(data=activities, x='type')

In [None]:
# Keep only the activities whose type is 'Run'.
is_run = activities['type'].eq('Run')
run = activities.loc[is_run]

In [None]:
# (rows, columns) of the runs-only table.
run.shape

In [None]:
# Count plot restricted to the four most frequent run names.
# Series.value_counts replaces the deprecated top-level pd.value_counts; the
# resulting order (most frequent first) is the same.
top_run_names = run['name'].value_counts().iloc[:4].index
sns.countplot(x='name',
              data=run,
              order=top_run_names)
locs, labels = plt.xticks(rotation=45, fontsize=8)
# Title spelling corrected: "couns" -> "counts".
plt.title("Different named runs and their counts")
plt.xlabel("Name of run")
plt.ylabel("Count")

In [None]:
# Runs with a distance strictly greater than 5 km.
run_5_more = run.loc[run['distance_km'].gt(5)]

In [None]:
# Moving time against distance for the runs longer than 5 km.
sns.scatterplot(data=run_5_more, x='distance_km', y='moving_time(min)')
# Rotate the x tick labels and shrink them to a very small font.
locs, label = plt.xticks(rotation=90, fontsize=3)

In [None]:
# Switch seaborn to the "ticks" style with the larger "talk" context, then
# draw average speed against distance for all runs with a fitted regression line.
sns.set(context="talk", style="ticks")
sns.regplot(
    data=run,
    x="distance_km",
    y="average_speed_kmh",
).set_title("Average Speed vs Distance")

In [None]:
# Same styling as the average-speed plot; here max speed is regressed on
# distance for all runs.
sns.set(context="talk", style="ticks")
sns.regplot(
    data=run,
    x='distance_km',
    y='max_speed_kmh',
).set_title("Max Speed vs Distance")

In [None]:
# Distance distribution per run name, for the runs longer than 5 km.
sns.boxplot(data=run_5_more, x='name', y='distance_km')
# Tilt the run names so they do not overlap.
locs, labels = plt.xticks(rotation=45, fontsize=12)

In [None]:
# Distribution of run distances (values of the 'distance_km' column).
sns.displot(run['distance_km'])