In [1]:
import pandas as pd

# Load the activities CSV from the Strava data folder into `activities`.
activities = pd.read_csv('../data/strava/activities.csv')

In [10]:
# Take the first match by position. A bare `[0]` on the filtered Series is a
# label lookup and only succeeds when the matching row has index label 0.
activities.loc[activities['Activity ID']==11823912,'Activity Name'].iloc[0]

'Jun 26, 2012'

In [30]:
# Sanity check: exactly one activity row has a Filename containing this ID.
len(activities[activities['Filename'].str.contains('10002481365', na=False)]) == 1

True

In [2]:
# Read the processed-files log without treating any row as a header
# (header=None), then name its three columns explicitly and display it.
metadata = pd.read_csv('../data/metadata/processed_files.csv',header=None)
metadata.columns = ['original_filename','track_name','activity_name']
metadata

Unnamed: 0,original_filename,track_name,activity_name
0,10002481365.fit,10002481365,10002481365
1,10002481365.fit,10002481365,10002481365
2,10010559674.fit,10010559674,10010559674
3,10010559674.fit,10010559674,10010559674
4,10022163615.tcx,10022163615,10022163615
...,...,...,...
3382,9985339789.fit,9985339789,9985339789
3383,9985339789.fit,9985339789,9985339789
3384,9990211732.tcx,9990211732,9990211732
3385,9990211732.tcx,9990211732,9990211732


In [16]:
# Show the metadata row(s) recorded for this track name.
metadata.loc[metadata['track_name'].eq('11823912')]

Unnamed: 0,original_filename,track_name,activity_name
444,11823912.gpx,11823912,"Jun 26, 2012"


In [54]:
# Example token refresh
# curl -X POST https://www.strava.com/api/v3/oauth/token \
#   -d client_id=ReplaceWithClientID \
#   -d client_secret=ReplaceWithClientSecret \
#   -d grant_type=refresh_token \
#   -d refresh_token=ReplaceWithRefreshToken

import pandas as pd
import numpy as np
from pandas import DataFrame as PandasDataFrame

def load_activities_from_archive(path: str = '../data/strava/activities.csv') -> PandasDataFrame:
    """Load the activities CSV and add the derived columns used elsewhere.

    Args:
        path: Location of the activities CSV. Defaults to the notebook's
            relative data path, so existing zero-argument calls behave as
            before.

    Returns:
        The CSV contents as a DataFrame with two changes:
        ``original_filename`` holds the last path component of ``Filename``
        cut at its first dot, and ``Activity Date`` is parsed to datetimes.

    Raises:
        ValueError: If an ``Activity Date`` value does not match the
            ``'%b %d, %Y, %I:%M:%S %p'`` format.
    """
    activities = pd.read_csv(path)
    # e.g. 'some/dir/123.fit.gz' -> '123.fit.gz' -> '123'
    base_names = activities['Filename'].str.split('/').str[-1]
    activities['original_filename'] = base_names.str.split('.').str[0]
    activities['Activity Date'] = pd.to_datetime(
        activities['Activity Date'], format='%b %d, %Y, %I:%M:%S %p'
    )
    return activities

# Module-level frame that the lookup helpers in this cell read by name.
activities = load_activities_from_archive()

def get_activity_detail(activity_id, column):
    """Return one field of the activity whose 'Activity ID' equals `activity_id`.

    The row is looked up in the module-level `activities` frame.

    Args:
        activity_id: Value compared against the 'Activity ID' column.
        column: Name of the column to read from the matching row.

    Returns:
        The value of `column` for the first matching row, or None (after
        printing a notice) when no row matches or a column is missing.
    """
    try:
        matches = activities.loc[activities['Activity ID'] == activity_id, column]
    except KeyError:
        # Missing column: keep the existing "print and return None" contract.
        matches = None
    if matches is None or matches.empty:
        print(f"Couldn't locate activity ID {activity_id}")
        return None
    # Positional access: the filtered result keeps the original row labels, so
    # `[0]` would only succeed when the match happens to carry index label 0.
    return matches.iloc[0]

def get_activity_name(activity_id):
    """Return the 'Activity Name' for `activity_id`, or None if it cannot be resolved.

    Lookup order against the module-level `activities` frame:
      1. rows whose 'Activity ID' equals `activity_id` (first match wins);
      2. otherwise rows whose 'Filename' contains ``str(activity_id)`` as a
         literal substring, accepted only when exactly one row matches.

    A notice is printed whenever None is returned. Missing columns are
    treated as "no match" rather than raised.
    """
    try:
        by_id = activities.loc[activities['Activity ID'] == activity_id, 'Activity Name']
    except KeyError:
        by_id = None
    if by_id is not None and not by_id.empty:
        # Positional access: `[0]` would be a label lookup and only work when
        # the matching row happens to carry index label 0.
        return by_id.iloc[0]

    try:
        filename_hit = activities['Filename'].str.contains(
            str(activity_id), na=False, regex=False
        )
        by_file = activities.loc[filename_hit, 'Activity Name']
    except KeyError:
        by_file = None

    if by_file is not None and by_file.shape[0] == 1:
        return by_file.iloc[0]
    if by_file is not None and by_file.shape[0] > 1:
        # Ambiguous substring match: refuse to guess instead of asserting.
        print(f"Activity ID {activity_id} matches {by_file.shape[0]} filenames; expected exactly one")
    else:
        print(f"Couldn't locate activity ID {activity_id}")
    return None

def get_activity_date(activity_id):
    """Look up the 'Activity Date' field for the given activity ID.

    Thin convenience wrapper: delegates to get_activity_detail and returns
    whatever that call returns.
    """
    date_column = 'Activity Date'
    return get_activity_detail(activity_id, date_column)

def get_name_and_date(files_available) -> PandasDataFrame:
    """Attach the activity name and date to each available file.

    Each entry of `files_available` is cut at its first dot to form
    `original_filename`, which is left-joined against the module-level
    `activities` frame. Files with no matching activity keep a row with
    missing `name` and `date`.

    Returns:
        A frame with columns `original_filename`, `name` and `date`. The
        `date` values are passed through exactly as stored in `activities`;
        no parsing or sorting happens here.
    """
    lookup = pd.DataFrame(files_available, columns=['original_filename_with_ext'])
    lookup['original_filename'] = lookup['original_filename_with_ext'].str.split('.').str[0]
    joined = lookup.merge(activities, on='original_filename', how='left')
    selected = joined[['original_filename', 'Activity Name', 'Activity Date']]
    return selected.rename(columns={'Activity Name': 'name', 'Activity Date': 'date'})

In [61]:
def calculate_relative_effort_by_week(activities):
    """Sum a combined relative-effort score per ISO (year, week).

    Per activity, the perceived effort is weighted by
    'Prefer Perceived Exertion' (the weight is forced to 1 when
    'Relative Effort' is missing but 'Perceived Relative Effort' is present).
    A weighted value of zero is treated as missing, and missing values fall
    back to 'Relative Effort'.

    Args:
        activities: Frame with a datetime 'Activity Date' column plus
            'Relative Effort', 'Perceived Relative Effort' and
            'Prefer Perceived Exertion'. It is copied, not modified.

    Returns:
        One row per (year, week) with columns `year`, `week`,
        `combined_relative_effort` (sum), `activity_min_date` and
        `activity_max_date`.
    """
    frame = activities.copy()

    iso_parts = frame['Activity Date'].dt.isocalendar()
    frame['year'] = iso_parts.year
    frame['week'] = iso_parts.week

    measured = frame['Relative Effort']
    perceived = frame['Perceived Relative Effort']
    only_perceived = measured.isnull() & ~perceived.isnull()
    frame['pr_weighting'] = np.where(only_perceived, 1, frame['Prefer Perceived Exertion'])

    weighted = frame['pr_weighting'] * perceived
    # A zero product means the perceived value is not preferred; blank it so
    # combine_first falls through to the measured effort.
    frame['pf_effort'] = np.where(weighted == 0, np.nan, weighted)
    frame['combined_relative_effort'] = frame['pf_effort'].combine_first(measured)

    weekly = frame.groupby(['year', 'week']).agg(
        combined_relative_effort=('combined_relative_effort', 'sum'),
        activity_min_date=('Activity Date', 'min'),
        activity_max_date=('Activity Date', 'max'),
    )
    return weekly.reset_index()

In [80]:
# Compute the weekly summary in this cell so the display does not depend on a
# variable left over from an earlier kernel session.
effort_by_week = calculate_relative_effort_by_week(activities)
effort_by_week

Unnamed: 0,year,week,combined_relative_effort,activity_min_date,activity_max_date
0,2009,53,0.0,2010-01-01 00:00:00,2010-01-01 00:00:00
1,2012,26,0.0,2012-06-26 22:17:36,2012-06-30 14:05:19
2,2012,27,0.0,2012-07-03 22:30:35,2012-07-03 22:30:35
3,2012,29,0.0,2012-07-17 22:22:28,2012-07-21 12:37:10
4,2012,31,0.0,2012-07-30 17:54:08,2012-08-04 11:48:26
...,...,...,...,...,...
489,2024,30,211.0,2024-07-23 16:15:39,2024-07-28 11:26:42
490,2024,31,231.0,2024-07-30 11:23:38,2024-08-04 13:08:03
491,2024,32,275.0,2024-08-06 11:18:03,2024-08-11 13:11:46
492,2024,33,254.0,2024-08-13 11:24:17,2024-08-18 13:18:30


In [63]:
# Inspect the per-activity effort columns for ISO week 32 of 2024.
# NOTE(review): this needs `year`, `week`, `pr_weighting`, `pf_effort` and
# `combined_relative_effort` on the module-level `activities`, but
# calculate_relative_effort_by_week only adds them to a copy — confirm where
# they are set before relying on a fresh Run All.
activities.loc[(activities['year']==2024)&(activities['week']==32),['Relative Effort','Perceived Relative Effort','pr_weighting','pf_effort','combined_relative_effort']]

Unnamed: 0,Relative Effort,Perceived Relative Effort,pr_weighting,pf_effort,combined_relative_effort
2151,,32.0,1.0,32.0,32.0
2152,32.0,49.0,1.0,49.0,49.0
2153,47.0,,0.0,,47.0
2154,56.0,80.0,0.0,,56.0
2155,91.0,,0.0,,91.0


In [14]:
# NOTE(review): `combined_relative_effort` is only added to a copy inside
# calculate_relative_effort_by_week; confirm the module-level `activities`
# actually carries this column on a fresh kernel.
activities['combined_relative_effort']

0        NaN
1        NaN
2        NaN
3        NaN
4        NaN
        ... 
2162    32.0
2163    50.0
2164    80.0
2165    48.0
2166    61.0
Name: combined_relative_effort, Length: 2167, dtype: float64

In [3]:
import os

# Strip the extension with splitext rather than slicing off four characters,
# so names are not mangled when an extension is not exactly three letters long.
files_available = [
    os.path.splitext(entry)[0] for entry in os.listdir('../data/processed/')
]

In [15]:
# Look up the activity name and date for every processed file.
details = get_name_and_date(files_available)

In [16]:
# NOTE(review): the saved output shows `date` as raw strings, whereas
# load_activities_from_archive parses 'Activity Date' to datetimes — the
# output looks stale; confirm after a fresh run.
details

Unnamed: 0,original_filename,name,date
0,10002481365,Zwift - Sweetspot w/ steeps in Watopia,"Jun 24, 2023, 10:37:10 AM"
1,10010559674,Morning Run,"Jun 25, 2023, 1:18:02 PM"
2,10022163615,30 min Tabata Ride with Hannah Frankson,"Jun 27, 2023, 11:19:42 AM"
3,10029172050,Morning Ride,"Jun 28, 2023, 11:56:32 AM"
4,10035306741,20 min HIIT Ride with Leanne Hainsby,"Jun 29, 2023, 11:16:45 AM"
...,...,...,...
1706,9967756006,"Soccer Game, 2nd Half","Jun 18, 2023, 11:00:06 PM"
1707,9978735480,Lunch Ride,"Jun 20, 2023, 4:37:26 PM"
1708,9985339789,Zwift - Lactate Shuttle Short in Paris,"Jun 21, 2023, 4:16:40 PM"
1709,9990211732,20 min HIIT Ride with Denis Morton,"Jun 22, 2023, 11:28:23 AM"


In [20]:
# Build the select-box label ("<file id> - <activity name>"), make sure `date`
# is datetime, and order the rows chronologically.
details = (
    details
    .assign(
        selectbox_name=lambda frame: frame['original_filename'].str.cat(frame['name'], sep=' - '),
        date=lambda frame: pd.to_datetime(frame['date'], format='%b %d, %Y, %I:%M:%S %p'),
    )
    .sort_values(by='date')
)
# select_list = list(details['selectbox_name'].values)

In [21]:
# Show the frame after it has been sorted by date and given `selectbox_name`.
details

Unnamed: 0,original_filename,name,date,selectbox_name
325,158841451,Work Run,2010-01-01 00:00:00,158841451 - Work Run
223,11823912,"Jun 26, 2012",2012-06-26 22:17:36,"11823912 - Jun 26, 2012"
234,12093460,"Jun 30, 2012",2012-06-30 14:05:19,"12093460 - Jun 30, 2012"
246,12413019,"Jul 3, 2012",2012-07-03 22:30:35,"12413019 - Jul 3, 2012"
283,13691080,Riverview Park,2012-07-17 22:22:28,13691080 - Riverview Park
...,...,...,...,...
201,11274421459,10 KM Alaska Hatcher Pass Ride,2024-01-11 12:25:06,11274421459 - 10 KM Alaska Hatcher Pass Ride
202,11287913946,Zwift - Temple Trek in Watopia,2024-01-13 12:27:16,11287913946 - Zwift - Temple Trek in Watopia
205,11321392571,15 min Tabata Ride with Bradley Rose,2024-01-18 12:35:24,11321392571 - 15 min Tabata Ride with Bradley ...
207,11334046438,Zwift - Volcano Flat in Watopia,2024-01-20 12:31:51,11334046438 - Zwift - Volcano Flat in Watopia


In [96]:
# Inspect the full left join of available files against all activity columns.
# NOTE(review): `fa_pd` is only assigned as a local inside get_name_and_date
# in the visible notebook — confirm where the module-level `fa_pd` comes from.
pd.merge(fa_pd,activities,on='original_filename',how='left')

Unnamed: 0,original_filename_with_ext,original_filename,Activity ID,Activity Date,Activity Name,Activity Type,Activity Description,Elapsed Time,Distance,Max Heart Rate,...,Activity Count,Total Steps,Carbon Saved,Pool Length,Training Load,Intensity,Average Grade Adjusted Pace,Timer Time,Total Cycles,Media
0,10002481365.csv,10002481365,9.324803e+09,"Jun 24, 2023, 10:37:10 AM",Zwift - Sweetspot w/ steeps in Watopia,Virtual Ride,,3916.0,36.28,173.0,...,,,,,,,,,,
1,10010559674.csv,10010559674,9.332409e+09,"Jun 25, 2023, 1:18:02 PM",Morning Run,Run,,3333.0,4.63,166.0,...,,5308.0,,,,,,,,
2,10022163615.csv,10022163615,9.343376e+09,"Jun 27, 2023, 11:19:42 AM",30 min Tabata Ride with Hannah Frankson,Ride,Total Output: 521.6 kJ\nLeaderboard Rank: 10 /...,1800.0,19.54,170.0,...,,,,,,,,,,media/456ec25c-b7a4-4f35-9e34-530d17eaa041.jpg
3,10029172050.csv,10029172050,9.350048e+09,"Jun 28, 2023, 11:56:32 AM",Morning Ride,Ride,,2601.0,21.30,,...,,,,,,,,,,
4,10035306741.csv,10035306741,9.355880e+09,"Jun 29, 2023, 11:16:45 AM",20 min HIIT Ride with Leanne Hainsby,Ride,Total Output: 362.9 kJ\nLeaderboard Rank: 63 /...,1200.0,13.30,157.0,...,,,,,,,,,,media/6aa6dc7d-4f43-49e8-97d1-af0c6c084904.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1706,9967756006.csv,9967756006,9.292109e+09,"Jun 18, 2023, 11:00:06 PM","Soccer Game, 2nd Half",Run,,2688.0,3.50,191.0,...,,3586.0,,,,,,,,
1707,9978735480.csv,9978735480,9.302315e+09,"Jun 20, 2023, 4:37:26 PM",Lunch Ride,Ride,,2603.0,22.29,177.0,...,,,,,,,,,,
1708,9985339789.csv,9985339789,9.308551e+09,"Jun 21, 2023, 4:16:40 PM",Zwift - Lactate Shuttle Short in Paris,Virtual Ride,,1909.0,17.88,173.0,...,,,,,,,,,,
1709,9990211732.csv,9990211732,9.313179e+09,"Jun 22, 2023, 11:28:23 AM",20 min HIIT Ride with Denis Morton,Ride,Total Output: 365.3 kJ\nLeaderboard Rank: 95 /...,1200.0,13.30,164.0,...,,,,,,,,,,media/563fb80a-5c65-4946-b14d-c5e1635f85e0.jpg
