# Testing API key creation: the start of the OAuth 2 lifecycle

The cell below outputs a user-specific URL. Opening it and authorizing the app produces a code that the app then exchanges for this user's access token and refresh token. After authorizing, copy the code from the redirect URL.

In [None]:
import os
from datetime import datetime, timedelta

import stravalib
from dotenv import load_dotenv, set_key

# Load the Strava app credentials via python-dotenv, then read them from the environment.
load_dotenv()

STRAVA_CLIENT_ID = os.environ.get("STRAVA_CLIENT_ID")
STRAVA_CLIENT_SECRET = os.environ.get("STRAVA_CLIENT_SECRET")

# Permissions requested from the athlete on the consent page.
requested_scopes = [
    'read',
    'read_all',
    'profile:read_all',
    'profile:write',
    'activity:read',
    'activity:read_all',
    'activity:write',
    'activity:read_permission',
]

client = stravalib.Client()
# Last expression of the cell, so the generated URL is shown as the cell output.
client.authorization_url(
    client_id=STRAVA_CLIENT_ID,
    redirect_uri="http://localhost",
    scope=requested_scopes,
)

Below, the app exchanges the code for the user's access token and refresh token. From now on the app can refresh the token on behalf of the user.

In [None]:
# One-time authorization code taken from the redirect URL after the athlete authorizes the app.
AUTHORIZATION_CODE = "XXXXXXX"  # placeholder: paste the real code locally, never commit it

response = client.exchange_code_for_token(STRAVA_CLIENT_ID, STRAVA_CLIENT_SECRET, AUTHORIZATION_CODE)
# Do not print `response` itself: it carries the access and refresh tokens, and
# anything printed here is stored in the notebook output.
print(f"Token received, expires_at={response['expires_at']}")

# Persist the credentials to .env so later cells can reload them.
for key, value in {
    "STRAVA_ACCESS_TOKEN": response["access_token"],
    "STRAVA_REFRESH_TOKEN": response["refresh_token"],
    "STRAVA_TOKEN_EXPIRES_AT": str(response["expires_at"])
}.items():
    set_key(".env", key, value)

Saving my token and testing. `stravalib.Client` manages the refresh automatically. The downside is that, because it refreshes for me, I can't save the new credentials: if this process's memory is ever cleared and my token expires, I lose it and have to request the code all over again. In the main scripts I wrote my own wrapper to handle all of this.

In [None]:
# Reload .env with override=True so values already present in the environment are replaced.
load_dotenv(override=True)
STRAVA_ACCESS_TOKEN = os.getenv("STRAVA_ACCESS_TOKEN")
STRAVA_REFRESH_TOKEN = os.getenv("STRAVA_REFRESH_TOKEN")
STRAVA_TOKEN_EXPIRES_AT = float(os.getenv("STRAVA_TOKEN_EXPIRES_AT"))
# NOTE(review): with the line below disabled, the `client` created in an earlier cell is
# used and the three values loaded above are otherwise unused.
# client = stravalib.Client(access_token=STRAVA_ACCESS_TOKEN, refresh_token=STRAVA_REFRESH_TOKEN, token_expires=STRAVA_TOKEN_EXPIRES_AT)

# Sample the clock once so both bounds are measured from the same instant.
now = datetime.now()
bfr = now + timedelta(days=1)
aftr = now - timedelta(days=2)

result = client.get_activities(before=bfr, after=aftr)
# Plain loop rather than a list comprehension: the comprehension only existed for its
# side effect and made the cell display a list of None values.
for r in result:
    print(r)

Using my wrapper to handle refresh

In [None]:
from util.strava_client_interface import StravaWrapper
from datetime import datetime, timedelta

# Sample the clock once so both bounds are measured from the same instant.
now = datetime.now()
bfr = now + timedelta(days=1)
aftr = now - timedelta(days=2)

res = StravaWrapper().get_activities(before=bfr, after=aftr)

# Materialise the results once so they can be printed and then inspected.
# NOTE(review): assumes `res` is a one-shot iterator (as stravalib's result iterator is);
# in that case calling next() after printing everything would find nothing left. Confirm
# against StravaWrapper.
activities = list(res)

for activity in activities:
    print(activity)

# Show the first activity as a plain dict; model_dump() matches the athlete cells further
# down. Displays nothing when the window contains no activities.
activities[0].model_dump() if activities else None

## Bronze data exploration

In [5]:
from util.db_connection import PsycopgConnection
import pandas as pd

In [6]:
# Read the whole bronze activity table into a DataFrame through the project's
# connection wrapper, then preview the first rows.
sql = "SELECT * FROM bronze_all_activity"
conn = PsycopgConnection().conn
df = pd.read_sql(sql=sql, con=conn)
df.head()

Unnamed: 0,id,achievement_count,athlete,athlete_count,average_speed,average_watts,comment_count,commute,device_watts,distance,...,location_country,pr_count,suffer_score,has_heartrate,average_heartrate,max_heartrate,average_cadence,from_accepted_tag,visibility,ingested_at
0,16150001543,6,"{'id': 105333117, 'resource_state': 1}",2,7.84,157.2,0,False,True,30661.9,...,,2,,False,,,62.2,False,everyone,2025-10-17 16:49:29.601651+00:00
1,16146455214,0,"{'id': 105333117, 'resource_state': 1}",1,2.303,,0,False,,8214.2,...,,0,78.0,True,158.3,180.0,81.1,False,everyone,2025-10-17 16:49:29.601651+00:00
2,16135051178,1,"{'id': 105333117, 'resource_state': 1}",1,2.164,,0,False,,5503.9,...,,0,49.0,True,154.4,169.0,81.0,False,everyone,2025-10-17 16:49:29.601651+00:00
3,16124856929,0,"{'id': 105333117, 'resource_state': 1}",1,8.047,188.6,0,False,True,48456.8,...,,0,,False,,,69.5,False,everyone,2025-10-17 16:49:29.601651+00:00
4,16115294018,5,"{'id': 105333117, 'resource_state': 1}",1,2.354,,0,False,,10041.9,...,,2,221.0,True,179.4,189.0,81.3,False,everyone,2025-10-17 16:49:29.601651+00:00


In [7]:
# Summarise column dtypes and non-null counts of the freshly loaded bronze frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 55 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   id                      185 non-null    int64              
 1   achievement_count       185 non-null    int64              
 2   athlete                 185 non-null    object             
 3   athlete_count           185 non-null    int64              
 4   average_speed           185 non-null    float64            
 5   average_watts           13 non-null     float64            
 6   comment_count           185 non-null    int64              
 7   commute                 185 non-null    bool               
 8   device_watts            13 non-null     object             
 9   distance                185 non-null    float64            
 10  elapsed_time            185 non-null    int64              
 11  elev_high               183 non-null    float

To drop moving forward:
- commute
- device_watts
- flagged
- has_kudoed
- hide_from_home
- kilojoules
- upload_id_str
- workout_type
- utc_offset
- location_[city/state/country]
- has_heartrate
- from_accepted_tag
- manual
- private
- type (same as sport_type but I like the naming better on the other)

In [8]:
# Columns listed in the markdown cell above as not needed moving forward.
columns_to_drop = [
    "commute",
    "device_watts",
    "flagged",
    "has_kudoed",
    "hide_from_home",
    "kilojoules",
    "upload_id_str",
    "workout_type",
    "utc_offset",
    "location_city",
    "location_state",
    "location_country",
    "has_heartrate",
    "from_accepted_tag",
    "manual",
    "private",
    "type",
]
df = df.drop(columns=columns_to_drop)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 38 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   id                      185 non-null    int64              
 1   achievement_count       185 non-null    int64              
 2   athlete                 185 non-null    object             
 3   athlete_count           185 non-null    int64              
 4   average_speed           185 non-null    float64            
 5   average_watts           13 non-null     float64            
 6   comment_count           185 non-null    int64              
 7   distance                185 non-null    float64            
 8   elapsed_time            185 non-null    int64              
 9   elev_high               183 non-null    float64            
 10  elev_low                183 non-null    float64            
 11  end_latlng              80 non-null     objec

### Normalizing fields

In [9]:
# Preview the remaining columns before flattening the nested ones.
df.head()

Unnamed: 0,id,achievement_count,athlete,athlete_count,average_speed,average_watts,comment_count,distance,elapsed_time,elev_high,...,trainer,upload_id,weighted_average_watts,pr_count,suffer_score,average_heartrate,max_heartrate,average_cadence,visibility,ingested_at
0,16150001543,6,"{'id': 105333117, 'resource_state': 1}",2,7.84,157.2,0,30661.9,3911,15.6,...,False,17233500000.0,159.0,2,,,,62.2,everyone,2025-10-17 16:49:29.601651+00:00
1,16146455214,0,"{'id': 105333117, 'resource_state': 1}",1,2.303,,0,8214.2,3760,15.7,...,False,17229920000.0,,0,78.0,158.3,180.0,81.1,everyone,2025-10-17 16:49:29.601651+00:00
2,16135051178,1,"{'id': 105333117, 'resource_state': 1}",1,2.164,,0,5503.9,2566,15.6,...,False,17218420000.0,,0,49.0,154.4,169.0,81.0,everyone,2025-10-17 16:49:29.601651+00:00
3,16124856929,0,"{'id': 105333117, 'resource_state': 1}",1,8.047,188.6,0,48456.8,6022,131.8,...,False,17208130000.0,209.0,0,,,,69.5,everyone,2025-10-17 16:49:29.601651+00:00
4,16115294018,5,"{'id': 105333117, 'resource_state': 1}",1,2.354,,0,10041.9,4265,9.6,...,False,17198410000.0,,2,221.0,179.4,189.0,81.3,everyone,2025-10-17 16:49:29.601651+00:00


In [10]:
# Flatten the nested athlete dict: keep only its "id" as an integer column.
athlete_ids = df["athlete"].map(lambda athlete: athlete["id"])
df["athlete_id"] = athlete_ids.astype(int)
df["athlete_id"].head()

0    105333117
1    105333117
2    105333117
3    105333117
4    105333117
Name: athlete_id, dtype: int64

In [11]:
# Split the end [lat, lng] pair into two float columns; rows with no pair become NaN.
end_pairs = df["end_latlng"]
df["end_lat"] = end_pairs.map(lambda pair: None if pair is None else pair[0]).astype(float)
df["end_lng"] = end_pairs.map(lambda pair: None if pair is None else pair[1]).astype(float)

df[["end_lat", "end_lng"]].head()

Unnamed: 0,end_lat,end_lng
0,-11.650947,166.947503
1,-8.045512,-34.896431
2,-8.045453,-34.897294
3,-11.658962,166.974495
4,-8.141395,-34.903384


In [12]:
# Split the start [lat, lng] pair into two float columns; rows with no pair become NaN.
start_pairs = df["start_latlng"]
df["start_lat"] = start_pairs.map(lambda pair: None if pair is None else pair[0]).astype(float)
df["start_lng"] = start_pairs.map(lambda pair: None if pair is None else pair[1]).astype(float)

df[["start_lat", "start_lng"]].head()

Unnamed: 0,start_lat,start_lng
0,-11.63549,166.952361
1,-8.045901,-34.895703
2,-8.045869,-34.895688
3,-11.636288,166.950131
4,-8.141454,-34.903365


In [13]:
# Keep only the encoded summary polyline from the nested map dict, as a string column.
summary_polylines = df["map"].map(lambda activity_map: activity_map["summary_polyline"])
df["map_polyline"] = summary_polylines.astype(str)
df["map_polyline"].head(25)

0     xq_fAes~y^pAzCvBrBtC`BtGnBjBdArEpKD^]PgBcB{AW_...
1     dkbp@xz~sESvA?TEPGB?REh@ELE@OD[CqA@[AUFYDkA?GD...
2     `kbp@h{~sEAf@GTC`@UbAIr@Od@IlAEjACFBlBCz@BnAHj...
3     ~q_fAq|~y^zIoHQeAyCmCbCwAqAiOfAkExCsClZyFnAiAj...
4     tytp@`~_tEc@MKIWGe@C_@M}@Qc@Me@Gq@B}@_@g@Q_@GW...
5                                                      
6     bq_fAy{bz^y@qCy@wDD}@n@sC@}@Uw@{AiCGe@Tm@h@YrB...
7                                                      
8     tjbp@`~~sESz@AVKLu@Je@@y@OSBy@@c@HWJi@Hc@Li@JS...
9                                                      
10    hjbp@f__tESjACDa@FQEg@Aa@Ii@?YP}@C]DWH}@L{ANUF...
11                                                     
12                                                     
13    dkbp@tz~sEEVEp@If@Od@G\Ud@K?QI_@Bi@?i@DYPIKq@A...
14    `}bp@`c~sEv@aBPWJGFKHg@Le@Zo@Le@Ri@l@}B\gCJwAH...
15                                                     
16                                                     
17                                              

In [14]:
# Coerce upload_id to numbers (unparseable values become missing) and store it as
# pandas' nullable integer dtype so the ids are shown as integers.
upload_id_numeric = pd.to_numeric(df["upload_id"], errors='coerce')
df["upload_id"] = upload_id_numeric.astype('Int64')
df["upload_id"].head()

0    17233502746
1    17229915919
2    17218420332
3    17208134526
4    17198412372
Name: upload_id, dtype: Int64

In [15]:
# The nested source columns have been flattened into scalar columns above, so drop them.
nested_columns = ["athlete", "end_latlng", "start_latlng", "map"]
df = df.drop(columns=nested_columns)
df

Unnamed: 0,id,achievement_count,athlete_count,average_speed,average_watts,comment_count,distance,elapsed_time,elev_high,elev_low,...,max_heartrate,average_cadence,visibility,ingested_at,athlete_id,end_lat,end_lng,start_lat,start_lng,map_polyline
0,16150001543,6,2,7.840,157.2,0,30661.9,3911,15.6,-6.2,...,,62.2,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-11.650947,166.947503,-11.635490,166.952361,xq_fAes~y^pAzCvBrBtC`BtGnBjBdArEpKD^]PgBcB{AW_...
1,16146455214,0,1,2.303,,0,8214.2,3760,15.7,10.0,...,180.0,81.1,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.045512,-34.896431,-8.045901,-34.895703,dkbp@xz~sESvA?TEPGB?REh@ELE@OD[CqA@[AUFYDkA?GD...
2,16135051178,1,1,2.164,,0,5503.9,2566,15.6,11.0,...,169.0,81.0,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.045453,-34.897294,-8.045869,-34.895688,`kbp@h{~sEAf@GTC`@UbAIr@Od@IlAEjACFBlBCz@BnAHj...
3,16124856929,0,1,8.047,188.6,0,48456.8,6022,131.8,-6.2,...,,69.5,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-11.658962,166.974495,-11.636288,166.950131,~q_fAq|~y^zIoHQeAyCmCbCwAqAiOfAkExCsClZyFnAiAj...
4,16115294018,5,1,2.354,,0,10041.9,4265,9.6,8.3,...,189.0,81.3,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.141395,-34.903384,-8.141454,-34.903365,tytp@`~_tEc@MKIWGe@C_@M}@Qc@Me@Gq@B}@_@g@Q_@GW...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180,7597729981,44,1,3.521,,0,36231.5,13048,20.4,6.3,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.046337,-34.894611,-8.047144,-34.895312,r}bp@vc~sErBeFfBaFz@iHc@yGfAuEfLqPxA}AhCiFfNeW...
181,7521870897,37,1,4.706,,0,29604.9,6974,17.2,8.4,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.046125,-34.894566,-8.046290,-34.894694,j|bp@rf~sEbA}BIQJNHo@xBcF~@uDr@eGq@uEnAkG|GwJj...
182,7446895768,4,1,5.173,,0,34007.7,7299,17.2,8.4,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.046166,-34.894506,-8.046150,-34.894588,nobp@dj~sELHY@FbAWi@@U^TQOh@Iq@?FBXk@lA\|BpAp@...
183,16177930498,6,1,2.262,,0,11002.3,4869,9.6,7.7,...,180.0,81.5,everyone,2025-10-18 16:32:29.737640+00:00,105333117,-8.107144,-34.887845,-8.108466,-34.888274,l}np@vf}sEv@R|@\XR|@Rx@Zd@XxAVjCdAtCbBfBx@lAz@...


## Analysing fields when type is Run

In [16]:
# Subset of activities whose sport_type is exactly "Run".
is_run = df["sport_type"].eq("Run")
df_run = df[is_run]
df_run.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 1 to 184
Data columns (total 40 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   id                      95 non-null     int64              
 1   achievement_count       95 non-null     int64              
 2   athlete_count           95 non-null     int64              
 3   average_speed           95 non-null     float64            
 4   average_watts           0 non-null      float64            
 5   comment_count           95 non-null     int64              
 6   distance                95 non-null     float64            
 7   elapsed_time            95 non-null     int64              
 8   elev_high               93 non-null     float64            
 9   elev_low                93 non-null     float64            
 10  external_id             93 non-null     object             
 11  gear_id                 87 non-null     object     

Some fields are null because those metrics only apply to another type of sport (cycling, in general).

In [17]:
# Power columns: average_watts has no values at all for runs (see the info output above);
# the other two are presumably empty for the same reason.
power_columns = ["average_watts", "max_watts", "weighted_average_watts"]
df_run = df_run.drop(columns=power_columns)

df_run.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95 entries, 1 to 184
Data columns (total 37 columns):
 #   Column                Non-Null Count  Dtype              
---  ------                --------------  -----              
 0   id                    95 non-null     int64              
 1   achievement_count     95 non-null     int64              
 2   athlete_count         95 non-null     int64              
 3   average_speed         95 non-null     float64            
 4   comment_count         95 non-null     int64              
 5   distance              95 non-null     float64            
 6   elapsed_time          95 non-null     int64              
 7   elev_high             93 non-null     float64            
 8   elev_low              93 non-null     float64            
 9   external_id           93 non-null     object             
 10  gear_id               87 non-null     object             
 11  kudos_count           95 non-null     int64              
 12  max_speed     

In [18]:
# Display the run-only frame after removing the power columns.
df_run

Unnamed: 0,id,achievement_count,athlete_count,average_speed,comment_count,distance,elapsed_time,elev_high,elev_low,external_id,...,max_heartrate,average_cadence,visibility,ingested_at,athlete_id,end_lat,end_lng,start_lat,start_lng,map_polyline
1,16146455214,0,1,2.303,0,8214.2,3760,15.7,10.0,garmin_ping_491115119561,...,180.0,81.1,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.045512,-34.896431,-8.045901,-34.895703,dkbp@xz~sESvA?TEPGB?REh@ELE@OD[CqA@[AUFYDkA?GD...
2,16135051178,1,1,2.164,0,5503.9,2566,15.6,11.0,garmin_ping_490751024155,...,169.0,81.0,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.045453,-34.897294,-8.045869,-34.895688,`kbp@h{~sEAf@GTC`@UbAIr@Od@IlAEjACFBlBCz@BnAHj...
4,16115294018,5,1,2.354,0,10041.9,4265,9.6,8.3,garmin_ping_490058388208,...,189.0,81.3,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.141395,-34.903384,-8.141454,-34.903365,tytp@`~_tEc@MKIWGe@C_@M}@Qc@Me@Gq@B}@_@g@Q_@GW...
5,16022153602,0,1,2.195,0,7000.0,3189,0.0,0.0,garmin_ping_486966935977,...,177.0,81.7,everyone,2025-10-17 16:49:29.601651+00:00,105333117,,,,,
8,16082941463,1,1,2.301,0,5013.3,2193,15.7,11.0,garmin_ping_488978228106,...,180.0,81.6,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.046474,-34.894529,-8.045601,-34.896103,tjbp@`~~sESz@AVKLu@Je@@y@OSBy@@c@HWJi@Hc@Li@JS...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176,11169839326,15,1,2.553,0,5241.7,2261,16.6,15.6,stripped_health_data_105333117_1712999703.fit,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.035893,-34.904467,-8.036509,-34.904311,nr`p@zq`tERBHCVQZYPGNS?SQe@AAC@CCO_@OAEEDG?EIM...
177,11117145681,0,3,2.196,0,2987.1,1363,8.6,1.8,stripped_health_data_105333117_1712396835.fit,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.101038,-34.884919,-8.106488,-34.887421,f|mp@dw|sE[E}@Wi@Gu@]e@MYMYIs@]{@[UCaBq@g@OoAk...
178,11066882010,26,2,2.221,0,5433.5,2898,16.6,14.9,stripped_health_data_105333117_1711790148.fit,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.036405,-34.904158,-8.036583,-34.903937,lp`p@fo`tEDJ?ABBJ?PL@BCAB@?HHLEE@?GC@@E@AA\RJL...
183,16177930498,6,1,2.262,0,11002.3,4869,9.6,7.7,garmin_ping_492174361241,...,180.0,81.5,everyone,2025-10-18 16:32:29.737640+00:00,105333117,-8.107144,-34.887845,-8.108466,-34.888274,l}np@vf}sEv@R|@\XR|@Rx@Zd@XxAVjCdAtCbBfBx@lAz@...


## Fields when type is Ride or VirtualRide

In [19]:
# Subset of activities that are outdoor or virtual rides.
is_ride = df["sport_type"].isin(["Ride", "VirtualRide"])
df_ride = df[is_ride]
df_ride.info()

<class 'pandas.core.frame.DataFrame'>
Index: 87 entries, 0 to 182
Data columns (total 40 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   id                      87 non-null     int64              
 1   achievement_count       87 non-null     int64              
 2   athlete_count           87 non-null     int64              
 3   average_speed           87 non-null     float64            
 4   average_watts           13 non-null     float64            
 5   comment_count           87 non-null     int64              
 6   distance                87 non-null     float64            
 7   elapsed_time            87 non-null     int64              
 8   elev_high               87 non-null     float64            
 9   elev_low                87 non-null     float64            
 10  external_id             87 non-null     object             
 11  gear_id                 0 non-null      object     

In [20]:
# Display the ride-only frame for comparison with the run-only one.
df_ride

Unnamed: 0,id,achievement_count,athlete_count,average_speed,average_watts,comment_count,distance,elapsed_time,elev_high,elev_low,...,max_heartrate,average_cadence,visibility,ingested_at,athlete_id,end_lat,end_lng,start_lat,start_lng,map_polyline
0,16150001543,6,2,7.840,157.2,0,30661.9,3911,15.6,-6.2,...,,62.2,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-11.650947,166.947503,-11.635490,166.952361,xq_fAes~y^pAzCvBrBtC`BtGnBjBdArEpKD^]PgBcB{AW_...
3,16124856929,0,1,8.047,188.6,0,48456.8,6022,131.8,-6.2,...,,69.5,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-11.658962,166.974495,-11.636288,166.950131,~q_fAq|~y^zIoHQeAyCmCbCwAqAiOfAkExCsClZyFnAiAj...
6,16110063242,0,1,9.007,178.6,0,16419.9,1823,17.6,12.6,...,,67.8,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-11.642264,166.992512,-11.636142,166.972513,bq_fAy{bz^y@qCy@wDD}@n@sC@}@Uw@{AiCGe@Tm@h@YrB...
7,16093491911,0,1,7.254,117.4,0,25048.2,3453,0.0,0.0,...,,67.8,everyone,2025-10-17 16:49:29.601651+00:00,105333117,,,,,
9,16072362664,0,1,7.537,129.7,0,45230.7,6001,0.0,0.0,...,,63.2,everyone,2025-10-17 16:49:29.601651+00:00,105333117,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,12128252132,0,1,3.193,,0,555.6,174,0.0,0.0,...,139.0,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,,,,,
179,7598459186,29,1,5.134,,0,30659.1,6739,17.2,8.6,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.046307,-34.894580,-8.046365,-34.894811,j|bp@te~sEbBiEBUd@}@xB_HZ}CCi@La@K{AYuACq@fAmF...
180,7597729981,44,1,3.521,,0,36231.5,13048,20.4,6.3,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.046337,-34.894611,-8.047144,-34.895312,r}bp@vc~sErBeFfBaFz@iHc@yGfAuEfLqPxA}AhCiFfNeW...
181,7521870897,37,1,4.706,,0,29604.9,6974,17.2,8.4,...,,,everyone,2025-10-17 16:49:29.601651+00:00,105333117,-8.046125,-34.894566,-8.046290,-34.894694,j|bp@rf~sEbA}BIQJNHo@xBcF~@uDr@eGq@uEnAkG|GwJj...


The only difference is that runs do not have wattage. That does not justify creating two tables, so we'll keep one.

In [21]:
# List every column that contains at least one missing value.
columns_with_na = [col for col in df.columns if df[col].isna().any()]
for col in columns_with_na:
    print(f"{col} has NA values")

average_watts has NA values
elev_high has NA values
elev_low has NA values
external_id has NA values
gear_id has NA values
max_watts has NA values
upload_id has NA values
weighted_average_watts has NA values
suffer_score has NA values
average_heartrate has NA values
max_heartrate has NA values
average_cadence has NA values
end_lat has NA values
end_lng has NA values
start_lat has NA values
start_lng has NA values


In [22]:
# Try to swap every missing value for None, then repeat the NA check.
# The check below still reports the same columns as before this attempt.
df = df.where(df.notna(), None)
columns_with_na = [col for col in df.columns if df[col].isna().any()]
for col in columns_with_na:
    print(f"{col} has NA values")

average_watts has NA values
elev_high has NA values
elev_low has NA values
external_id has NA values
gear_id has NA values
max_watts has NA values
upload_id has NA values
weighted_average_watts has NA values
suffer_score has NA values
average_heartrate has NA values
max_heartrate has NA values
average_cadence has NA values
end_lat has NA values
end_lng has NA values
start_lat has NA values
start_lng has NA values


In [23]:
# Same check through isnull(); it reports the same columns as the isna() check.
columns_with_null = [col for col in df.columns if df[col].isnull().any()]
for col in columns_with_null:
    print(f"{col} has NULL values")

average_watts has NULL values
elev_high has NULL values
elev_low has NULL values
external_id has NULL values
gear_id has NULL values
max_watts has NULL values
upload_id has NULL values
weighted_average_watts has NULL values
suffer_score has NULL values
average_heartrate has NULL values
max_heartrate has NULL values
average_cadence has NULL values
end_lat has NULL values
end_lng has NULL values
start_lat has NULL values
start_lng has NULL values


In [24]:
import numpy as np

In [25]:
# For each column, count which kind of "missing" marker it holds: Python None,
# a float NaN, or pandas' pd.NA. Columns with none of them are skipped.
for col in df.columns:
    column_values = df[col]
    none_count = column_values.apply(lambda value: value is None).sum()
    nan_count = column_values.apply(lambda value: isinstance(value, float) and np.isnan(value)).sum()
    pda_count = column_values.apply(lambda value: value is pd.NA).sum()

    if none_count + nan_count + pda_count == 0:
        continue
    print(f"{col}: None={none_count}, np.nan={nan_count}, pd.NA={pda_count}")


average_watts: None=0, np.nan=172, pd.NA=0
elev_high: None=0, np.nan=2, pd.NA=0
elev_low: None=0, np.nan=2, pd.NA=0
external_id: None=2, np.nan=0, pd.NA=0
gear_id: None=98, np.nan=0, pd.NA=0
max_watts: None=0, np.nan=172, pd.NA=0
upload_id: None=0, np.nan=2, pd.NA=0
weighted_average_watts: None=0, np.nan=172, pd.NA=0
suffer_score: None=0, np.nan=17, pd.NA=0
average_heartrate: None=0, np.nan=25, pd.NA=0
max_heartrate: None=0, np.nan=25, pd.NA=0
average_cadence: None=0, np.nan=87, pd.NA=0
end_lat: None=0, np.nan=105, pd.NA=0
end_lng: None=0, np.nan=105, pd.NA=0
start_lat: None=0, np.nan=105, pd.NA=0
start_lng: None=0, np.nan=105, pd.NA=0


In [26]:
# Replace NaN with None across the frame, then recount the missing-value markers
# per column to confirm the conversion.
df = df.replace({np.nan: None})

for col in df.columns:
    column_values = df[col]
    none_count, nan_count, pda_count = (
        column_values.apply(is_marker).sum()
        for is_marker in (
            lambda x: x is None,
            lambda x: isinstance(x, float) and np.isnan(x),
            lambda x: x is pd.NA,
        )
    )

    if none_count + nan_count + pda_count > 0:
        print(f"{col}: None={none_count}, np.nan={nan_count}, pd.NA={pda_count}")


average_watts: None=172, np.nan=0, pd.NA=0
elev_high: None=2, np.nan=0, pd.NA=0
elev_low: None=2, np.nan=0, pd.NA=0
external_id: None=2, np.nan=0, pd.NA=0
gear_id: None=98, np.nan=0, pd.NA=0
max_watts: None=172, np.nan=0, pd.NA=0
upload_id: None=2, np.nan=0, pd.NA=0
weighted_average_watts: None=172, np.nan=0, pd.NA=0
suffer_score: None=17, np.nan=0, pd.NA=0
average_heartrate: None=25, np.nan=0, pd.NA=0
max_heartrate: None=25, np.nan=0, pd.NA=0
average_cadence: None=87, np.nan=0, pd.NA=0
end_lat: None=105, np.nan=0, pd.NA=0
end_lng: None=105, np.nan=0, pd.NA=0
start_lat: None=105, np.nan=0, pd.NA=0
start_lng: None=105, np.nan=0, pd.NA=0


# Athlete endpoints

In [27]:
# Fetch the authenticated athlete's profile through the project's Strava wrapper.
from util.strava_client_interface import StravaWrapper

strava = StravaWrapper()

athlete = strava.get_athlete_personal_info()

In [28]:
# Show the athlete profile as a plain dict.
# NOTE(review): the output contains personal data (name, profile image URLs); clear it before sharing.
athlete.model_dump()

{'id': 105333117,
 'city': 'Recife',
 'country': None,
 'created_at': datetime.datetime(2022, 7, 7, 19, 10, 14, tzinfo=TzInfo(0)),
 'firstname': 'Luis',
 'lastname': 'Cruz',
 'premium': True,
 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/105333117/38187502/5/large.jpg',
 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/105333117/38187502/5/medium.jpg',
 'resource_state': 3,
 'sex': 'M',
 'state': 'Brasil',
 'summit': True,
 'updated_at': datetime.datetime(2025, 10, 11, 16, 6, 31, tzinfo=TzInfo(0)),
 'bikes': [],
 'clubs': [],
 'follower_count': 5,
 'friend_count': 12,
 'ftp': 188,
 'measurement_preference': 'meters',
 'shoes': [{'distance': 70282.0,
   'id': 'g23811341',
   'name': 'Olympikus Corre 4',
   'primary': False,
   'resource_state': 2},
  {'distance': 375873.0,
   'id': 'g24651855',
   'name': 'New Balance Rebel v4',
   'primary': False,
   'resource_state': 2}],
 'weight': 91.0,
 'athlete_type': 'runner',
 'friend': None,
 'fol

In [29]:
# Fetch the athlete's aggregate totals and show them as a plain dict.
stats = strava.get_athlete_personal_stats()
stats.model_dump()

{'all_ride_totals': {'achievement_count': None,
  'count': 87,
  'distance': 511865.9999999999,
  'elapsed_time': 154950,
  'elevation_gain': 1018.1999999999999,
  'moving_time': 147348},
 'all_run_totals': {'achievement_count': None,
  'count': 95,
  'distance': 480278.9,
  'elapsed_time': 223032,
  'elevation_gain': 486.7000000000001,
  'moving_time': 215133},
 'all_swim_totals': {'achievement_count': None,
  'count': 0,
  'distance': 0.0,
  'elapsed_time': 0,
  'elevation_gain': 0.0,
  'moving_time': 0},
 'biggest_climb_elevation_gain': 14.0,
 'biggest_ride_distance': 36231.5,
 'recent_ride_totals': {'achievement_count': 0,
  'count': 10,
  'distance': 234939.8,
  'elapsed_time': 30986,
  'elevation_gain': 634.0,
  'moving_time': 30986},
 'recent_run_totals': {'achievement_count': 0,
  'count': 20,
  'distance': 141077.9,
  'elapsed_time': 64699,
  'elevation_gain': 101.7,
  'moving_time': 62786},
 'recent_swim_totals': {'achievement_count': 0,
  'count': 0,
  'distance': 0.0,
  'el

In [30]:
import polyline


# Take the run with the greatest distance and decode its encoded route into (lat, lon) pairs.
longest_run = df_run.loc[df_run["distance"].idxmax()]
polyline_str = longest_run["map_polyline"]
coords = polyline.decode(polyline_str)

# Create a DataFrame for Streamlit: one row per route point, plus a [lon, lat] pair column.
# NOTE(review): this rebinds `df`, which the cells above use for the activities table.
df = pd.DataFrame(coords, columns=["lat", "lon"])
df["coordinates"] = [[lon, lat] for lat, lon in coords]

df

Unnamed: 0,lat,lon,coordinates
0,-8.04556,-34.89681,"[-34.89681, -8.04556]"
1,-8.04552,-34.89711,"[-34.89711, -8.04552]"
2,-8.04542,-34.89745,"[-34.89745, -8.04542]"
3,-8.04539,-34.89773,"[-34.89773, -8.04539]"
4,-8.04531,-34.89803,"[-34.89803, -8.04531]"
...,...,...,...
362,-8.04530,-34.89764,"[-34.89764, -8.0453]"
363,-8.04548,-34.89692,"[-34.89692, -8.04548]"
364,-8.04548,-34.89670,"[-34.8967, -8.04548]"
365,-8.04555,-34.89631,"[-34.89631, -8.04555]"


In [31]:
# Print the number of missing values in each column of the route frame, one per line.
for na_total in df.isna().sum():
    print(na_total)

0
0
0


In [32]:
# Single-column frame of route points; each point is flipped from (lat, lon) to [lon, lat].
path_points = [[point[1], point[0]] for point in coords]
df2 = pd.DataFrame({"path": path_points})

df2

Unnamed: 0,path
0,"[-34.89681, -8.04556]"
1,"[-34.89711, -8.04552]"
2,"[-34.89745, -8.04542]"
3,"[-34.89773, -8.04539]"
4,"[-34.89803, -8.04531]"
...,...
362,"[-34.89764, -8.0453]"
363,"[-34.89692, -8.04548]"
364,"[-34.8967, -8.04548]"
365,"[-34.89631, -8.04555]"
