In [50]:
import pandas as pd
from pathlib import Path

# Load the most recent raw export from ../data/raw (relative to the current
# working directory). "Latest" means the greatest path in sort order, so this
# presumably relies on file names sorting chronologically — TODO confirm.
raw_dir = Path().absolute().parent / "data/raw"
csv_file = max(raw_dir.glob("*.csv"), default=None)
if csv_file is None:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError(f"No CSV files found in {raw_dir}")
df = pd.read_csv(csv_file)


In [51]:
# Narrow the raw export down to the columns kept for analysis.
# The order below is the column order of the resulting frame.
keep_columns = [
    # activity label
    "name",
    # volume and duration
    "distance", "moving_time", "elapsed_time", "total_elevation_gain",
    # classification
    "type", "sport_type", "workout_type",
    # timestamps
    "start_date", "start_date_local",
    # speed
    "average_speed", "max_speed",
    # cadence and heart rate
    "average_cadence", "average_heartrate", "max_heartrate",
    # elevation range
    "elev_high", "elev_low",
]

# Select every row, restricted to the chosen columns, then show the result.
df = df.loc[:, keep_columns]
display(df)


Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,start_date,start_date_local,average_speed,max_speed,average_cadence,average_heartrate,max_heartrate,elev_high,elev_low
0,Patellar Tendon Recovery,0.0,979,979,0.0,Workout,Workout,,2022-11-13T17:21:12Z,2022-11-13T12:21:12Z,0.000,0.000,,,,,
1,Lunch Run,1588.4,573,573,0.0,Run,Run,0.0,2022-11-13T17:09:51Z,2022-11-13T12:09:51Z,2.772,3.704,,144.5,155.0,43.3,42.5
2,Morning Run,3089.9,1048,1079,6.9,Run,Run,,2022-11-12T14:03:46Z,2022-11-12T09:03:46Z,2.948,4.340,84.7,150.4,169.0,22.7,4.2
3,Morning Run,4123.5,1387,1446,15.7,Run,Run,0.0,2022-11-11T15:42:39Z,2022-11-11T10:42:39Z,2.973,4.872,87.0,151.8,171.0,24.7,1.2
4,Afternoon Run,3483.3,1347,1396,2.2,Run,Run,0.0,2022-11-10T19:41:26Z,2022-11-10T14:41:26Z,2.586,3.406,83.9,139.5,152.0,21.9,2.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,Evening Run,3845.2,1296,1380,23.7,Run,Run,,2019-10-06T23:12:06Z,2019-10-06T19:12:06Z,2.967,4.600,81.8,162.4,182.0,37.2,23.9
306,Evening Run,0.0,820,820,0.0,Run,Run,3.0,2019-04-06T02:40:00Z,2019-04-05T19:40:00Z,0.000,0.000,,,,,
307,Evening Run,0.0,878,878,0.0,Run,Run,3.0,2019-04-04T22:30:00Z,2019-04-04T15:30:00Z,0.000,0.000,,,,,
308,Afternoon Run,15024.6,3909,3954,55.7,Run,Run,,2013-11-29T22:22:38Z,2013-11-29T17:22:38Z,3.844,4.456,88.5,,,292.8,268.5


In [52]:
# Convert the local start timestamps to datetimes so they can be used as the
# time key for resampling.
df["start_date_local"] = pd.to_datetime(df["start_date_local"])

# Sum of `distance` per weekly ("W") bin of the local start time.
week_agg = (
    df.resample(rule="W", on="start_date_local")["distance"]
    .sum()
)

In [53]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots

# International mile expressed in metres (exact by definition). Used below to
# rescale the weekly aggregates; assumes `distance` is in metres — TODO confirm.
MILE_CONST = 1609.344

# Per-activity ratio of average speed to average heart rate, scaled by 1e6.
# Activities with no heart-rate value produce NaN, which the weekly mean skips.
df["hrrs"] = df["average_speed"] / df["average_heartrate"] * 1e6

# One weekly resample yielding both aggregates: total distance and mean hrrs.
# Requires `start_date_local` to already be a datetime column.
week_agg = df.resample(rule="W", on="start_date_local").agg(
    {"distance": "sum", "hrrs": "mean"}
)

# Keep only weeks after the start of 2020. Weeks without any activity are
# retained (distance 0, hrrs NaN) so gaps remain visible in the plots.
week_agg_filt = week_agg.loc[week_agg.index > "2020"]
# NOTE(review): this divides every column, so `hrrs` is rescaled by MILE_CONST
# along with `distance` — confirm that scaling of hrrs is intended.
week_agg_filt = week_agg_filt / MILE_CONST

# Two stacked panels: weekly distance as a line, weekly mean hrrs as markers.
fig = plotly.subplots.make_subplots(
    rows=2,
    cols=1,
    subplot_titles=("Weekly distance", "Weekly mean hrrs"),
)
fig.add_trace(
    go.Scatter(
        x=week_agg_filt.index,
        y=week_agg_filt["distance"],
        name="distance",
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Scatter(
        x=week_agg_filt.index,
        y=week_agg_filt["hrrs"],
        mode="markers",
        name="hrrs",
    ),
    row=2,
    col=1,
)
# Label the axes so the figure can be read on its own.
fig.update_yaxes(title_text="distance / MILE_CONST", row=1, col=1)
fig.update_yaxes(title_text="hrrs / MILE_CONST", row=2, col=1)
fig.update_xaxes(title_text="week", row=2, col=1)
fig.show()
