In [None]:
import pandas as pd

import seaborn as sns

from health_tracking import AppleHealthParser

%matplotlib inline

In [None]:
# Create the Apple Health parser with its default arguments.
parser = AppleHealthParser()

In [None]:
# extract_workouts() returns two values, unpacked here as the workouts table
# (presumably a pandas DataFrame — it is indexed like one below) and the workout types.
workouts_df, workout_types = parser.extract_workouts()
# Display the workout types so the label used for filtering can be checked.
workout_types

Pick the recorded running workouts.

In [None]:
# Select the rows whose activity type is "running", detach them from
# workouts_df with a copy, and renumber the rows (reset_index() without
# drop=True keeps the previous index as an extra "index" column).
running_df = (
    workouts_df
    .loc[lambda frame: frame["workoutActivityType"] == "running"]
    .copy()
    .reset_index()
)
running_df.head()

#### Create `minutesPerKm` Column

In [None]:
def minutes_per_km(row):
    """Return ``row["duration"] / row["totalDistance"]``, or 0 if it cannot be computed.

    Args:
        row: Mapping-like object (for example a DataFrame row) that provides
            the ``"duration"`` and ``"totalDistance"`` entries.

    Returns:
        The quotient of the two entries, or ``0`` when an entry is missing,
        has a type that cannot be divided, or the division is arithmetically
        invalid (for example a zero distance).
    """
    try:
        return row["duration"] / row["totalDistance"]
    except (ArithmeticError, KeyError, TypeError):
        # Only data problems fall back to 0. A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
        return 0

# Call minutes_per_km once per row (axis=1) and store the results as a new column.
running_df["minutesPerKm"] = running_df.apply(minutes_per_km, axis=1)
running_df.head()

#### Remove Outliers

Removing values above 8 and below 4 minutes/km.

In [None]:
# Keep only rows whose minutesPerKm lies within [4, 8] (both bounds inclusive).
# The trailing .copy() makes the result an independent frame, so adding
# columns to it afterwards does not trigger pandas' SettingWithCopyWarning.
running_df = running_df[running_df["minutesPerKm"].between(4, 8)].copy()

#### Plot `minutesPerKm`

In [None]:
# Plot minutesPerKm against the row position (0, 1, 2, ...) with a regression fit.
# x and y are passed by keyword because current seaborn releases no longer
# accept them positionally. The positions are passed as a plain list so they
# are matched to the rows by order rather than by index label.
sns.jointplot(
    x=list(range(running_df.shape[0])),
    y="minutesPerKm",
    data=running_df,
    kind="reg",
    xlim=(-5, running_df.shape[0] + 5),
)

#### Use "Natural Distance" for Days

A timestamp cannot be used directly as the x-axis of this plot. Therefore, use day offsets counted from the first training day.

Spacing the workouts on the time axis (x-axis) by their real distance in days, instead of by row position, increases the "correctness" of the plot.

In [None]:
# Use positional access (.iloc): after the outlier filter the index labels are
# no longer guaranteed to start at 0, and the label `running_df.shape[0]` lies
# one past the last row, so label-based lookups can raise a KeyError.
# NOTE(review): assumes the rows are in chronological order — confirm.
first_workout = running_df["creationDate"].iloc[0]
last_workout = running_df["creationDate"].iloc[-1]

In [None]:
# Whole days elapsed between each workout's creationDate and first_workout.
running_df["dayOffset"] = [
    (created_at - first_workout).days for created_at in running_df["creationDate"]
]

In [None]:
# Regression joint plot of minutesPerKm over dayOffset. x and y are passed by
# keyword because current seaborn releases no longer accept them positionally.
jointplot = sns.jointplot(
    x="dayOffset",
    y="minutesPerKm",
    data=running_df,
    kind="reg",
    xlim=(-10, running_df["dayOffset"].max() + 10),
)

#### Combine `minutesPerKm` and `totalDistance`

Add vertical lines for new years.

In [None]:
def get_year_offsets(first_workout, last_workout):
    """Return the day offset of every January 1st after the first workout.

    Args:
        first_workout: Timestamp of the first workout; its ``year`` and ``tz``
            attributes are read, and offsets are measured from it.
        last_workout: Timestamp of the last workout; only its ``year`` is read.

    Returns:
        A list with one entry per year from the year after ``first_workout``
        up to and including the year of ``last_workout``: the number of whole
        days between ``first_workout`` and midnight on January 1st of that
        year, in the time zone of ``first_workout``. Empty when both workouts
        fall in the same year.
    """
    following_years = range(first_workout.year + 1, last_workout.year + 1)
    return [
        (pd.Timestamp(f"1.1.{year}", tz=first_workout.tz) - first_workout).days
        for year in following_years
    ]

In [None]:
# minutesPerKm over dayOffset, coloured by totalDistance. x and y are passed by
# keyword because current seaborn releases no longer accept them positionally.
scatter = sns.scatterplot(x="dayOffset", y="minutesPerKm", data=running_df, hue="totalDistance")

# Mark every New Year with a vertical line. axvline's ymin/ymax are fractions
# of the axes height (0 = bottom, 1 = top), not data values, so the defaults
# already span the whole plot.
for new_year_offset in get_year_offsets(first_workout, last_workout):
    scatter.axvline(new_year_offset)

## Same Plots as above with Package Functions

In [None]:
from health_tracking.workouts import Workouts

In [None]:
# Create the package's Workouts object with its default arguments.
workouts = Workouts()

In [None]:
# Display the workout types exposed by the Workouts object.
workouts.workout_types

In [None]:
# Preview the first rows of the "runnings" entry.
# NOTE(review): the key here is "runnings", while the manual filter earlier in
# this notebook matched the activity type "running" — confirm the key is intended.
workouts["runnings"].head()

In [None]:
# Joint plot of minutesPerKm over dayOffset through the package API.
# NOTE(review): outlier=(4, 8) presumably mirrors the manual 4–8 filter on
# minutesPerKm used earlier in this notebook — confirm against Workouts.plot.
joint_plot = workouts.plot("dayOffset", "minutesPerKm", "joint", outlier=(4, 8))

In [None]:
# Scatter plot of minutesPerKm over dayOffset through the package API.
# NOTE(review): z="totalDistance" presumably plays the role of the hue in the
# manual scatter plot, and outlier=(4, 8) the manual 4–8 filter — confirm
# against Workouts.plot.
scatter_plot = workouts.plot("dayOffset", "minutesPerKm", "scatter", z="totalDistance", outlier=(4, 8))