# Garmin overview stats

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tkinter as tk
import datetime
from tkinter import filedialog
import ipywidgets as widgets
import copy

pd.options.mode.copy_on_write = True

### Import Garmin CSV

In [None]:
# Ask the user for the Garmin CSV export through a native file dialog.
root = tk.Tk()
root.withdraw()  # hide the empty root window; only the dialog should be visible
root.call("wm", "attributes", ".", "-topmost", True)  # keep the dialog in front
try:
    file_path = filedialog.askopenfilename(
        title="Select Garmin CSV", initialdir=os.getcwd()
    )
finally:
    # Release the hidden Tk root; otherwise it stays alive for the rest of
    # the kernel session.
    root.destroy()
print("Selected file:")
print(file_path)
if not file_path:
    # The dialog returns an empty value when it is cancelled.
    print("No file selected - run this cell again before continuing.")

### Import and preprocess dataframe

In [None]:
# Load the export chosen in the file dialog.
# NOTE(review): decimal="," presumes the export writes decimal commas - confirm
# for exports created with other locale settings.
df_gar = pd.read_csv(file_path, sep=",", header=0, index_col=None, decimal=",")

# Swim distances are divided by 1000 to match the other activities
# (presumably metres -> kilometres; verify against the export).
is_swim = df_gar["Activity Type"].isin(["Pool Swim", "Open Water Swimming"])
df_gar.loc[is_swim, "Distance"] = df_gar.loc[is_swim, "Distance"] / 1000

# Durations: a "NaN" component is treated as "00" before parsing.
df_gar.loc[:, "Time"] = [item.replace("NaN", "00") for item in df_gar.loc[:, "Time"]]
# A comma marks a fractional-seconds part ("%f" accepts 1-6 digits), so the
# format is chosen on the separator rather than on a fixed string length.
df_gar.loc[:, "Time_dt"] = [
    datetime.datetime.strptime(item, "%H:%M:%S,%f" if "," in item else "%H:%M:%S")
    for item in df_gar.loc[:, "Time"]
]
# Duration in minutes; the fractional seconds come from the microsecond field.
df_gar.loc[:, "Time_min"] = [
    item.hour * 60 + item.minute + (item.second + item.microsecond / 1_000_000) / 60
    for item in df_gar.loc[:, "Time_dt"]
]

# Start timestamp of each activity and the calendar year it belongs to.
df_gar.loc[:, "Date_dt"] = [
    datetime.datetime.strptime(item, "%Y-%m-%d %H:%M:%S") for item in df_gar.loc[:, "Date"]
]
df_gar.loc[:, "Year"] = [item.year for item in df_gar.loc[:, "Date_dt"]]


### Summary statistics

In [None]:
# Overall number of workouts in the export.
type_column = df_gar["Activity Type"]
n_workouts = len(type_column)
print("Total number of workouts: {}".format(n_workouts))

# Count the workouts of every distinct activity type.
activity_types = np.unique(type_column)
n_activity_types = np.zeros(len(activity_types))
for idx, name in enumerate(activity_types):
    n_activity_types[idx] = (type_column == name).sum()

# Order the types from most to least frequent.
activity_type_sort_idx = np.argsort(n_activity_types)[::-1]
activity_types_sorted = activity_types[activity_type_sort_idx]
n_activity_types_sorted = n_activity_types[activity_type_sort_idx]

# Print one line per type and keep a per-type sub-frame for later cells.
dfs_type = {}
for name, count in zip(activity_types_sorted, n_activity_types_sorted):
    count = int(count)
    noun = "activities" if count > 1 else "activity"
    print("     {}: {} {}".format(name, count, noun))
    dfs_type[name] = df_gar[type_column == name]

print(" ")

### Select activity type

In [None]:
# Activity-type selector: "All" first, then the types from most to least
# frequent.
activity_types_widget = np.append("All", activity_types_sorted)
type_select = widgets.Select(
    options=activity_types_widget,
    value=activity_types_widget[0],
    description="Activity type to analyse:",
    style={"description_width": "initial"},
)

# Year selector, newest year first (np.unique already returns the years
# sorted ascending). np.append turns the years into strings next to "All".
years_widget = np.unique(df_gar["Year"])[::-1]
years_widget = np.append("All", years_widget)
year_select = widgets.Select(
    options=years_widget,
    value=years_widget[0],
    description="Year to analyse:",
    style={"description_width": "initial"},
)

# When ticked, closely related activity types are analysed together.
combine_select = widgets.Checkbox(
    value=True,
    description='Combine similar activity types'
)

confirm_button = widgets.Button(description="Confirm", disabled=False)
display(type_select, year_select, combine_select, confirm_button)


def confirm_button_func(a):
    """Store the widget selection and build the matching activity frame.

    Reads ``type_select``, ``year_select`` and ``combine_select`` and writes
    the globals ``activity_type_selected``, ``year_selected``,
    ``combine_activity_types`` and ``activity_type_df`` (a copy of the
    matching rows of ``df_gar``; ``df_gar`` itself is not modified).

    Args:
        a: Argument supplied by the button click callback (unused).

    Returns:
        Tuple ``(activity_type_selected, year_selected)``; the year is an
        ``int`` unless "All" is selected.
    """
    global activity_type_selected
    global activity_type_df
    global year_selected
    global combine_activity_types

    # Common label given to similar types when all activities are analysed.
    merged_labels = {
        "Treadmill Running": "Running",
        "Mountain Biking": "Cycling",
        "Walking": "Hiking",
        "Pool Swim": "Swimming",
        "Open Water Swimming": "Swimming",
    }
    # Type whose rows are added when a single type is analysed.
    counterparts = {
        "Running": "Treadmill Running",
        "Treadmill Running": "Running",
        "Cycling": "Mountain Biking",
        "Mountain Biking": "Cycling",
        "Hiking": "Walking",
        "Walking": "Hiking",
        "Pool Swim": "Open Water Swimming",
        "Open Water Swimming": "Pool Swim",
    }

    activity_type_selected = type_select.value
    combine_activity_types = combine_select.value

    if activity_type_selected == "All":
        activity_type_df = df_gar.copy()
        if combine_activity_types:
            for old_label, new_label in merged_labels.items():
                is_old = activity_type_df["Activity Type"] == old_label
                activity_type_df.loc[is_old, "Activity Type"] = new_label
    else:
        activity_type_df = dfs_type[activity_type_selected].copy()
        counterpart = counterparts.get(activity_type_selected)
        # The counterpart may be absent from this export; skip it then
        # instead of failing with a KeyError.
        if combine_activity_types and counterpart in dfs_type:
            activity_type_df = pd.concat([activity_type_df, dfs_type[counterpart]])

    year_selected = year_select.value
    if year_selected != "All":
        # The selector holds the years as strings.
        year_selected = int(year_selected)
        activity_type_df = activity_type_df.loc[activity_type_df["Year"] == year_selected, :]

    print("Selected activity type: " + activity_type_selected)
    print("Selected year: " + str(year_selected))
    return activity_type_selected, year_selected


# Run confirm_button_func every time the "Confirm" button is clicked.
confirm_button.on_click(confirm_button_func)

### Summary statistics for activity type and year

In [None]:
# Totals for the activity type / year chosen with the widgets above.
print("Activity type: {}".format(activity_type_selected))
print("Year: {}".format(year_selected))
n_workouts_ty = len(activity_type_df["Activity Type"])
print("Total number of workouts: {}".format(n_workouts_ty))
print("Total distance: {} km".format(np.round(np.sum(activity_type_df.loc[:, "Distance"]), 1)))
# The number of decimals is an argument of np.round, not of str.format.
print("Total duration: {} hours".format(np.round(np.sum(activity_type_df.loc[:, "Time_min"]) / 60, 1)))

if activity_type_selected == "All":
    # Per-type breakdown: count, total distance and total hours.
    activity_types_ty = np.unique(activity_type_df["Activity Type"])
    n_activity_types_ty = np.zeros(len(activity_types_ty))
    distance_activity_types_ty = np.zeros(len(activity_types_ty))
    hours_activity_types_ty = np.zeros(len(activity_types_ty))
    for i, activity_type in enumerate(activity_types_ty):
        rows = activity_type_df.loc[activity_type_df["Activity Type"] == activity_type]
        n_activity_types_ty[i] = len(rows)
        distance_activity_types_ty[i] = np.sum(rows["Distance"])
        hours_activity_types_ty[i] = np.sum(rows["Time_min"]) / 60

    # Most frequent type first.
    activity_type_sort_idx_ty = np.flip(np.argsort(n_activity_types_ty))
    activity_types_sorted_ty = activity_types_ty[activity_type_sort_idx_ty]
    n_activity_types_sorted_ty = n_activity_types_ty[activity_type_sort_idx_ty]
    distance_activity_types_sorted_ty = distance_activity_types_ty[activity_type_sort_idx_ty]
    hours_activity_types_sorted_ty = hours_activity_types_ty[activity_type_sort_idx_ty]

    for i, activity_type in enumerate(activity_types_sorted_ty):
        print(
            "     {}: {} {}, {} km, {} hours".format(
                activity_type,
                int(n_activity_types_sorted_ty[i]),
                ("activities" if int(n_activity_types_sorted_ty[i]) > 1 else "activity"),
                np.round(distance_activity_types_sorted_ty[i], 1),
                np.round(hours_activity_types_sorted_ty[i], 1)
            )
        )

    print(" ")


### Distance

In [None]:
# Descriptive statistics and histogram of the distance per activity.
distances = activity_type_df["Distance"]
print(
    "Min, max, mean, median distance: {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f} km".format(
        np.min(distances),
        np.max(distances),
        np.mean(distances),
        np.median(distances),
    )
)

if not (distances == 0).all():
    fig_dist, axis = plt.subplots(1, 1, dpi=125)

    # Integer bin width aiming at roughly 15 bins over the observed range.
    binsize = int((np.max(distances) - np.min(distances)) / 15)
    if binsize > 0:
        n_edges = int(np.max(distances) / binsize)
    else:
        # A range below 15 gives a zero bin width; use 15 equal bins
        # instead of dividing by zero.
        n_edges = 16
    n, bins, patches = axis.hist(
        distances,
        bins=np.linspace(0, np.max(distances), max(n_edges, 2)),
        rwidth=0.90,
        label="Occurences",
    )
    # Labels are attached to the artists themselves so that the legend
    # entries cannot be matched to the wrong plot element.
    axis.vlines(np.mean(distances), 0, np.max(n), color="red", alpha=0.5, label="Mean")
    axis.vlines(np.median(distances), 0, np.max(n), color="green", alpha=0.5, label="Median")
    axis.set_title("Histogram of distance ({})".format(activity_type_selected))
    axis.set_xlabel("Distance (km)")
    axis.set_ylabel("Occurences")
    axis.legend()
else:
    print("All distances equal to zero")

### Time

In [None]:

# Descriptive statistics and histogram of the duration per activity.
durations = activity_type_df["Time_min"]
print(
    "Min, max, mean, median time: {:0.2f}, {:0.2f}, {:0.2f}, {:0.2f} minutes".format(
        np.min(durations),
        np.max(durations),
        np.mean(durations),
        np.median(durations),
    )
)

if not (durations == 0).all():
    fig_dist, axis = plt.subplots(1, 1, dpi=125)

    # Integer bin width aiming at roughly 15 bins over the observed range.
    binsize = int((np.max(durations) - np.min(durations)) / 15)
    if binsize > 0:
        n_edges = int(np.max(durations) / binsize)
    else:
        # A range below 15 minutes gives a zero bin width; use 15 equal
        # bins instead of dividing by zero.
        n_edges = 16
    n, bins, patches = axis.hist(
        durations,
        bins=np.linspace(0, np.max(durations), max(n_edges, 2)),
        rwidth=0.90,
        label="Occurences",
    )
    # Labels are attached to the artists themselves so that the legend
    # entries cannot be matched to the wrong plot element.
    axis.vlines(np.mean(durations), 0, np.max(n), color="red", alpha=0.5, label="Mean")
    axis.vlines(np.median(durations), 0, np.max(n), color="green", alpha=0.5, label="Median")
    axis.set_title("Histogram of time ({})".format(activity_type_selected))
    axis.set_xlabel("Time (minutes)")
    axis.set_ylabel("Occurences")
    axis.legend()
else:
    # Also reached when the selection contains no activities at all.
    print("All times equal to zero")

### Number of activities, distance and time over time

In [None]:
# Options for the per-year analysis: "All" followed by the activity types
# ordered from most to least frequent.
activity_types_widget_y = np.append("All", activity_types_sorted)

# List box for the activity type; the first entry ("All") is preselected.
type_select_y = widgets.Select(
    description="Activity type to analyse:",
    options=activity_types_widget_y,
    value=activity_types_widget_y[0],
    style=dict(description_width="initial"),
)

# Checkbox deciding whether similar activity types are analysed together.
combine_select_y = widgets.Checkbox(
    description='Combine similar activity types',
    value=True,
)

# Button that applies the selection, shown below the two inputs.
confirm_button_y = widgets.Button(disabled=False, description="Confirm")
display(type_select_y, combine_select_y, confirm_button_y)


def confirm_button_func_y(a):
    """Store the per-year widget selection and aggregate the data by year.

    Reads ``type_select_y`` and ``combine_select_y`` and writes the globals
    ``activity_type_selected_y``, ``combine_activity_types_y``,
    ``activity_type_df_y`` (a copy of the matching rows of ``df_gar``) and
    ``activity_type_df_y_grouped``.

    Args:
        a: Argument supplied by the button click callback (unused).

    Returns:
        The per-year frame: sums of the numeric columns plus the columns
        ``n`` (activities per year), ``time_per_activity`` (``Time_min`` / n)
        and ``activities_per_week`` (n / 52).
    """
    global activity_type_selected_y
    global activity_type_df_y
    global activity_type_df_y_grouped
    global combine_activity_types_y

    # Common label given to similar types when all activities are analysed.
    merged_labels = {
        "Treadmill Running": "Running",
        "Mountain Biking": "Cycling",
        "Walking": "Hiking",
        "Pool Swim": "Swimming",
        "Open Water Swimming": "Swimming",
    }
    # Type whose rows are added when a single type is analysed.
    counterparts = {
        "Running": "Treadmill Running",
        "Treadmill Running": "Running",
        "Cycling": "Mountain Biking",
        "Mountain Biking": "Cycling",
        "Hiking": "Walking",
        "Walking": "Hiking",
        "Pool Swim": "Open Water Swimming",
        "Open Water Swimming": "Pool Swim",
    }

    activity_type_selected_y = type_select_y.value
    combine_activity_types_y = combine_select_y.value

    if activity_type_selected_y == "All":
        activity_type_df_y = df_gar.copy()
        # Use this cell's own checkbox, not the one of the earlier selection.
        if combine_activity_types_y:
            for old_label, new_label in merged_labels.items():
                is_old = activity_type_df_y["Activity Type"] == old_label
                activity_type_df_y.loc[is_old, "Activity Type"] = new_label
    else:
        activity_type_df_y = dfs_type[activity_type_selected_y].copy()
        counterpart = counterparts.get(activity_type_selected_y)
        # The counterpart may be absent from this export; skip it then
        # instead of failing with a KeyError.
        if combine_activity_types_y and counterpart in dfs_type:
            activity_type_df_y = pd.concat([activity_type_df_y, dfs_type[counterpart]])

    per_year = activity_type_df_y.groupby("Year")
    activity_type_df_y_grouped = per_year.sum(numeric_only=True)
    activity_type_df_y_grouped["n"] = per_year["Activity Type"].count()
    activity_type_df_y_grouped["time_per_activity"] = (
        activity_type_df_y_grouped["Time_min"] / activity_type_df_y_grouped["n"]
    )
    # Every year is divided by 52 weeks, also a year that is not yet complete.
    activity_type_df_y_grouped["activities_per_week"] = activity_type_df_y_grouped["n"] / 52

    print("Selected activity type: " + activity_type_selected_y)
    return activity_type_df_y_grouped

# Run confirm_button_func_y every time this "Confirm" button is clicked.
confirm_button_y.on_click(confirm_button_func_y)

In [None]:
# Five stacked panels that share the year axis, one per yearly metric.
fig, axis = plt.subplots(5, 1, sharex=True, dpi=150)

yearly = activity_type_df_y_grouped
panels = [
    (yearly["n"], "Activities\n(n)"),
    (yearly["activities_per_week"], "Activities/week\n(n)"),
    (yearly["Distance"], "Distance\n(km)"),
    (yearly["Time_min"] / 60, "Time\n(hours)"),
    (yearly["time_per_activity"], "Time/activity\n(minutes)"),
]
for panel, (series, ylabel) in zip(axis, panels):
    panel.plot(series, "o--")
    panel.set_ylabel(ylabel)
    panel.grid(axis="y")

# One tick per calendar year between the first and the last year present.
first_year = yearly.index.min()
last_year = yearly.index.max()
ticks = np.linspace(first_year, last_year, last_year - first_year + 1)
ticks = np.unique(np.round(ticks).astype(int))
axis[4].set_xticks(ticks, ticks)
axis[4].set_xlabel("Year")