In [None]:
import matplotlib.pyplot as plt
import transform
import request
import datetime

In [None]:
# ISO-8601 timestamps (naive isoformat + literal 'Z') delimiting each year's
# query window.
# NOTE(review): every fixed stop date is midnight at the *start* of 31 Dec; if
# request.get_filtered_events treats the upper bound as exclusive, events on
# 31 Dec are not returned — confirm against that helper.
start_date_2018 = datetime.datetime(2018, 1, 1).isoformat() + 'Z'
stop_date_2018 = datetime.datetime(2018, 12, 31).isoformat() + 'Z'
start_date_2019 = datetime.datetime(2019, 1, 1).isoformat() + 'Z'
stop_date_2019 = datetime.datetime(2019, 12, 31).isoformat() + 'Z'
start_date_2020 = datetime.datetime(2020, 1, 1).isoformat() + 'Z'
# datetime.utcnow() is deprecated (Python 3.12+). Take an aware UTC "now" and
# drop the tzinfo so isoformat() still yields a naive string with no "+00:00"
# offset in front of the appended 'Z'.
stop_date_2020 = (
    datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
)

In [None]:
# For each year: fetch the events in that year's date window (third argument
# "summary" is passed through to the request helper), then hand the result to
# transform.get_dataframe.
events_2018 = request.get_filtered_events(start_date_2018, stop_date_2018, "summary")
df_2018 = transform.get_dataframe(events_2018)

events_2019 = request.get_filtered_events(start_date_2019, stop_date_2019, "summary")
df_2019 = transform.get_dataframe(events_2019)

events_2020 = request.get_filtered_events(start_date_2020, stop_date_2020, "summary")
df_2020 = transform.get_dataframe(events_2020)

In [None]:
# Show the 2020 frame as the cell output for a quick visual check.
df_2020

Overall distance

In [None]:
# Total distance per year, newest first.
# Select the column before reducing: DataFrame.sum() would first try to sum
# every column (including the datetime "date" column, which cannot be summed).
print(df_2020["distance"].sum())
print(df_2019["distance"].sum())
print(df_2018["distance"].sum())

Average distance

In [None]:
# Mean distance per run for each year, newest first.
# Select the column before reducing: DataFrame.mean() over the whole frame
# also touches the non-numeric columns, which newer pandas refuses to average.
print(df_2020["distance"].mean())
print(df_2019["distance"].mean())
print(df_2018["distance"].mean())

Longest runs

In [None]:
# The ten largest 2020 distances, biggest first.
df_2020.sort_values(by="distance", ascending=False)["distance"].head(10)

Daily performance

In [None]:
# One point per 2020 row: date on the x-axis, distance on the y-axis.
fig, ax = plt.subplots()
df_2020.plot.scatter(x="date", y="distance", ax=ax)

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("2020_individual_runs.png")

In [None]:
# Histogram of 2020 distances with dotted vertical markers at selected quantiles.
fig, ax = plt.subplots()
df_2020.hist("distance", ax=ax)

percentages = [.05, .25, .5, .75, .95]
for level in percentages:
    marker_x = df_2020["distance"].quantile(level)
    ax.axvline(marker_x, linestyle=":", color="yellow")
    # Label sits slightly to the right of its line, at y=1.
    ax.text(marker_x + .1, 1, f"{int(level*100)}%", size=12, alpha=0.8, color="yellow")

ax.grid(False)
ax.set_ylabel("number of occurrences")
ax.set_xlabel("distance [km]")
ax.set_title("Histogram of running distances with selected percentiles.")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("run_histogram.png")

Monthly performance

In [None]:
# Total 2020 distance per month. Select "distance" before aggregating so the
# sum is not attempted on the other columns (e.g. the datetime "date" column).
df_2020.groupby("month")["distance"].sum()

In [None]:
# Monthly distance totals for the three years, overlaid on one set of axes.
fig, ax = plt.subplots()
for yearly_runs, colour, year_label in (
    (df_2020, "b", "2020"),
    (df_2019, "r", "2019"),
    (df_2018, "orange", "2018"),
):
    monthly_totals = yearly_runs.groupby(["month"], as_index=False)["distance"].sum()
    monthly_totals.plot(
        x="month", y="distance", kind="scatter", ax=ax, color=colour, label=year_label)
ax.legend()
ax.set_title("Monthly distance")
ax.set_ylabel("distance [km]")
ax.set_xlabel("month")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("monthly_distances.png")

In [None]:
# Side-by-side normalised histograms (5 bins) of the monthly distance totals,
# one series per year.
fig, ax = plt.subplots()
labels = ["2020", "2019", "2018"]
colors = ["b", "r", "orange"]
distances = [
    yearly_runs.groupby(["month"], as_index=False)["distance"].sum()["distance"]
    for yearly_runs in (df_2020, df_2019, df_2018)
]
ax.hist(distances, bins=5, density=True, label=labels, color=colors)
ax.legend()
ax.set_title("Empirical densities of monthly distances. #bins=5.")
ax.set_xlabel("distance [km]")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("monthly_densities.png")

In [None]:
# Mean distance per run, grouped by month, for the three years on shared axes.
fig, ax = plt.subplots()
for yearly_runs, colour, year_label in (
    (df_2020, "b", "2020"),
    (df_2019, "r", "2019"),
    (df_2018, "orange", "2018"),
):
    monthly_means = yearly_runs.groupby(["month"], as_index=False)["distance"].mean()
    monthly_means.plot(
        x="month", y="distance", kind="scatter", ax=ax, color=colour, label=year_label)
ax.legend()
ax.set_title("Average run distance per month.")
ax.set_ylabel("distance [km]")
ax.set_xlabel("month")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("monthly_average_distances.png")

Weekly performance

In [None]:
# Total 2020 distance per value of the "week" column, one point per week.
fig, ax = plt.subplots()
weekly_totals = df_2020.groupby(["week"], as_index=False)["distance"].sum()
weekly_totals.plot(x="week", y="distance", kind="scatter", ax=ax)
ax.set_ylabel("distance [km]")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("weekly_distances.png")

In [None]:
# Normalised 10-bin histogram of the 2020 weekly distance totals.
fig, ax = plt.subplots()
weekly_distance = df_2020.groupby(["week"], as_index=False)["distance"].sum()["distance"]
weekly_distance.plot.hist(bins=10, density=True, ax=ax)
ax.set_xlabel("distance [km]")
ax.set_title("Empirical densities of weekly distances. #bins=10.")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("weekly_distances_histogram.png")

Longest duration between two runs

In [None]:
# Find the longest pause between two consecutive 2020 runs.
# sort_values returns a new frame, so the sorted result must be kept; walking
# the unsorted frame would compare runs that are not chronological neighbours.
runs_by_date = df_2020.sort_values(by=["date"])
run_dates = runs_by_date["date"]

# .iloc[0] is the chronologically first run regardless of the index labels.
t1 = run_dates.iloc[0]
max_diff = t1 - t1      # zero-length gap as the starting value
max_diff_t = t1         # date of the run that precedes the longest gap
for t2 in run_dates:
    if t2 - t1 > max_diff:
        max_diff_t = t1
        max_diff = t2 - t1
    t1 = t2
print(max_diff_t)
print(max_diff)

How many weeks without running?

In [None]:
# Weeks without any run = 52 minus the number of distinct "week" values.
# NOTE(review): 52 is hard-coded; whether that matches the numbering used for
# the "week" column (and a year still in progress) should be confirmed.
weeks_with_runs = df_2020["week"].nunique()
52 - weeks_with_runs

In [None]:
# Show the 2020 frame again as the cell output before building the daily series.
df_2020

In [None]:
# Build a vector with exactly one distance per calendar day of 2020 (index 0 is
# 1 January), using 0 for days without a run, so that a window of k entries
# corresponds to k consecutive days.

# 2020 is a leap year, so day-of-year runs from 1 to 366 inclusive.
DAYS_IN_2020 = 366

day_numbers = df_2020['date'].dt.dayofyear.tolist()
distances = df_2020['distance']

# Sum the distance per day: several runs on one day must share a single slot,
# otherwise later days are shifted and windows no longer span consecutive days.
distance_per_day = {}
for day_number, distance in zip(day_numbers, distances):
    distance_per_day[day_number] = distance_per_day.get(day_number, 0) + distance

# Dict membership keeps this linear instead of scanning a list for every day.
missing_days = [
    day_number
    for day_number in range(1, DAYS_IN_2020 + 1)
    if day_number not in distance_per_day
]
missing_distances = [0 for _ in missing_days]
daily_distances = [
    distance_per_day.get(day_number, 0) for day_number in range(1, DAYS_IN_2020 + 1)
]

In [None]:
def get_max(numbers, window_size):
    """Return the largest sum over any run of ``window_size`` adjacent items.

    Args:
        numbers: Indexable, sliceable sequence of numbers.
        window_size: Number of adjacent items in each window.

    Returns:
        A ``(best_total, best_start)`` tuple: the maximal window sum and the
        index at which that window begins. On ties the earliest window wins.
        If the sequence is shorter than the window, the sum of what is there
        is returned with a start index of 0.
    """
    # Seed with the first window; every later window is derived from it.
    running_total = sum(numbers[:window_size])
    best_total, best_start = running_total, 0

    last_start = len(numbers) - window_size
    for start in range(1, last_start + 1):
        # Slide right by one: drop the item that left, add the one that entered.
        running_total -= numbers[start - 1]
        running_total += numbers[start + window_size - 1]
        if running_total > best_total:
            best_total, best_start = running_total, start

    return best_total, best_start

In [None]:
# Best total over k consecutive entries of daily_distances, for k = 1..21.
window_sizes = list(range(1, 22))
max_sums = [get_max(daily_distances, window_size)[0] for window_size in window_sizes]

In [None]:
# Plot the best k-day total against the window length k.
fig, ax = plt.subplots()
ax.scatter(x=window_sizes, y=max_sums)
ax.set_ylabel("Distance [km]")
ax.set_title("Max distance run in k consecutive days.")
ax.set_xlabel("Number of days")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("max_window_distances.png")

In [None]:
import importlib

In [None]:
# Re-execute the already-imported transform module so the cells below use its
# current on-disk code without restarting the kernel.
importlib.reload(transform)

In [None]:
# Ratio of rows in df_2020 to rows in the frame built from the "color"-filtered
# events for the same 2020 window.
events_2020_all = request.get_filtered_events(start_date_2020, stop_date_2020, "color")
df_2020_all = transform.get_dataframe(events_2020_all)
len(df_2020) / len(df_2020_all)

In [None]:
# Ratio of rows in df_2019 to rows in the frame built from the "color"-filtered
# events for the same 2019 window.
events_2019_all = request.get_filtered_events(start_date_2019, stop_date_2019, "color")
df_2019_all = transform.get_dataframe(events_2019_all)
len(df_2019) / len(df_2019_all)

In [None]:
# Ratio of rows in df_2018 to rows in the frame built from the "color"-filtered
# events for the same 2018 window.
events_2018_all = request.get_filtered_events(start_date_2018, stop_date_2018, "color")
df_2018_all = transform.get_dataframe(events_2018_all)
len(df_2018) / len(df_2018_all)

In [None]:
# Number of rows in the 2020 frame.
df_2020.shape[0]

In [None]:
# Number of rows in the 2019 frame.
df_2019.shape[0]

In [None]:
# Number of rows in the 2018 frame.
df_2018.shape[0]

In [None]:
# 10-bin histogram of the "hour" column of the 2020 frame.
fig, ax = plt.subplots()
run_hours = df_2020["hour"]
run_hours.plot.hist(bins=10, ax=ax)
ax.set_ylabel("number of occurrences")
ax.set_xlabel("hour of the day")
ax.set_title("Histogram of runs by hours of the day. #bins=10")

In [None]:
# Write the most recently created figure to a PNG (path is relative to the
# working directory).
fig.savefig("hourly_distance_histogram.png")

In [None]:
import seaborn as sns

# Box plot of 2020 distances grouped by the "hour" column.
fig, ax = plt.subplots()
# Pass the axes explicitly instead of relying on pyplot's implicit "current
# axes", so the plot always lands on the figure created just above.
sns.boxplot(x="hour", y="distance", data=df_2020, ax=ax)
ax.set_ylabel("distance [km]")
ax.set_xlabel("hour of the day")
ax.set_title("Empirical distributions of distances per hour. ")

In [None]:
# Save the figure that owns the box-plot axes to a PNG (path is relative to the
# working directory).
ax.get_figure().savefig("hourly_distance_boxplot.png")