In [None]:
import os
import fnmatch
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Filepath configuration
#
# Current working directory of the running kernel
CWD = os.getcwd()

# The cleaned data lives two directory levels above the working directory,
# inside data/clean. Keep both the relative form and the form anchored at CWD.
CLEAN_DATA_RELPATH = os.path.join(
    os.path.join(os.pardir, os.pardir),
    os.path.join("data", "clean"),
)
CLEAN_DATA_ABSPATH = os.path.join(CWD, os.pardir, os.pardir, "data", "clean")

In [None]:
# Find data files
#
# os.listdir returns directory entries in arbitrary order, so the matches are
# sorted to make positional access (e.g. `datafiles[0]`) pick the same file on
# every run and on every machine.
datafiles = sorted(fnmatch.filter(os.listdir(CLEAN_DATA_ABSPATH), "*.csv"))

In [None]:
# Read in the first datafile
#
# Fail with an explicit message instead of an opaque IndexError when the clean
# data directory contains no CSV files.
if not datafiles:
    raise FileNotFoundError(f"No CSV files found in {CLEAN_DATA_ABSPATH}")

filepath = os.path.join(CLEAN_DATA_ABSPATH, datafiles[0])

# Load, sort and convert in one chain so `data` is assigned exactly once and no
# frame is mutated in place:
#   * `dt_start` / `dt_end` are parsed as datetimes and `Duration` as a timedelta
#   * 0.0 is registered as an additional missing-value marker (`na_values`)
#   * rows are ordered by start date with a fresh 0..n-1 index
#   * `Duration` is finally expressed as a float number of hours
data = (
    pd.read_csv(
        filepath,
        converters={
            "dt_start": pd.to_datetime,
            "dt_end": pd.to_datetime,
            "Duration": pd.to_timedelta,
        },
        na_values=0.0,
    )
    .sort_values("dt_start", ignore_index=True)
    .assign(Duration=lambda frame: frame["Duration"] / np.timedelta64(1, "h"))
)

### Calculate each task's cumulative share (%) of the total duration per week

In [None]:
# Calculate total duration per week number
# `weekly_total` is indexed by week_number; `total_duration` is the same data as
# a two-column frame (week_number, Duration).
weekly_total = data.groupby("week_number")["Duration"].sum()
total_duration = weekly_total.reset_index()

# Tasks in order of first appearance; this defines the column (stacking) order
task_order = data["Task"].dropna().unique()

# Calculate total duration per task per week number
# Rows are weeks, columns are tasks. Reindexing against `weekly_total.index`
# keeps every week present in the data, and a task with no entries in a given
# week contributes 0 hours. (Assigning per-task Series into an empty DataFrame
# would instead keep only the weeks in which the first task occurs.)
hours_per_task = (
    data.dropna(subset=["Task"])
    .groupby(["week_number", "Task"])["Duration"]
    .sum()
    .unstack("Task", fill_value=0.0)
    .reindex(index=weekly_total.index, columns=task_order, fill_value=0.0)
)

# Express each task as a percentage of that week's total. `div(..., axis=0)`
# aligns on week_number, so every row is divided by its own week's total rather
# than by whatever total happens to sit at the same position. A zero total is
# mapped to NaN so the result is NaN rather than inf.
share_per_task = hours_per_task.div(weekly_total.replace(0, np.nan), axis=0) * 100

# Running sum across the task columns: each column becomes the top edge of that
# task's segment in a stacked bar. `week_number` is restored as a regular column.
duration_per_task = (
    share_per_task.cumsum(axis=1)
    .rename_axis(columns=None)
    .reset_index()
)

In [None]:
# Create a stacked percentage plot of the data
#
# `duration_per_task` holds cumulative percentages, so the bars are drawn from
# the last task column to the first: each later bar is no taller than the one
# before it and is painted on top, leaving only that task's segment visible.

# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` is the equivalent lookup.
cmap = plt.get_cmap("tab20")

# One colour per task column (everything except the week_number column)
task_columns = [col for col in duration_per_task.columns if col != "week_number"]
increments = np.linspace(0, 1, num=len(task_columns), endpoint=True)

fig, ax = plt.subplots(figsize=(10, 6))
barWidth = 0.85

for shade, task in zip(increments, reversed(task_columns)):
    ax.bar(
        duration_per_task["week_number"],
        duration_per_task[task],
        color=cmap(shade),
        edgecolor="white",
        width=barWidth,
        label=task,
    )

# Label each week with its absolute total, placed just above the 100 % line
for week, hours in zip(total_duration["week_number"], total_duration["Duration"]):
    ax.annotate(
        f"Total = {hours:.1f} hrs",
        xy=(week, 100),
        ha="center",
        va="bottom",
    )

ax.set_xticks(duration_per_task["week_number"])
ax.set_xticklabels(duration_per_task["week_number"].astype(str))
ax.set_xlabel("Week Number")
ax.set_ylabel("Hours clocked (%)")
ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1))
ax.set_title("Hours clocked per task per week");
