In [None]:
# Import required packages
import os
import fnmatch
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product
from ipywidgets import interact

%matplotlib inline
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9.
# `plt.get_cmap` is the supported lookup and returns the same colormap object.
cmap = plt.get_cmap("tab20")

# Working days in calendar order; the per-weekday tables and plots follow this order
day_cats = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]

In [None]:
# Locate the clean data directory
#
# Every path below is anchored on the directory the notebook is run from
CWD = os.getcwd()

# The clean data sits two directory levels above CWD, under data/clean.
# The absolute form is a plain join, so it still contains the ".." components.
CLEAN_DATA_RELPATH = os.path.join(*(2 * [os.pardir]), "data", "clean")
CLEAN_DATA_ABSPATH = os.path.join(CWD, CLEAN_DATA_RELPATH)

In [None]:
# Find data files
#
# `os.listdir` returns entries in arbitrary order, so the matches are sorted to
# make `datafiles[0]` refer to the same file on every run and on every platform.
datafiles = sorted(fnmatch.filter(os.listdir(CLEAN_DATA_ABSPATH), "*.csv"))

In [None]:
# Load the first data file, parsing the timestamp and duration columns on read.
# `na_values=0.0` asks pandas to treat 0.0 entries as missing values.
filepath = os.path.join(CLEAN_DATA_ABSPATH, datafiles[0])
column_parsers = {
    "dt_start": pd.to_datetime,
    "dt_end": pd.to_datetime,
    "Duration": pd.to_timedelta,
}
data = pd.read_csv(filepath, converters=column_parsers, na_values=0.0)

# Put the rows in chronological order of their start and renumber them from zero
data = data.sort_values("dt_start", ignore_index=True)

# Express `Duration` as a number of hours instead of a timedelta
data["Duration"] = data["Duration"].div(np.timedelta64(1, "h"))

### Total time spent on tasks per week

In [None]:
# Calculate total duration per task per week number
#
# All per-task series are collected first and the frame is built in one go, so
# the DataFrame constructor aligns them on the union of their week numbers.
# Assigning the columns one at a time to an empty frame would instead pin the
# index to the weeks of the first task and silently drop every other week.
time_per_task = {
    task: data[data.Task == task].groupby("week_number")["Duration"].sum()
    for task in np.sort(data.Task.dropna().unique())
}

# The overall weekly total also counts rows that have no task assigned
time_per_task["Total"] = data.groupby("week_number")["Duration"].sum()

# Transpose so that rows are tasks (plus "Total") and columns are week numbers
total_time_per_task = pd.DataFrame(time_per_task).T

In [None]:
@interact
def plot_total_time_per_task(column=total_time_per_task.columns.tolist()):
    """Draw a pie chart of the hours per task for one column of `total_time_per_task`.

    `column` is a column label of `total_time_per_task`; the list given as the
    default is handed to `interact`, which offers its entries as the choices.
    The "Total" row is left out of the slices and shown in the title instead.
    """
    hours_per_task = total_time_per_task[column]
    overall_hours = total_time_per_task.loc["Total", column]

    hours_per_task.drop("Total", axis=0).plot.pie(
        figsize=(12, 8),
        title=f"Total = {overall_hours:.1f} hrs",
        ylabel="",
        colormap=cmap,
        autopct="%1.1f%%",
    )

    # Anchor the legend's upper-left corner just right of the axes
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1))

### Total time spent on projects per week

In [None]:
# Calculate total duration per project per week number
#
# All per-project series are collected first and the frame is built in one go,
# so the DataFrame constructor aligns them on the union of their week numbers.
# Assigning the columns one at a time to an empty frame would instead pin the
# index to the weeks of the first project and silently drop every other week.
time_per_project = {
    project: data[data.Project == project].groupby("week_number")["Duration"].sum()
    for project in np.sort(data.Project.dropna().unique())
}

# The overall weekly total also counts rows that have no project assigned
time_per_project["Total"] = data.groupby("week_number")["Duration"].sum()

# Transpose so that rows are projects (plus "Total") and columns are week
# numbers, then order the rows by label
total_time_per_project = pd.DataFrame(time_per_project).T.sort_index()

In [None]:
@interact
def plot_total_time_per_project(column=total_time_per_project.columns.tolist()):
    """Draw a pie chart of the hours per project for one column of `total_time_per_project`.

    `column` is a column label of `total_time_per_project`; the list given as
    the default is handed to `interact`, which offers its entries as the choices.
    The "Total" row is left out of the slices and shown in the title instead.
    """
    hours_per_project = total_time_per_project[column]
    overall_hours = total_time_per_project.loc["Total", column]

    hours_per_project.drop("Total", axis=0).plot.pie(
        figsize=(12, 8),
        title=f"Total = {overall_hours:.1f} hrs",
        ylabel="",
        colormap=cmap,
        autopct="%1.1f%%",
    )

    # Anchor the legend's upper-left corner just right of the axes
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1))

### Total time spent on tasks per project (across all 3 weeks)

In [None]:
# Total hours per (project, task) pair, with projects as rows and tasks as columns.
# One grouped aggregation replaces growing an empty, object-typed frame one row at
# a time through `.loc`, so the values stay numeric for the sums and plots below.
columns = np.sort(data.Task.dropna().unique())
projects = np.sort(data.Project.dropna().unique())

tasks_per_project = (
    data.groupby(["Project", "Task"])["Duration"]
    .sum()
    .unstack("Task")
    # Keep a row for every project and a column for every task, even when a
    # project has no rows with a task (those cells are NaN)
    .reindex(index=projects, columns=columns)
)

# Row totals skip NaN cells; transpose so tasks (plus "Total") become the rows
tasks_per_project["Total"] = tasks_per_project.sum(axis=1)
tasks_per_project = tasks_per_project.T

In [None]:
@interact
def plot_tasks_per_project(column=tasks_per_project.columns.tolist()):
    """Draw a pie chart of the hours per task for one column of `tasks_per_project`.

    `column` is a column label of `tasks_per_project`; the list given as the
    default is handed to `interact`, which offers its entries as the choices.
    The "Total" row is left out of the slices and shown in the title instead.
    """
    hours_per_task = tasks_per_project[column]
    overall_hours = tasks_per_project.loc["Total", column]

    hours_per_task.drop("Total", axis=0).plot.pie(
        figsize=(12, 8),
        title=f"Total = {overall_hours:.1f} hrs",
        ylabel="",
        colormap=cmap,
        autopct="%1.1f%%",
    )

    # Anchor the legend's upper-left corner just right of the axes
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1))

### Tasks per Weekday

In [None]:
# Build every (week number, weekday) pair, with weeks ascending and weekdays in
# `day_cats` order, then turn the pairs into a two-level row index
week_numbers = np.sort(data.week_number.dropna().unique())
MultiIndex = [(week, day) for week in week_numbers for day in day_cats]
index = pd.MultiIndex.from_tuples(
    MultiIndex,
    names=["week_number", "day_of_week"],
)

In [None]:
columns = np.sort(data.Task.dropna().unique())

# Hours per task for every (week number, weekday) pair. One grouped aggregation
# replaces the per-row `.loc[week_num, day]` assignments into an empty,
# object-typed frame, so the values stay numeric and no ambiguous row/column
# tuple indexing is needed. Reindexing onto `index` gives a row for every weekday
# of every week (NaN where nothing was logged), in the order `index` defines.
tasks_per_weekday = (
    data.groupby(["week_number", "day_of_week", "Task"])["Duration"]
    .sum()
    .unstack("Task")
    .reindex(index=index, columns=columns)
)

# Row totals skip NaN cells
tasks_per_weekday["Total"] = tasks_per_weekday.sum(axis=1)

In [None]:
@interact
def plot_tasks_per_weekday(
    week_num=tasks_per_weekday.index.get_level_values("week_number").unique().tolist(),
):
    """Draw a stacked bar chart of the hours per task for each weekday of one week.

    `week_num` is a value from the "week_number" level of `tasks_per_weekday`;
    the list given as the default is handed to `interact`, which offers its
    entries as the choices. The "Total" column is excluded from the bars.
    """
    week_rows = tasks_per_weekday.loc[week_num, :]
    task_hours = week_rows.drop("Total", axis=1)

    task_hours.plot.bar(
        figsize=(12, 8),
        title=f"Week number: {week_num}",
        xlabel="Day of Week",
        ylabel="Hours clocked",
        colormap=cmap,
        stacked=True,
    )

    # Anchor the legend's upper-left corner just right of the axes
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1))

### Projects per weekday

In [None]:
columns = np.sort(data.Project.dropna().unique())

# Hours per project for every (week number, weekday) pair. One grouped aggregation
# replaces the per-row `.loc[week_num, day]` assignments into an empty,
# object-typed frame, so the values stay numeric and no ambiguous row/column
# tuple indexing is needed. Reindexing onto `index` gives a row for every weekday
# of every week (NaN where nothing was logged), in the order `index` defines.
projects_per_weekday = (
    data.groupby(["week_number", "day_of_week", "Project"])["Duration"]
    .sum()
    .unstack("Project")
    .reindex(index=index, columns=columns)
)

# Row totals skip NaN cells
projects_per_weekday["Total"] = projects_per_weekday.sum(axis=1)

In [None]:
@interact
def plot_projects_per_weekday(
    week_num=projects_per_weekday.index.get_level_values("week_number")
    .unique()
    .tolist(),
):
    """Draw a stacked bar chart of the hours per project for each weekday of one week.

    `week_num` is a value from the "week_number" level of `projects_per_weekday`;
    the list given as the default is handed to `interact`, which offers its
    entries as the choices. The "Total" column is excluded from the bars.
    """
    week_rows = projects_per_weekday.loc[week_num, :]
    project_hours = week_rows.drop("Total", axis=1)

    project_hours.plot.bar(
        figsize=(12, 8),
        title=f"Week number: {week_num}",
        xlabel="Day of Week",
        ylabel="Hours clocked",
        colormap=cmap,
        stacked=True,
    )

    # Anchor the legend's upper-left corner just right of the axes
    plt.legend(loc="upper left", bbox_to_anchor=(1.05, 1))

### Work Schedule

In [None]:
# Earliest start and latest end for every (week number, day of the week) pair,
# reduced to the time of day
per_week_and_day = data.groupby(["week_number", "day_of_week"])
start_times = per_week_and_day["dt_start"].min()
end_times = per_week_and_day["dt_end"].max()
schedule_df = pd.DataFrame(
    {
        "start": start_times.dt.time,
        "end": end_times.dt.time,
    }
)

In [None]:
pd.plotting.register_matplotlib_converters()

# Walk the weeks and weekdays in reverse, so the latest day gets the smallest
# y value and the earliest day the largest
result = product(
    np.sort(schedule_df.index.get_level_values("week_number").dropna().unique())[::-1],
    day_cats[::-1],
)

# Tick position and label of every row that is actually drawn. Keeping the two
# lists in lockstep means a weekday without entries cannot shift the labels of
# the rows after it, and the tick count always matches the label count.
y_ticks = []
y_labels = []

fig, ax = plt.subplots(figsize=(12, 8))
for i, (week_num, weekday) in enumerate(result):
    # Weekdays with no entries in this week have no row in `schedule_df`
    if (week_num, weekday) not in schedule_df.index:
        continue

    # One thick horizontal line from the day's start time to its end time
    day_span = schedule_df.loc[(week_num, weekday)]
    # Wrap the colour index so it stays inside the colormap's `N` colours
    ax.plot(day_span, [i + 1, i + 1], c=cmap(i % cmap.N), lw=15)
    y_ticks.append(i + 1)
    y_labels.append(f"{week_num} / {weekday}")

ax.set_yticks(y_ticks)
ax.set_yticklabels(y_labels)
ax.set_ylabel("Week # / Day of Week")

# Hourly ticks for 12 consecutive hours starting at 08:00 (lowercase "h": the
# uppercase "H" alias is deprecated in recent pandas)
day_range = pd.date_range("08:00:00", periods=12, freq="h").strftime("%H:%M:%S")
ax.set_xticks(day_range.values.tolist())
ax.set_xlabel("Time");

### Tracking GitHub Activity

In [None]:
from datetime import datetime
from ghapi.all import GhApi, github_token, paged

# Use the token from the GITHUB_TOKEN environment variable when it is set;
# otherwise fall back to ghapi's `github_token()` helper
if "GITHUB_TOKEN" in os.environ:
    token = os.environ["GITHUB_TOKEN"]
else:
    token = github_token()

api = GhApi(token=token)

In [None]:
# Bounds of the tracked period: the start of the first row and the end of the last row
since_date = data.at[0, "dt_start"]
until_date = data.at[len(data) - 1, "dt_end"]

# Render both bounds as timestamp strings with a literal trailing "Z"
# NOTE(review): the "Z" suffix denotes UTC; confirm the logged timestamps are UTC
timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
since_date_str = since_date.strftime(timestamp_format)
until_date_str = until_date.strftime(timestamp_format)

In [None]:
# Fetch the open issues created by the authenticated user
# NOTE(review): this is a single request, and `paged` is imported but unused in
# this notebook; confirm whether more than one page of results must be fetched
issues = api.issues.list_for_authenticated_user(
    filter="created", state="open", since=since_date_str, pulls=False
)

# Positions of the issues created outside the [since_date, until_date] window
indices_to_remove = []
for position, issue in enumerate(issues):
    created_at = datetime.strptime(issue["created_at"], "%Y-%m-%dT%H:%M:%SZ")

    # If earlier than since_date or later than until_date
    if created_at < since_date or created_at > until_date:
        indices_to_remove.append(position)

# Delete from the back so the remaining positions stay valid. The loop variable
# is deliberately not called `index`: that name holds the (week number, weekday)
# MultiIndex used by the per-weekday cells, and overwriting it here would break
# any re-run of those cells.
for position in reversed(indices_to_remove):
    del issues[position]
len(issues)