This is an exploration of building a time-based graph of project activity from
the Conventional Commits prefixes (e.g. `feat:`, `fix:`, `chore(deps)!:`) found in a project's commit messages.


In [40]:
import os
import re
from collections import Counter
from datetime import datetime, timedelta, date

import git
import pandas as pd
import plotly.express as px

In [41]:

# Captures the leading word of a commit subject when it is immediately
# followed by '!', '(' or ':' -- i.e. the "type" in `type(scope)!: description`.
pattern = re.compile(r'^(\w+)[!(:]')

# Quick sanity check against a subject that carries both a scope and a
# breaking-change marker; the cell displays the captured groups.
sample_message = "chore(deps)!:this was an update but it breaks stuff"
match = pattern.match(sample_message)
match.groups()

In [None]:
# Local clones to analyse. Paths are resolved against the current user's home
# directory ("~") rather than one developer's absolute /Users/... path, so the
# notebook is not tied to a single machine.
# NOTE(review): wfx_repo is not used by any cell visible here; presumably it is
# an alternative target to pass to git.Repo(...) -- confirm before removing.
wfx_repo = os.path.expanduser("~/Projects/siemens-wfx")
goreleaser = os.path.expanduser("~/Projects/goreleaser")
repo = git.Repo(goreleaser)

In [50]:
def reason_by_date(repo: git.Repo, on_or_after: datetime):
    """Yield a ``(week_start, reason)`` pair for each sufficiently recent commit.

    Walks ``repo.iter_commits()`` and, for every commit whose authored
    datetime is at or after ``on_or_after``, works out the commit "reason":

    * the word captured by the module-level ``pattern`` when the stripped
      message matches it, otherwise
    * the first whitespace-separated word of the message, otherwise
    * ``"unknown"`` when the message is empty or only whitespace.

    Args:
        repo: Repository whose commits are scanned.
        on_or_after: Cutoff compared directly against
            ``commit.authored_datetime``.
            NOTE(review): GitPython's authored datetimes appear to be
            timezone-aware, so this presumably must be aware too -- confirm.

    Yields:
        Tuples of (Monday of the commit's week as a ``date``, lower-cased reason).
    """
    for commit in repo.iter_commits():
        # Skip (rather than stop at) older commits: the whole history is
        # scanned, so an out-of-order authored date cannot hide newer commits.
        if commit.authored_datetime < on_or_after:
            continue
        message = commit.message.strip()
        parse = pattern.match(message)
        if parse:
            reason = parse.group(1)
        else:
            # An empty message has no first word; indexing [0] would raise
            # IndexError, so bucket such commits as "unknown" instead.
            words = message.split()
            reason = words[0] if words else "unknown"
        if '(' in reason:
            # A parenthesis left in the reason means the prefix was not
            # parsed cleanly; surface the raw message for inspection.
            print(f"BAD: [{commit.message}]")
        start_of_week = calculate_start_of_week(commit.authored_datetime.date())
        yield start_of_week, reason.lower()


def calculate_start_of_week(day_within_week: date) -> date:
    """Return the Monday of the week that contains ``day_within_week``.

    ``weekday()`` is 0 for Monday through 6 for Sunday, so stepping back that
    many days always lands on Monday; a Monday maps to itself.
    """
    offset_from_monday = timedelta(days=day_within_week.weekday())
    return day_within_week - offset_from_monday



In [None]:
# Color for each commit reason; handed to Plotly as `color_discrete_map`.
# Values are CSS color strings (rgb(...) triples or named colors).
color_choices = dict(
    feat="rgb(141,211,199)",
    ci="rgb(255,255,179)",
    fix="rgb(251,128,114)",
    chore="rgb(190,186,218)",
    build="rgb(128,177,211)",
    docs="rgb(253,180,98)",
    test="rgb(179,222,105)",
    refactor="rgb(252,205,229)",
    unknown="rgb(188,128,189)",
    sec="seagreen",
    revert="black",
)

Don't try to graph the whole history, just the most recent weeks (the cell below looks back 10 weeks).

In [None]:
# Size of the look-back window, named so it is easy to find and tune.
WEEKS_OF_HISTORY = 10

# Timezone-aware "now" (local zone) minus the look-back window.
earliest = datetime.today().astimezone() - timedelta(weeks=WEEKS_OF_HISTORY)
# Counter tallies how many times each (week_start, reason) pair is yielded.
data_set = Counter(reason_by_date(repo, earliest))

In [None]:
# Flatten the Counter's ((week_start, reason), count) entries into plain rows.
# The loop names deliberately avoid `date`, which is also the imported class.
data = [
    (week_start, reason, commit_count)
    for (week_start, reason), commit_count in data_set.items()
]

df = pd.DataFrame(data, columns=['date', 'reason', 'count'])

In [None]:

# Display the aggregated frame: one row per (week start, reason) with its commit count.
df


Unnamed: 0,date,reason,count
0,2024-08-12,chore,15
1,2024-08-12,feat,2
2,2024-08-12,build,5
3,2024-08-12,merge,1
4,2024-08-12,test,1
5,2024-08-12,docs,1
6,2024-08-12,ci,1
7,2024-08-05,docs,7
8,2024-08-05,chore,13
9,2024-08-05,sec,1


In [48]:
# Stacked bar chart: one bar per week, segmented and colored by commit reason.
# `labels` has to be a dict mapping column names to display text; the previous
# bare string "date" did not relabel anything.
figure = px.bar(
    df,
    x='date',
    y='count',
    color='reason',
    labels={
        'date': 'Week starting',
        'count': 'Commits',
        'reason': 'Commit type',
    },
    color_discrete_map=color_choices,
    title='Commits per week by commit type',
)
figure.show()