In [None]:
import os
import re
from collections import Counter
from datetime import datetime, timedelta

import git
import pandas as pd
import plotly.express as px

# Commit-type prefixes that are accepted as a recognised intent; any other
# prefix is reported as "unknown" by the tallying loop further down.
categories = {
    "build",
    "chore",
    "ci",
    "docs",
    "feat",
    "fix",
    "merge",
    "perf",
    "refactor",
    "revert",
    "style",
    "test",
}

In [None]:
# Local checkouts that can be analysed. These are machine-specific absolute
# paths, so they only resolve on the workstation they were written for.
wfx = "/Users/timottinger/Projects/siemens-wfx"
goreleaser = "/Users/timottinger/Projects/goreleaser"

# Let another checkout be selected without editing the notebook: a non-empty
# COMMIT_INTENT_REPO environment variable takes precedence, while the
# goreleaser checkout remains the default so existing runs are unchanged.
repo = git.Repo(os.environ.get("COMMIT_INTENT_REPO") or goreleaser)


In [None]:

# Captures (group 1) the leading word of a commit message when that word is
# immediately followed by "!", "(" or ":".
conventional_commit_match_pattern = re.compile(r'^(\w+)[\!\(:]')

# Quick visual check: print the captured prefix for a few sample headers.
sample_headers = (
    "feat(this):",
    "feat:",
    "doc(readme): blah",
    "fix(that)!: cozectify the flimble",
)
for header in sample_headers:
    found = conventional_commit_match_pattern.search(header)
    print(found.group(1))

In [None]:
# File names that are left out of the tallies. The comparison done later is an
# exact string match against the path reported for each commit.
exclusions = {
    "go.mod",
    "go.sum",
    "Makefile",
    "makefile",
}

In [None]:
# Earliest commit time that is still analysed: 52 weeks before "now".
# astimezone() makes the value timezone-aware; it is later compared against
# each commit's committed_datetime.
lookback = timedelta(weeks=52)
start = datetime.now().astimezone() - lookback

In [None]:

# Tally how often each file was touched inside the analysis window, split by
# the intent declared in the commit message. Keys are (filename, intent).
counter = Counter()
for commit in repo.iter_commits():
    # Older commits are skipped (not used as a stop signal), so the whole
    # history is still walked.
    if commit.committed_datetime < start:
        continue

    match = conventional_commit_match_pattern.match(commit.message)
    # Messages without a recognised prefix are grouped under "unknown".
    intent = match.group(1) if match and match.group(1) in categories else "unknown"

    for filename in commit.stats.files:
        if filename not in exclusions:
            counter[(filename, intent)] += 1

counter

In [None]:
# Display every (filename, intent) tally, ordered from most to least frequent.
counter.most_common()

In [None]:
# Collapse the per-intent tallies into a single total per file.
most_changed_counter = Counter()
for (filename, _intent), count in counter.items():
    most_changed_counter[filename] += count

# Keep the names of the 30 files with the highest totals. most_common()
# yields (file, total) pairs; only the file name is needed here.
file_set = {
    file
    for file, _total in most_changed_counter.most_common(30)
}
file_set

In [None]:
# Flatten the tallies into (file, reason, count) rows, restricted to the files
# selected in file_set.
data_source = [
    (path, intent, hits)
    for (path, intent), hits in counter.items()
    if path in file_set
]
data_source

In [None]:
# Tabular form of the rows: one row per (file, reason) pair with its count.
df = pd.DataFrame(
    data=data_source,
    columns=["file", "reason", "count"],
)
df

In [None]:


# Display the distinct reasons present in the frame, in sorted order.
sorted(df['reason'].unique())

In [None]:
# Fixed colour per reason so that every chart built from this map agrees.
# NOTE(review): "merge", "perf", "revert" and "style" are accepted categories
# but have no entry here.
color_choices = dict(
    feat="rgb(141,211,199)",
    ci="rgb(255,255,179)",
    fix="rgb(251,128,114)",
    chore="rgb(190,186,218)",
    build="rgb(128,177,211)",
    docs="rgb(253,180,98)",
    test="rgb(179,222,105)",
    refactor="rgb(252,205,229)",
    unknown="rgb(188,128,189)",
)

# Bar chart with one bar per file, coloured by reason.
figure = px.bar(
    df,
    x="file",
    y="count",
    color="reason",
    color_discrete_map=color_choices,
    width=1200,
    height=700,
)
figure.show()

In [None]:
# Bar chart with one bar per reason, reusing the colour map; hovering over a
# segment shows the file that row belongs to.
figure = px.bar(
    df,
    x="reason",
    y="count",
    color="reason",
    hover_name="file",
    color_discrete_map=color_choices,
    height=500,
)
figure.show()