In [1]:
import os
import re
from collections import Counter
from datetime import datetime, timedelta

import git
import pandas as pd
import plotly.express as px

# Commit-type prefixes that are counted as a known intent; any other prefix
# is reported as "unknown" by the tally loop.
categories = {
    "build", "chore", "ci", "docs",
    "feat", "fix", "merge", "perf",
    "refactor", "revert", "style", "test",
}

In [2]:
# Default local checkouts. These are machine-specific absolute paths, so they
# only work on the original author's machine.
wfx = "/Users/timottinger/Projects/siemens-wfx"
goreleaser = "/Users/timottinger/Projects/goreleaser"

# Let the repository be chosen without editing the notebook: set the
# COMMIT_ANALYSIS_REPO environment variable to any local git checkout.
# When it is unset, the goreleaser checkout above is analysed, as before.
repo_path = os.environ.get("COMMIT_ANALYSIS_REPO", goreleaser)
repo = git.Repo(repo_path)


In [3]:

# Captures the leading type token of a commit subject: a run of word
# characters at the start of the text that is immediately followed by
# "!", "(" or ":".
conventional_commit_match_pattern = re.compile(r'^(\w+)[\!\(:]')

# Quick sanity check: print the captured type for a few sample subjects.
sample_subjects = (
    "feat(this):",
    "feat:",
    "doc(readme): blah",
    "fix(that)!: cozectify the flimble",
)
for subject in sample_subjects:
    found = conventional_commit_match_pattern.search(subject)
    print(found.group(1))

feat
feat
doc
fix


In [4]:
# Paths left out of the tally. The tally loop compares these against the
# full path reported for each changed file, so only exact matches are skipped.
exclusions = {
    "go.mod",
    "go.sum",
    "Makefile",
    "makefile",
}

In [5]:
# Earliest commit time included in the analysis: 52 weeks before now.
# The value is timezone-aware (local zone) because it is later compared
# with `commit.committed_datetime`.
lookback = timedelta(weeks=52)
start = datetime.now().astimezone() - lookback

In [6]:

# Tally how often each file was touched, split by the intent declared in the
# commit message prefix. Keys are (file path, intent) tuples.
counter = Counter()
for commit in repo.iter_commits():
    # Only commits made at or after `start` are counted; older ones are skipped.
    if commit.committed_datetime >= start:
        prefix = conventional_commit_match_pattern.match(commit.message)
        recognised = prefix is not None and prefix.group(1) in categories
        # Prefixes outside `categories` (or no prefix at all) count as "unknown".
        intent = prefix.group(1) if recognised else "unknown"
        counter.update(
            (path, intent)
            for path in commit.stats.files
            if path not in exclusions
        )

counter

Counter({('internal/client/github.go', 'chore'): 4,
         ('internal/client/github_test.go', 'chore'): 4,
         ('internal/builders/golang/build.go', 'fix'): 5,
         ('internal/builders/golang/build_test.go', 'fix'): 4,
         ('.github/workflows/codeql.yml', 'chore'): 25,
         ('internal/pipe/ko/ko_test.go', 'test'): 5,
         ('.github/workflows/build.yml', 'chore'): 58,
         ('.github/workflows/nightly-oss.yml', 'chore'): 51,
         ('.github/workflows/release.yml', 'chore'): 56,
         ('Dockerfile', 'chore'): 20,
         ('internal/pipe/blob/blob_test.go', 'chore'): 1,
         ('internal/pipe/blob/upload.go', 'chore'): 1,
         ('internal/pipe/nfpm/nfpm.go', 'feat'): 9,
         ('internal/pipe/nfpm/nfpm_test.go', 'feat'): 8,
         ('internal/pipe/nfpm/testdata/testfile-ppc64.txt', 'feat'): 1,
         ('internal/pipe/build/build.go', 'feat'): 5,
         ('internal/pipe/build/build_test.go', 'feat'): 4,
         ('pkg/config/config.go', 'feat'): 

In [7]:
# Show every (file, intent) pair, ordered from most to least frequently touched.
counter.most_common()

[(('.github/workflows/build.yml', 'chore'), 58),
 (('.github/workflows/release.yml', 'chore'), 56),
 (('.github/workflows/nightly-oss.yml', 'chore'), 51),
 (('pkg/config/config.go', 'feat'), 28),
 (('.github/workflows/codeql.yml', 'chore'), 25),
 (('Dockerfile', 'chore'), 20),
 (('.github/workflows/lint.yml', 'chore'), 18),
 (('www/mkdocs.yml', 'docs'), 17),
 (('pkg/config/config.go', 'fix'), 16),
 (('www/docs/static/schema-pro.json', 'docs'), 15),
 (('www/docs/cmd/goreleaser_release.md', 'docs'), 14),
 (('.github/workflows/generate.yml', 'chore'), 14),
 (('www/docs/static/latest', 'chore'), 13),
 (('www/docs/static/latest-pro', 'chore'), 13),
 (('www/docs/static/releases-pro.json', 'chore'), 13),
 (('www/docs/static/releases.json', 'chore'), 13),
 (('flake.lock', 'chore'), 12),
 (('USERS.md', 'docs'), 11),
 (('.github/workflows/docs.yml', 'chore'), 11),
 (('internal/skips/skips.go', 'feat'), 11),
 (('www/docs/install.md', 'docs'), 10),
 (('www/docs/customization/sign.md', 'docs'), 10)

In [8]:
# Collapse the per-intent tallies into one total per file, then keep the
# names of the 30 files with the highest totals.
most_changed_counter = Counter()
for (path, _intent), hits in counter.items():
    most_changed_counter[path] += hits

busiest = most_changed_counter.most_common(30)
file_set = {path for path, _total in busiest}
file_set

{'.github/workflows/build.yml',
 '.github/workflows/codeql.yml',
 '.github/workflows/docs.yml',
 '.github/workflows/generate.yml',
 '.github/workflows/lint.yml',
 '.github/workflows/nightly-oss.yml',
 '.github/workflows/release.yml',
 '.goreleaser.yaml',
 'Dockerfile',
 'USERS.md',
 'flake.lock',
 'flake.nix',
 'internal/client/github.go',
 'internal/client/gitlab.go',
 'internal/pipe/nfpm/nfpm.go',
 'internal/pipe/nfpm/nfpm_test.go',
 'pkg/config/config.go',
 'www/docs/cmd/goreleaser_release.md',
 'www/docs/customization/blob.md',
 'www/docs/customization/builds.md',
 'www/docs/customization/nfpm.md',
 'www/docs/deprecations.md',
 'www/docs/install.md',
 'www/docs/static/latest',
 'www/docs/static/latest-pro',
 'www/docs/static/releases-pro.json',
 'www/docs/static/releases.json',
 'www/docs/static/schema-pro.json',
 'www/docs/static/schema.json',
 'www/mkdocs.yml'}

In [9]:
# Flatten the counter into (file, reason, count) rows, keeping only the
# files selected in `file_set`.
data_source = [
    (*file_and_reason, value)
    for file_and_reason, value in counter.items()
    if file_and_reason[0] in file_set
]
data_source

[('internal/client/github.go', 'chore', 4),
 ('.github/workflows/codeql.yml', 'chore', 25),
 ('.github/workflows/build.yml', 'chore', 58),
 ('.github/workflows/nightly-oss.yml', 'chore', 51),
 ('.github/workflows/release.yml', 'chore', 56),
 ('Dockerfile', 'chore', 20),
 ('internal/pipe/nfpm/nfpm.go', 'feat', 9),
 ('internal/pipe/nfpm/nfpm_test.go', 'feat', 8),
 ('pkg/config/config.go', 'feat', 28),
 ('www/docs/customization/builds.md', 'feat', 3),
 ('.goreleaser.yaml', 'unknown', 2),
 ('www/docs/install.md', 'unknown', 1),
 ('www/docs/install.md', 'build', 1),
 ('.goreleaser.yaml', 'build', 7),
 ('.github/workflows/lint.yml', 'build', 3),
 ('.goreleaser.yaml', 'chore', 1),
 ('flake.nix', 'build', 6),
 ('flake.lock', 'chore', 12),
 ('www/docs/static/latest', 'chore', 13),
 ('www/docs/static/latest-pro', 'chore', 13),
 ('www/docs/static/releases-pro.json', 'chore', 13),
 ('www/docs/static/releases.json', 'chore', 13),
 ('www/docs/static/schema.json', 'chore', 6),
 ('www/docs/deprecation

In [10]:
# One row per (file, reason) pair with its change count, ready for plotting.
column_names = ["file", "reason", "count"]
df = pd.DataFrame(data=data_source, columns=column_names)
df

Unnamed: 0,file,reason,count
0,internal/client/github.go,chore,4
1,.github/workflows/codeql.yml,chore,25
2,.github/workflows/build.yml,chore,58
3,.github/workflows/nightly-oss.yml,chore,51
4,.github/workflows/release.yml,chore,56
...,...,...,...
106,www/docs/static/schema.json,unknown,1
107,www/docs/static/schema.json,build,1
108,internal/pipe/nfpm/nfpm_test.go,docs,1
109,www/docs/install.md,fix,1


In [11]:


# Distinct intents present in the frame, in alphabetical order.
sorted(set(df['reason']))

['build', 'chore', 'ci', 'docs', 'feat', 'fix', 'refactor', 'test', 'unknown']

In [14]:
# Fixed colour per commit intent, passed to both bar charts so a given
# intent is drawn in the same colour in each.
color_choices = dict(
    feat="rgb(141,211,199)",
    ci="rgb(255,255,179)",
    fix="rgb(251,128,114)",
    chore="rgb(190,186,218)",
    build="rgb(128,177,211)",
    docs="rgb(253,180,98)",
    test="rgb(179,222,105)",
    refactor="rgb(252,205,229)",
    unknown="rgb(188,128,189)",
)

# Change counts per file, with bars coloured by intent.
figure = px.bar(
    data_frame=df,
    x="file",
    y="count",
    color="reason",
    color_discrete_map=color_choices,
)
figure.show()

In [15]:
# Change counts per intent; hovering over a bar segment shows the file it
# belongs to.
figure = px.bar(
    data_frame=df,
    x="reason",
    y="count",
    color="reason",
    hover_name="file",
    color_discrete_map=color_choices,
    height=1000,
)
figure.show()