In [61]:
import os
import re
from collections import Counter

import git
import pandas as pd
import plotly.express as px

In [62]:
# Location of the git repository to analyse. Set the WFX_REPO_PATH environment
# variable to point at a different checkout; the default is the original
# author's local path, kept so existing runs behave as before.
REPO_PATH = os.environ.get("WFX_REPO_PATH", "/Users/timottinger/Projects/siemens-wfx")
repo = git.Repo(REPO_PATH)


In [63]:

# Captures the leading word of a commit subject when it is immediately
# followed by "!", "(" or ":" -- e.g. "feat" in "feat(this):" or "fix" in "fix:".
pattern = re.compile(r'^(\w+)[\!(:]')

# Quick sanity check: print the captured word for a few sample subjects.
sample_subjects = [
    "feat(this):",
    "feat:",
    "doc(readme): blah",
    "fix(that): cozectify the flimble",
]
for subject in sample_subjects:
    captured = pattern.search(subject)
    print(captured.group(1))

feat
feat
doc
fix


In [64]:
# Tally, for every (file, intent) pair, how many commits touched that file.
# "intent" is the leading word of the commit message captured by `pattern`
# (e.g. "feat", "fix", "chore"); when the message does not match, its first
# whitespace-separated word is used instead.
counter = Counter()
for commit in repo.iter_commits():
    message = commit.message
    match = pattern.match(message)
    if match:
        intent = match.group(1)
    else:
        # An empty or whitespace-only message has no first word; label it
        # explicitly instead of failing with IndexError on split()[0].
        words = message.split()
        intent = words[0] if words else "unknown"
    for filename in commit.stats.files:
        # Skip files ending in ".sum" (presumably Go checksum files such as
        # go.sum -- confirm) so they do not show up in the tallies.
        if filename.endswith(".sum"):
            continue
        counter[(filename, intent)] += 1

counter

Counter({('.github/workflows/chore.yml', 'fix'): 1,
         ('.github/workflows/ci.yml', 'fix'): 2,
         ('.github/workflows/pages.yml', 'fix'): 2,
         ('.gitlab-ci.yml', 'fix'): 1,
         ('contrib/config-deployment/client/go.mod', 'fix'): 2,
         ('contrib/remote-access/client/go.mod', 'fix'): 2,
         ('go.mod', 'fix'): 3,
         ('hugo/package-lock.json', 'fix'): 1,
         ('.ci/packages/versions.env', 'chore'): 10,
         ('renovate.json5', 'chore'): 1,
         ('.github/workflows/chore.yml', 'chore'): 2,
         ('.github/workflows/ci.yml', 'chore'): 19,
         ('.github/workflows/pages.yml', 'chore'): 11,
         ('.github/workflows/pr.yml', 'chore'): 4,
         ('.github/workflows/release.yml', 'chore'): 10,
         ('.gitlab-ci.yml', 'chore'): 5,
         ('generated/ent/ent.go', 'chore'): 1,
         ('generated/ent/history_query.go', 'chore'): 1,
         ('generated/ent/job_query.go', 'chore'): 1,
         ('generated/ent/runtime/runtime.go',

In [65]:
# Rank every (file, intent) pair from the most to the fewest commits.
ranked_pairs = counter.most_common()
ranked_pairs

[(('go.mod', 'chore'), 73),
 (('.github/workflows/ci.yml', 'chore'), 19),
 (('hugo/package-lock.json', 'chore'), 13),
 (('.github/workflows/pages.yml', 'chore'), 11),
 (('.ci/packages/versions.env', 'chore'), 10),
 (('.github/workflows/release.yml', 'chore'), 10),
 (('hugo/package.json', 'chore'), 10),
 (('CHANGELOG.md', 'chore'), 7),
 (('.github/workflows/ci.yml', 'ci'), 7),
 (('.goreleaser.yml', 'ci'), 7),
 (('CHANGELOG.md', 'fix'), 6),
 (('contrib/config-deployment/client/go.mod', 'chore'), 6),
 (('contrib/remote-access/client/go.mod', 'chore'), 6),
 (('hugo/go.mod', 'chore'), 6),
 (('.gitlab-ci.yml', 'chore'), 5),
 (('CHANGELOG.md', 'feat'), 5),
 (('codecov.yml', 'ci'), 5),
 (('go.mod', 'feat'), 5),
 (('.github/workflows/pr.yml', 'chore'), 4),
 (('generated/ent/runtime/runtime.go', 'chore'), 4),
 (('example/plugin/go.mod', 'chore'), 4),
 (('justfile', 'ci'), 4),
 (('go.mod', 'fix'), 3),
 (('.github/workflows/release.yml', 'ci'), 3),
 (('persistence/mock_Storage.go', 'chore'), 3),
 

In [66]:
# How many of the most frequently changed files to keep for plotting.
TOP_N_FILES = 30

# Collapse the (file, intent) tallies into one total per file.
most_changed_counter = Counter()
for (filename, _intent), count in counter.items():
    most_changed_counter[filename] += count

# most_common() yields (filename, total) pairs; only the names are needed here.
file_set = {
    filename
    for filename, _total in most_changed_counter.most_common(TOP_N_FILES)
}
file_set

{'.ci/packages/versions.env',
 '.github/dependabot.yml',
 '.github/workflows/ci.yml',
 '.github/workflows/pages.yml',
 '.github/workflows/pr.yml',
 '.github/workflows/release.yml',
 '.gitlab-ci.yml',
 '.golangci.yml',
 '.goreleaser.yml',
 'CHANGELOG.md',
 'README.md',
 'cmd/wfx-viewer/flags.go',
 'cmd/wfx-viewer/main.go',
 'cmd/wfx/cmd/root/cmd.go',
 'cmd/wfx/cmd/root/northbound.go',
 'cmd/wfx/cmd/root/southbound.go',
 'codecov.yml',
 'contrib/config-deployment/client/go.mod',
 'contrib/remote-access/client/go.mod',
 'docs/installation.md',
 'example/plugin/go.mod',
 'generated/ent/runtime/runtime.go',
 'go.mod',
 'go.work',
 'hugo/go.mod',
 'hugo/package-lock.json',
 'hugo/package.json',
 'justfile',
 'persistence/mock_Storage.go',
 'shell.nix'}

In [67]:
# Flatten the tallies into (file, reason, count) rows, keeping only the files
# that made it into file_set.
data_source = [
    (path, why, hits)
    for (path, why), hits in counter.items()
    if path in file_set
]
data_source

[('.github/workflows/ci.yml', 'fix', 2),
 ('.github/workflows/pages.yml', 'fix', 2),
 ('.gitlab-ci.yml', 'fix', 1),
 ('contrib/config-deployment/client/go.mod', 'fix', 2),
 ('contrib/remote-access/client/go.mod', 'fix', 2),
 ('go.mod', 'fix', 3),
 ('hugo/package-lock.json', 'fix', 1),
 ('.ci/packages/versions.env', 'chore', 10),
 ('.github/workflows/ci.yml', 'chore', 19),
 ('.github/workflows/pages.yml', 'chore', 11),
 ('.github/workflows/pr.yml', 'chore', 4),
 ('.github/workflows/release.yml', 'chore', 10),
 ('.gitlab-ci.yml', 'chore', 5),
 ('generated/ent/runtime/runtime.go', 'chore', 4),
 ('go.mod', 'chore', 73),
 ('hugo/package-lock.json', 'chore', 13),
 ('shell.nix', 'chore', 2),
 ('CHANGELOG.md', 'fix', 6),
 ('example/plugin/go.mod', 'fix', 1),
 ('CHANGELOG.md', 'chore', 7),
 ('.goreleaser.yml', 'chore', 2),
 ('.github/workflows/release.yml', 'ci', 3),
 ('contrib/config-deployment/client/go.mod', 'chore', 6),
 ('contrib/remote-access/client/go.mod', 'chore', 6),
 ('example/plugin

In [68]:
# One row per (file, reason) pair with its commit count, ready for plotting.
column_names = ["file", "reason", "count"]
df = pd.DataFrame(data_source, columns=column_names)
df

Unnamed: 0,file,reason,count
0,.github/workflows/ci.yml,fix,2
1,.github/workflows/pages.yml,fix,2
2,.gitlab-ci.yml,fix,1
3,contrib/config-deployment/client/go.mod,fix,2
4,contrib/remote-access/client/go.mod,fix,2
...,...,...,...
109,hugo/package-lock.json,wfx,1
110,hugo/package.json,wfx,1
111,justfile,wfx,1
112,shell.nix,wfx,1


In [69]:
# Stacked bars: one bar per file, coloured by the commit reason, so the mix of
# reasons behind each file's changes is visible at a glance.
figure = px.bar(
    df,
    x="file",
    y="count",
    color="reason",
    height=700,
    title="Commits touching the most frequently changed files, by reason",
)
figure.show()

In [73]:
# Stacked bars: one bar per commit reason; each segment is a file (its name is
# shown on hover), so the bar height is the total count for that reason.
figure = px.bar(
    df,
    x="reason",
    y="count",
    color="reason",
    hover_name="file",
    height=500,
    title="Commits per reason across the most frequently changed files",
)
figure.show()