In [82]:
import re
from collections import Counter

import git
import pandas as pd
import plotly.express as px

In [83]:
wfx = "/Users/timottinger/Projects/siemens-wfx"
goreleaser = "/Users/timottinger/Projects/goreleaser"
repo = git.Repo(goreleaser)


In [84]:

pattern = re.compile(r'^(\w+)[\!(:]')
for text in ["feat(this):", "feat:", "doc(readme): blah", "fix(that): cozectify the flimble"]:
    print(pattern.search(text).group(1))

feat
feat
doc
fix


In [85]:
exclusions = {"go.mod", "go.sum", "Makefile", "makefile"}

In [86]:
counter = Counter()
for commit in repo.iter_commits():
    match = pattern.match(commit.message)
    intent = match.group(1) if match else commit.message.split()[0]
    for filename in commit.stats.files.keys():
        if filename in exclusions:
            continue
        counter[(filename, intent)] += 1

counter

Counter({('.github/workflows/dependabot-automation.yml', 'ci'): 1,
         ('www/docs/ci/actions.md', 'docs'): 39,
         ('internal/pipe/ko/ko.go', 'feat'): 7,
         ('internal/pipe/ko/ko_test.go', 'feat'): 8,
         ('www/docs/customization/ko.md', 'feat'): 6,
         ('www/docs/deprecations.md', 'feat'): 33,
         ('.github/workflows/build.yml', 'ci'): 2,
         ('internal/tmpl/tmpl.go', 'fix'): 12,
         ('internal/tmpl/tmpl_test.go', 'fix'): 7,
         ('internal/pipe/ko/ko_test.go', 'test'): 7,
         ('www/htmltest.yml', 'ci'): 1,
         ('.github/workflows/build.yml', 'chore'): 200,
         ('.github/workflows/nightly-oss.yml', 'chore'): 61,
         ('.github/workflows/release.yml', 'chore'): 130,
         ('internal/pipe/defaults/defaults.go', 'feat'): 7,
         ('internal/pipe/defaults/defaults_test.go', 'feat'): 12,
         ('internal/pipe/dist/dist.go', 'feat'): 5,
         ('internal/pipe/dist/dist_test.go', 'feat'): 2,
         ('internal/pipeli

In [87]:
counter.most_common()

[(('pkg/config/config.go', 'feat'), 247),
 (('.github/workflows/build.yml', 'chore'), 200),
 (('.github/workflows/release.yml', 'chore'), 130),
 (('README.md', 'Merge'), 88),
 (('.github/workflows/codeql.yml', 'chore'), 84),
 (('www/mkdocs.yml', 'docs'), 82),
 (('config/config.go', 'Merge'), 82),
 (('Dockerfile', 'feat'), 74),
 (('pipeline/brew/brew.go', 'Merge'), 69),
 (('www/docs/static/schema-pro.json', 'docs'), 67),
 (('www/docs/static/schema.json', 'chore'), 62),
 (('.github/workflows/nightly-oss.yml', 'chore'), 61),
 (('pkg/config/config.go', 'fix'), 60),
 (('internal/pipe/nfpm/nfpm.go', 'feat'), 57),
 (('pipeline/build/build.go', 'Merge'), 56),
 (('.github/workflows/generate.yml', 'chore'), 53),
 (('.github/workflows/lint.yml', 'chore'), 50),
 (('pipeline/brew/brew_test.go', 'Merge'), 50),
 (('pipeline/release/release.go', 'Merge'), 50),
 (('internal/client/github.go', 'feat'), 49),
 (('.github/workflows/docs.yml', 'chore'), 48),
 (('internal/pipe/brew/brew.go', 'feat'), 48),
 (

In [88]:
most_changed_counter = Counter()
for (filename, _), count in counter.items():
    most_changed_counter[filename] += count
file_set = {file for file, reason in most_changed_counter.most_common(30)}
file_set

{'.github/workflows/build.yml',
 '.github/workflows/codeql.yml',
 '.github/workflows/release.yml',
 '.goreleaser.yml',
 '.travis.yml',
 'Dockerfile',
 'README.md',
 'config/config.go',
 'context/context.go',
 'internal/artifact/artifact.go',
 'internal/builders/golang/build.go',
 'internal/builders/golang/build_test.go',
 'internal/client/github.go',
 'internal/pipe/brew/brew.go',
 'internal/pipe/brew/brew_test.go',
 'internal/pipe/docker/docker.go',
 'internal/pipe/docker/docker_test.go',
 'internal/pipe/nfpm/nfpm.go',
 'internal/pipe/nfpm/nfpm_test.go',
 'main.go',
 'pipeline/brew/brew.go',
 'pipeline/brew/brew_test.go',
 'pipeline/build/build.go',
 'pipeline/defaults/defaults.go',
 'pipeline/fpm/fpm.go',
 'pipeline/release/release.go',
 'pkg/config/config.go',
 'www/docs/static/schema-pro.json',
 'www/docs/static/schema.json',
 'www/mkdocs.yml'}

In [89]:
data_source = [
    (filename, reason, value)
    for ((filename, reason), value) in counter.items()
    if filename in file_set
]
data_source

[('.github/workflows/build.yml', 'ci', 2),
 ('.github/workflows/build.yml', 'chore', 200),
 ('.github/workflows/release.yml', 'chore', 130),
 ('.github/workflows/codeql.yml', 'chore', 84),
 ('Dockerfile', 'chore', 24),
 ('www/mkdocs.yml', 'docs', 82),
 ('www/docs/static/schema-pro.json', 'docs', 67),
 ('README.md', 'docs', 31),
 ('pkg/config/config.go', 'feat', 247),
 ('www/mkdocs.yml', 'feat', 33),
 ('.github/workflows/build.yml', 'Merge', 4),
 ('.github/workflows/release.yml', 'Merge', 5),
 ('pkg/config/config.go', 'fix', 60),
 ('www/docs/static/schema.json', 'fix', 20),
 ('www/docs/static/schema.json', 'docs', 30),
 ('Dockerfile', 'chore(deps):', 1),
 ('internal/pipe/nfpm/nfpm_test.go', 'test', 12),
 ('internal/pipe/nfpm/nfpm.go', 'feat', 57),
 ('internal/pipe/nfpm/nfpm_test.go', 'feat', 46),
 ('internal/client/github.go', 'chore', 11),
 ('internal/client/github.go', 'fix', 40),
 ('.github/workflows/build.yml', 'fix', 20),
 ('.github/workflows/release.yml', 'fix', 3),
 ('internal/bu

In [90]:
df = pd.DataFrame(data_source, columns=["file", "reason", "count"])
df

Unnamed: 0,file,reason,count
0,.github/workflows/build.yml,ci,2
1,.github/workflows/build.yml,chore,200
2,.github/workflows/release.yml,chore,130
3,.github/workflows/codeql.yml,chore,84
4,Dockerfile,chore,24
...,...,...,...
703,README.md,zip,1
704,main.go,main,1
705,main.go,git,1
706,main.go,fixes,1


In [91]:
figure = px.bar(df, x="file", y="count", color="reason", height=700)
figure.show()

In [92]:
figure = px.bar(df, x="reason", y="count", color="reason", hover_name="file", height=500)
figure.show()