In [1]:
import os
from datetime import datetime, timedelta
from os.path import dirname, basename

import git
import pandas as pd
import plotly.express as px

# here it's good to get our bearings

In [2]:
os.getcwd()

'/Users/timottinger/Projects/gitminer-jupyter/Notebooks'

# read a repository to graph

In [3]:
wfx = "/Users/timottinger/Projects/siemens-wfx"
goreleaser = "/Users/timottinger/Projects/goreleaser"
android = "../../ClassDojo/dojo-android"
repo = git.Repo(android)

In [4]:
repo_title = basename(dirname(repo.common_dir))

In [5]:
def create_tag_interval_df(repo: git.Repo, since: datetime) -> pd.DataFrame:
    source = ((tag_ref.name, tag_ref.commit.authored_datetime) for tag_ref in repo.tags)
    raw_df = pd.DataFrame(data=source, columns=["name", "timestamp"])
    if raw_df.empty:
        return raw_df
    raw_df["timestamp"] = pd.to_datetime(raw_df["timestamp"], utc=True)
    sorted_df = raw_df.sort_values(by=["timestamp"])
    graph_df = sorted_df[sorted_df['timestamp'] > since].copy()
    graph_df['interval'] = graph_df['timestamp'].diff()
    return graph_df


print("Okay")

Okay


we will need to generate a dataframe for the tags, with label and timestamp

Let's filter this down to the given date and later

In [6]:

start_date = datetime.today().astimezone() - timedelta(days=60)
graph_df = create_tag_interval_df(repo, start_date)
graph_df

Unnamed: 0,name,timestamp,interval
869,v6.40.0,2024-06-28 19:35:31+00:00,NaT
871,v6.41.0,2024-07-04 08:15:36+00:00,5 days 12:40:05
873,v6.42.0,2024-07-08 21:07:37+00:00,4 days 12:52:01
875,v6.43.0,2024-07-12 11:26:56+00:00,3 days 14:19:19
876,v6.43.0-codepush.v148,2024-07-12 14:46:17+00:00,0 days 03:19:21
878,v6.44.0,2024-07-12 22:21:18+00:00,0 days 07:35:01
881,v6.45.0,2024-07-19 15:25:11+00:00,6 days 17:03:53
884,v6.46.0,2024-07-26 17:42:25+00:00,7 days 02:17:14
882,v6.45.0-codepush.v149,2024-07-31 22:50:09+00:00,5 days 05:07:44
879,v6.44.0-codepush.v150,2024-07-31 22:50:09+00:00,0 days 00:00:00


This should give us something to work with

In [22]:
figure = px.bar(graph_df, x="timestamp", y="interval", title='Release Delay')
figure.show()


In [None]:
earlier_label = graph_df.iloc[0]['name']
diffcounts = []
for index, data in graph_df.iterrows():
    current_label = data['name']
    diffs = repo.commit(earlier_label).diff(current_label)
    diffcounts.append(len(diffs))
    earlier_label = current_label

diffcounts


In [None]:
graph_df['timestamp'] = pd.to_datetime(graph_df['timestamp'], utc=True)

In [None]:
graph_df['diff_counts'] = diffcounts

In [None]:
graph_df

In [None]:
figure = px.scatter(
    graph_df.iloc[-25:],
    x="timestamp",
    y="diff_counts",
    size="diff_counts",
    color="diff_counts",
    title=f'{repo_title} Release Size and Frequency',
    hover_data=["timestamp", "name", "interval"]
)
figure.update_layout(autosize=True)
figure.update_traces(mode="markers+lines")
figure.show()

In [None]:
dir(figure)

In [23]:
hovertemplate = "%{name}"
figure = px.line(
    graph_df,
    title=f'{repo_title} Release Frequency and Size',
    x="timestamp",
    y="diff_counts",
    hover_name="name",
    markers=True
)
#figure.update_traces(mode="markers")
figure.update_layout(hovermode="x unified")
figure.show()

In [None]:
graph_df.info()


Add a change-type stacked graph based on diffs.


In [None]:
by_date = lambda x: x.commit.authored_datetime
sorted_tags = sorted(repo.tags, key=by_date)

In [None]:

last_20 = sorted_tags[-20:]
for x in last_20:
    print(by_date(x), x)


In [None]:

start = last_20[0]
start


In [None]:
from collections import Counter

change_name = {
    "A": "Files Added",
    "D": "Files Deleted",
    "R": "Files Renamed",
    "M": "Files Modified"
}


def change_series(start, last_20):
    for tag in last_20:
        diffs = start.commit.diff(tag.commit)
        yield {
            'Date': start.commit.committed_datetime.date(),
            'Name': start.name,
            **Counter(change_name[x.change_type] for x in diffs)
        }
        start = tag


change_df = pd.DataFrame(change_series(start, last_20))
change_df

In [None]:
figure = px.bar(
    change_df,
    title=f"Changes Across Tags for {repo_title}",
    x="Name",
    y=list(change_name.values()),
    labels=["Added", "Deleted", "Modified", "Removed"],
    hover_name="Name",
    hover_data=["Date"],
    text_auto='.2s'
)
figure.show()

In [None]:
figure = px.area(
    change_df,
    title=f"Changes Across Tags for {repo_title}",
    x="Name",
    y=list(change_name.values()),
    labels=["Added", "Deleted", "Modified", "Removed"],
    hover_name="Name",
    hover_data=["Date"],
)
figure.show()