In [159]:
import os
from datetime import datetime, timedelta

import git
import pandas as pd
import plotly.express as px

# here it's good to get our bearings

In [160]:
# Display the kernel's current working directory (echoed as the cell's last expression).
os.getcwd()

# read a repository to graph

In [161]:
def create_tag_interval_df(repo: git.Repo, since: datetime) -> pd.DataFrame:
    """Build a chronological table of a repository's tags and the time between them.

    Args:
        repo: Repository whose ``tags`` are read. Each tag contributes its name and
            the ``authored_datetime`` of the commit it points at.
        since: Timezone-aware cutoff; only tags strictly later than this are kept.
            A naive datetime cannot be compared with the UTC timestamps and
            makes pandas raise ``TypeError``.

    Returns:
        A new DataFrame sorted by time with columns ``name``, ``timestamp``
        (timezone-aware, UTC) and ``interval`` (time elapsed since the previous
        kept tag; ``NaT`` on the first row). The same three columns are present
        when the repository has no tags or none is recent enough.
    """
    source = ((tag_ref.name, tag_ref.commit.authored_datetime) for tag_ref in repo.tags)
    raw_df = pd.DataFrame(data=source, columns=["name", "timestamp"])
    # Each commit keeps the UTC offset of its author, so one repository can mix
    # offsets. utc=True converts them all to a single timezone-aware dtype.
    timestamps = pd.to_datetime(raw_df["timestamp"], utc=True)
    sorted_df = raw_df.assign(timestamp=timestamps).sort_values(by="timestamp")
    # No early return for the empty case: every step below is a no-op on an
    # empty frame, and running them keeps the output schema identical.
    graph_df = sorted_df[sorted_df["timestamp"] > since].copy()
    graph_df["interval"] = graph_df["timestamp"].diff()
    return graph_df


# Prints a fixed marker; presumably a quick visual check that this cell ran to completion.
print("Okay")

We will need to generate a DataFrame of the tags, holding each tag's label and timestamp.

Let's filter this down to the tags created after the given date.

In [162]:
# Repository to analyse. Set the TAG_GRAPH_REPO environment variable to point at
# another checkout; the fallback is the path this notebook was written against,
# so existing runs behave as before.
REPO_PATH = os.environ.get("TAG_GRAPH_REPO", "/Users/timottinger/Projects/siemens-wfx")
# How many days back from now to look for tags.
LOOKBACK_DAYS = 60

repo = git.Repo(REPO_PATH)
# .astimezone() attaches the local zone, so the cutoff is timezone-aware.
start_date = datetime.now().astimezone() - timedelta(days=LOOKBACK_DAYS)
graph_df = create_tag_interval_df(repo, start_date)
graph_df

This should give us something to work with

In [163]:
# Plotly axes are numeric, date or categorical; there is no duration axis type.
# Convert the timedelta interval to a plain number of days so the bar heights
# are readable. assign() returns a new frame and leaves graph_df untouched.
delay_df = graph_df.assign(
    interval_days=graph_df["interval"].dt.total_seconds() / 86_400
)
figure = px.bar(
    delay_df,
    x="timestamp",
    y="interval_days",
    title='Release Delay',
    labels={"timestamp": "Tag date", "interval_days": "Days since previous tag"},
)
figure.show()


In [164]:
# For every tag, count the file-level differences against the tag before it.
tag_names = graph_df['name'].tolist()
# Pair each tag with its predecessor. The first tag is paired with itself, which
# yields no differences. Slicing (not indexing [0]) keeps this valid when the
# frame is empty.
previous_names = tag_names[:1] + tag_names[:-1]
diffcounts = [
    len(repo.commit(earlier_label).diff(current_label))
    for earlier_label, current_label in zip(previous_names, tag_names)
]

diffcounts


In [165]:
# Re-parse the timestamp column with utc=True so every value is expressed in UTC.
graph_df['timestamp'] = pd.to_datetime(graph_df['timestamp'], utc=True)

In [None]:
# Attach the diff counts as a new column; the list must have one entry per row of graph_df.
graph_df['diff_counts'] = diffcounts

In [None]:
# Display the frame to inspect its current columns.
graph_df

In [None]:
# Bubble chart of the 25 most recent rows: position, marker size and colour all
# encode the diff count. update_layout/update_traces return the figure, so the
# calls are chained before the result is bound.
figure = (
    px.scatter(
        data_frame=graph_df.iloc[-25:],
        x="timestamp",
        y="diff_counts",
        size="diff_counts",
        color="diff_counts",
        title='Release Size and Frequency',
        hover_data=["timestamp", "name", "interval"],
    )
    .update_layout(autosize=True)
    .update_traces(mode="markers+lines")
)
figure.show()

In [None]:
# Exploratory: list the attribute and method names available on the figure object.
dir(figure)

In [None]:
# Line chart of the diff count per tag over time, with a marker on every tag and
# the tag name as the hover title.
figure = px.line(
    graph_df,
    x="timestamp",
    y="diff_counts",
    title='Release Frequency and Size',
    hover_name="name",
    markers=True
)
# "x unified" shows a single shared hover label for everything at the same x position.
figure.update_layout(hovermode="x unified")
figure.show()

In [None]:
# Print the column dtypes and non-null counts of graph_df.
graph_df.info()


Add a change-type stacked graph based on diffs.


In [None]:
def by_date(x):
    """Sort key: the authored datetime of the commit that tag reference ``x`` points at."""
    return x.commit.authored_datetime
# Copy the tag references into a list and order them oldest-first by authored date.
sorted_tags = list(repo.tags)
sorted_tags.sort(key=by_date)

In [None]:

# Keep at most the 20 newest tags (the list is sorted oldest-first) and print
# each one next to its authored date.
last_20 = sorted_tags[-20:]
for recent_tag in last_20:
    print(f"{by_date(recent_tag)} {recent_tag}")


In [None]:

# Baseline for the tag-to-tag comparison: the oldest of the selected tags.
# Raises IndexError when last_20 is empty (repository without tags).
start = last_20[0]
start


In [None]:
from collections import Counter

# Display names for the git change-type codes that get their own chart series.
change_name = {
    "A": "Files Added",
    "D": "Files Deleted",
    "R": "Files Renamed",
    "M": "Files Modified"
}

# Bucket for any change type without an entry in change_name (such as copies
# or type changes), so an unexpected code is counted instead of raising KeyError.
OTHER_CHANGES = "Other Changes"


def change_series(start, last_20):
    """Yield one row of file-change counts per tag, each compared with the tag before it.

    Args:
        start: Tag reference used as the comparison baseline for the first tag.
        last_20: Iterable of tag references in chronological order.

    Yields:
        A dict with ``'Date'`` (commit date of the tag), ``'Name'`` (the tag's
        name), one count per display name in ``change_name`` and an
        ``OTHER_CHANGES`` count. Every count key is always present, 0 when that
        kind of change did not occur, so all rows share the same keys.
    """
    previous = start
    for tag in last_20:
        diffs = previous.commit.diff(tag.commit)
        counts = Counter(change_name.get(x.change_type, OTHER_CHANGES) for x in diffs)
        yield {
            # Label the row with the tag that introduced the changes, not the
            # baseline, so each tag appears exactly once.
            'Date': tag.commit.committed_datetime.date(),
            'Name': tag.name,
            # A Counter returns 0 for labels it never saw.
            **{label: counts[label] for label in (*change_name.values(), OTHER_CHANGES)},
        }
        previous = tag


# Collect the generated rows (one per tag in last_20) into a DataFrame and display it.
change_df = pd.DataFrame(change_series(start, last_20))
change_df

In [None]:
# Plot only the change-type columns change_df actually contains: a change type
# that never occurred may have no column, and px rejects unknown column names.
change_columns = [column for column in change_name.values() if column in change_df.columns]
figure = px.bar(
    change_df,
    title=f"Changes Across Tags for {os.path.basename(os.path.dirname(repo.common_dir))}",
    x="Name",
    y=change_columns,
    # labels must be a dict of column name -> display text. With several y
    # columns, px names the stacked values "value" and the series "variable".
    labels={"value": "Files", "variable": "Change Type", "Name": "Tag"},
    hover_name="Name",
    hover_data=["Date"],
    text_auto='.2s'
)
figure.show()

In [None]:
# Plot only the change-type columns change_df actually contains: a change type
# that never occurred may have no column, and px rejects unknown column names.
change_columns = [column for column in change_name.values() if column in change_df.columns]
figure = px.area(
    change_df,
    title=f"Changes Across Tags for {os.path.basename(os.path.dirname(repo.common_dir))}",
    x="Name",
    y=change_columns,
    # labels must be a dict of column name -> display text. With several y
    # columns, px names the stacked values "value" and the series "variable".
    labels={"value": "Files", "variable": "Change Type", "Name": "Tag"},
    hover_name="Name",
    hover_data=["Date"],
)
figure.show()