In [27]:
import datetime
import pandas as pd
from github import Github

from gh_helpers import gh_token

In [13]:
# Authenticate with the token returned by gh_helpers.gh_token() and look up
# the 'pcdshub' organization; `org` is what the later cells iterate over.
token = gh_token()
org = Github(token).get_organization('pcdshub')
# Handle on a single repository.  NOTE: later cells reuse the name `repo` as
# a loop variable, so this binding does not survive once they have run.
repo = org.get_repo('atef')

In [9]:
# can get all repos in org
# NOTE: the loop exits on its first iteration, so its only effect is to
# rebind `repo` to the first repository yielded by org.get_repos()
# (overwriting any earlier value of `repo`).  If nothing is yielded,
# `repo` is left unchanged.
for repo in org.get_repos():
    break

In [110]:
# get repo activity
# PR added / PR closed / Total PR (change from last period)
# Issue added / Issue closed / Total Issues (change from last period)


# Reference "now" as a *naive UTC* datetime.  The values it is compared
# against (created_at / closed_at / commit author dates) and the `since`
# filter sent to GitHub are UTC, so using local wall-clock time would shift
# every reporting window by the machine's UTC offset.
# NOTE(review): this assumes the installed PyGithub returns naive datetimes
# (as the comparisons elsewhere in this notebook require) — confirm if the
# library is upgraded to a version that returns timezone-aware values.
today = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
period_start = today - datetime.timedelta(days=7)

def in_period_mask(series: pd.Series, start: datetime.datetime, end: datetime.datetime):
    """
    Return a boolean mask that is True where ``series`` lies inside (start, end).

    Both bounds are exclusive: entries equal to ``start`` or ``end`` are
    not selected.
    """
    after_start = series > start
    before_end = series < end
    return after_start & before_end

def summarize_period(data, start, end, print_results=False):
    """
    Count the items in ``data`` created and closed inside the period (start, end).

    Parameters
    ----------
    data : iterable
        Items that each expose the attributes ``number``, ``state``,
        ``created_at`` and ``closed_at``.  It is iterated exactly once.
    start, end : datetime.datetime
        Bounds of the period; both are exclusive.
    print_results : bool, optional
        When True, also print the two counts together with the number of
        items in ``data`` whose ``state`` is ``'open'``.

    Returns
    -------
    tuple of (int, int)
        ``(n_created, n_closed)``: how many items have ``created_at`` inside
        the period and how many have ``closed_at`` inside the period.
    """
    # construct dataframe: one column per attribute, one row per item
    fields = ('number', 'state', 'created_at', 'closed_at')
    info = {field: [] for field in fields}
    for item in data:
        for field in fields:
            info[field].append(getattr(item, field))

    df = pd.DataFrame(data=info)

    # items created inside the period
    new_in_period = df[in_period_mask(df['created_at'], start, end)]

    # items closed inside the period
    closed_in_period = df[in_period_mask(df['closed_at'], start, end)]

    if print_results:
        # Label the counts with the actual window: callers pass arbitrary
        # (start, end) pairs, so a fixed "last 7 days" label would be wrong.
        print(f"opened between {start} and {end}: {len(new_in_period)}")
        print(f"closed between {start} and {end}: {len(closed_in_period)}")
        # Only reflects the items present in ``data``.
        print(f"total open: {len(df[df['state'] == 'open'])}")

    return len(new_in_period), len(closed_in_period)

def summarize_commit_stats(data, start, end, print_results=False):
    """
    Tabulate per-commit line-change statistics for commits authored in (start, end).

    Each item of ``data`` must expose ``sha``, ``commit.author.date`` and
    ``stats.additions`` / ``stats.deletions`` / ``stats.total``.  One row is
    built per item (with the sha shortened to its first 10 characters) and
    only rows whose ``author_date`` lies strictly inside the period are
    returned; the returned rows keep the index they had in the full table.

    ``print_results`` is accepted but not used.
    """
    columns = ('commit_sha', 'author_date', 'additions', 'deletions', 'total')

    # Single pass over ``data``; attributes are read in column order.
    rows = [
        (
            cm.sha[:10],
            cm.commit.author.date,
            cm.stats.additions,
            cm.stats.deletions,
            cm.stats.total,
        )
        for cm in data
    ]

    # Pivot the row tuples into the column -> list-of-values mapping.
    info = {name: [row[pos] for row in rows] for pos, name in enumerate(columns)}
    commit_df = pd.DataFrame(data=info)

    inside = in_period_mask(commit_df['author_date'], start, end)
    return commit_df[inside]


# Show me the stats

In [114]:
# gather these stats for all the repos
# One list per output column; each loop iteration appends one value to every
# list so the lists stay aligned row-for-row.
data = {'repo': [], 'issues_opened': [], 'issues_closed': [], 'total_issues': [],
        'pulls_opened': [], 'pulls_closed': [], 'total_pulls': [],
        'n_commits': [], 'additions': [], 'deletions': [], 'total_changes': []}

start_period = today - datetime.timedelta(weeks=8)

for repo in org.get_repos():
    # Pad the status line so a shorter repo name fully overwrites the
    # previous (longer) one after the carriage return.
    print(f'processing {repo.name}...'.ljust(80), end='\r')

    # state='all' is required: the listings default to open items only, which
    # would make the "closed in period" counts always zero.
    # The issues listing also returns pull requests; keep only true issues so
    # PRs are not counted twice (once as issue, once as pull).
    issues = [iss for iss in repo.get_issues(state='all', since=start_period)
              if iss.pull_request is None]
    opened_iss, closed_iss = summarize_period(issues, start_period, today)

    # Fetch every PR once and reuse the list for both the period summary and
    # the open count below.
    pulls = list(repo.get_pulls(state='all'))
    opened_pr, closed_pr = summarize_period(pulls, start_period, today)

    data['repo'].append(repo.name)
    data['issues_opened'].append(opened_iss)
    data['issues_closed'].append(closed_iss)
    data['pulls_opened'].append(opened_pr)
    data['pulls_closed'].append(closed_pr)
    data['total_pulls'].append(sum(pr.state == 'open' for pr in pulls))
    # Not derived from `issues`: that list is limited by `since`, whereas this
    # is the count of all currently open (non-PR) issues.
    data['total_issues'].append(
        sum(iss.pull_request is None for iss in repo.get_issues(state='open')))

    # commit stats
    commit_df = summarize_commit_stats(repo.get_commits(since=start_period), start_period, today)

    data['n_commits'].append(len(commit_df))
    data['additions'].append(commit_df['additions'].sum())
    data['deletions'].append(commit_df['deletions'].sum())
    data['total_changes'].append(commit_df['total'].sum())

stats_df = pd.DataFrame(data=data)

processing ami-user-library...summary.......ys.....

In [117]:
# Persist the per-repository stats table as CSV (the DataFrame index is
# written too, since to_csv's default index=True is not overridden).
# NOTE(review): the destination is an absolute, user-specific path, so this
# cell only works on a machine with that directory layout — consider a
# configurable data directory.
stats_df.to_csv('/Users/roberttk/gh_loc/data/repo_stats.csv')