In [1]:
# Name of the AWS Systems Manager SecureString parameter holding the GitHub personal access token.
parameter_name = 'github_token'

# GitHub organizations whose members are treated as employees.
organization_names = [
    'aws',
    'awslabs',
    'aws-amplify',
    'aws-samples',
]

# Repositories (in "owner/name" form) to generate reports for.
repo_names = [
    'aws-amplify/amplify-android',
    'aws-amplify/aws-sdk-android',
    'awslabs/aws-mobile-appsync-sdk-android',
]

In [2]:
# Retrieve the GitHub token from SSM to prevent oops-I-pushed-credentials-to-GitHub uh-ohs.

import boto3
from github import Github

# Fetch the parameter with decryption enabled, pull the token out of the
# response, and build the GitHub client from it.
ssm = boto3.client('ssm')
response = ssm.get_parameter(
    Name=parameter_name,
    WithDecryption=True,
)
token = response['Parameter']['Value']
github = Github(token)

In [3]:
# Collect the GitHub handles of every member of the configured organizations so
# their activity can later be excluded from the report.
# TODO: Is this kosher? Perhaps TAMs, SAs, etc are opening issues on behalf of their customers?
# NOTE(review): which members get_members() returns may depend on what the token
# is allowed to see in each organization — confirm.

employees = set()

for organization_name in organization_names:
    members = github.get_organization(organization_name).get_members()
    employees.update(member.login for member in members)

In [4]:
# Loop through each repository, grab all issues, and create a DataFrame for each.

import pandas as pd

# Maps each repository name to a DataFrame of its issues, one row per issue,
# with columns 'labels', 'created_at' and 'closed_at'.
repos = {}

for repo_name in repo_names:
    repo = github.get_repo(repo_name)
    issues = []

    # state='all' asks for issues regardless of whether they are open or closed.
    for issue in repo.get_issues(state='all'):
        # Drop entries that carry pull-request data and anything filed by an employee.
        if issue.pull_request is not None or issue.user.login in employees:
            continue

        # Build the label array only for issues that are actually kept, rather
        # than for every listed entry before filtering.
        labels = pd.array([label.name for label in issue.labels])
        issues.append([labels, issue.created_at, issue.closed_at])

    repos[repo_name] = pd.DataFrame(issues, columns=['labels', 'created_at', 'closed_at'])

In [5]:
# Using the DataFrames, process and grab the counts of issues.

from datetime import datetime
from IPython.display import display, HTML

# Reference instants for the report. GitHub timestamps are UTC, so "now" is taken
# once, in UTC, rather than from the machine's local clock; both cutoffs are
# derived from that single instant so they are exactly 7 and 30 days before it.
now_utc = pd.Timestamp.now(tz='UTC')
seven_days_ago = now_utc - pd.Timedelta('7 days')
thirty_days_ago = now_utc - pd.Timedelta('30 days')

# One row per repository: name, current open count, then the open count and
# delta for one week ago and one month ago.
open_counts = []

for repo_name in repo_names:
    r = repos[repo_name]

    # Normalize both timestamp columns to timezone-aware UTC. Naive values are
    # interpreted as UTC and aware values are converted, so the comparisons below
    # work whichever flavour the frame holds (including an empty or all-missing
    # column).
    created_at = pd.to_datetime(r.created_at, utc=True)
    closed_at = pd.to_datetime(r.closed_at, utc=True)

    # An issue is open now when it has no close timestamp.
    open_issues = r[closed_at.isna()]

    # An issue was open at a past instant when it was created before that instant
    # and not closed before it. Comparisons against a missing close timestamp are
    # False, so still-open issues pass the negated test.
    last_week_open_issues = r[(created_at < seven_days_ago) & ~(closed_at < seven_days_ago)]
    last_month_open_issues = r[(created_at < thirty_days_ago) & ~(closed_at < thirty_days_ago)]

    open_issues_count = len(open_issues.index)
    last_week_open_issues_count = len(last_week_open_issues.index)
    last_month_open_issues_count = len(last_month_open_issues.index)

    open_counts.append([repo_name,
                        open_issues_count,
                        last_week_open_issues_count,
                        open_issues_count - last_week_open_issues_count,
                        last_month_open_issues_count,
                        open_issues_count - last_month_open_issues_count])


open_df = pd.DataFrame(open_counts, columns=['Repo', 'Open', 'Last Week', 'WoW', 'Last Month', 'MoM'])

In [6]:
# Build the report for display: a generation timestamp, a heading, and the
# open-issue counts table.

# The <em> element is explicitly closed so the fragment is well-formed HTML.
display(HTML(f'<em>Generated on {datetime.now()}</em>'))
display(HTML('<h1>Open Issues</h1>'))
display(open_df)

Unnamed: 0,Repo,Open,Last Week,WoW,Last Month,MoM
0,aws-amplify/amplify-android,9,9,0,8,1
1,aws-amplify/aws-sdk-android,126,126,0,121,5
2,awslabs/aws-mobile-appsync-sdk-android,42,45,-3,44,-2
