In [None]:
#Use the GitHub REST API to get all the user statistics on our organization's repos

In [21]:
import requests
import os
from pprint import pprint
import numpy as np
import pandas as pd
import xlsxwriter
import requests
from requests.auth import HTTPBasicAuth
import json
import base64
import re

In [22]:
def get_repos(userid, token, path, org, ref='develop'):
    """Return the ``org/repo`` names referenced by ``git:`` entries of a charts file.

    The resource at ``path`` is fetched with HTTP basic authentication. The answer is
    expected to be JSON carrying the file body base64-encoded under the ``content``
    key. Every returned name keeps the ``<org>/`` prefix; a trailing ``.git`` is removed.

    Args:
        userid: user name for HTTP basic authentication.
        token: token/password for HTTP basic authentication.
        path: full URL of the resource to fetch.
        org: organization name; only ``git:`` lines containing ``<org>/`` match.
        ref: value sent as the ``ref`` query parameter (default ``'develop'``).

    Returns:
        list[str]: matching names in file order; duplicates are NOT removed here.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON answer has no ``content`` field.
    """
    response = requests.request(
        "GET",
        path,
        headers={"Accept": "application/json"},
        auth=HTTPBasicAuth(userid, token),
        params={'ref': ref},
    )
    # Surface the real HTTP error instead of a confusing KeyError on 'content' below.
    response.raise_for_status()

    payload = json.loads(response.text)
    # UTF-8 is a superset of ASCII, so files with non-ASCII characters no longer raise.
    file_text = base64.b64decode(payload['content']).decode('utf-8')

    # re.escape keeps metacharacters in the org name from being read as pattern syntax.
    matches = re.findall(f'git: .+({re.escape(org)}/.+)', file_text)

    suffix = '.git'
    reponames = []
    for match in matches:
        # '.' also matches '\r' and trailing blanks, so trim them before the suffix test.
        name = match.strip()
        # Only a *trailing* '.git' is cut; names that merely contain it stay intact.
        if name.endswith(suffix):
            name = name[:-len(suffix)]
        reponames.append(name)

    return reponames

In [23]:
def prepare_reponames(userid, token, org, base_url=None):
    """Return the de-duplicated repo names gathered from all known charts files.

    To cover an additional charts file, fetch it with ``get_repos`` and extend the
    list before the de-duplication step.

    Args:
        userid: user name for HTTP basic authentication.
        token: token/password for HTTP basic authentication.
        org: organization whose charts file is read.
        base_url: API root URL; when omitted the module-level ``githuburl`` is used.

    Returns:
        list[str]: unique names, sorted so that repeated runs give the same order.
    """
    if base_url is None:
        base_url = githuburl

    # get_repos is the helper this notebook defines; the name previously called here
    # (get_repos_from_charts) does not exist and raised NameError.
    reponames = get_repos(userid, token, base_url + '/repos/' + org + '/reposfile.yaml', org)
    # more repos can be inserted by pulling in data from other files as well.
    # reponames2 = get_repos(...)
    # reponames.extend(reponames2)

    # get_repos already strips the trailing .git; only duplicates are removed here.
    return sorted(set(reponames))

In [None]:
#The following variables must be found in the environment:
#   GIT_USERID, GIT_TOKEN, GIT_URL, GIT_ORG
# Each message names the variable that is actually missing. A variable that is not
# set leaves its Python name undefined, so cells using it will fail with NameError.
import os

try:
    userid = os.environ['GIT_USERID']
except KeyError:
    print("Please set the environment variable GIT_USERID")

try:
    token = os.environ['GIT_TOKEN']
except KeyError:
    print("Please set the environment variable GIT_TOKEN")

try:
    githuburl = os.environ['GIT_URL']
except KeyError:
    print("Please set the environment variable GIT_URL")

try:
    org = os.environ['GIT_ORG']
except KeyError:
    print("Please set the environment variable GIT_ORG")

In [None]:
# Repo names whose contributor statistics are collected by the loop further down.
reponames = prepare_reponames(userid=userid, token=token, org=org)

In [84]:
# Filled by the stats loop as {repo name: {author login: total}}.
contribStatsDict = dict()

In [85]:
from datetime import datetime, timezone
import time

# Only weeks that start in this (UTC) year or later are counted.
STATS_SINCE_YEAR = 2023
# GitHub answers 202 while it is still computing a repo's statistics in the
# background; the request is repeated a few times before the repo is skipped.
STATS_MAX_ATTEMPTS = 5
STATS_RETRY_DELAY_SECONDS = 3

# Loop-invariant request settings. They stay module-level names because
# github_user_info reads `auth` and `headers` as globals.
auth = HTTPBasicAuth(userid, token)

headers = {
    "Accept": "application/json"
}


def _fetch_contributor_stats(url):
    """Return the list of contributor entries served at ``url``, or ``[]`` if none is usable."""
    for attempt in range(STATS_MAX_ATTEMPTS):
        if attempt:
            time.sleep(STATS_RETRY_DELAY_SECONDS)
        response = requests.request(
            "GET",
            url,
            headers=headers,
            auth=auth
        )
        if response.status_code != 202:
            break
    else:
        print('  skipped: statistics were still being computed after {} attempts'.format(STATS_MAX_ATTEMPTS))
        return []

    # 204 (empty body) and error answers carry no contributor list.
    if response.status_code != 200 or not response.text.strip():
        print('  skipped: HTTP {}'.format(response.status_code))
        return []

    payload = json.loads(response.text)
    # An error payload is a dict; iterating it would yield its keys, not contributors.
    return payload if isinstance(payload, list) else []


def _commits_since(weeks, first_year):
    """Sum the weekly 'c' counts of the weeks whose start ('w') lies in ``first_year`` or later (UTC)."""
    return sum(
        int(week['c'])  # 'a' and 'd' are also available per week but are not counted
        for week in weeks
        # Interpret the timestamp in UTC so the result does not depend on the local timezone.
        if datetime.fromtimestamp(week['w'], tz=timezone.utc).year >= first_year
    )


for repo in reponames:

    url = githuburl+'/repos/'+repo+'/stats/contributors'
    print(url)

    for contrib in _fetch_contributor_stats(url):

        author = contrib.get('author')
        # Entries without an author object and bot accounts are left out.
        if not author or author.get('type') == 'Bot':
            continue

        total = _commits_since(contrib['weeks'], STATS_SINCE_YEAR)

        if total > 0:
            contribStatsDict.setdefault(repo, {})[author['login']] = total

https://api.github.com/repos/VibrentHealth/mission-control-reporting-ui/stats/contributors
https://api.github.com/repos/VibrentHealth/pmt-audit-log-service/stats/contributors
https://api.github.com/repos/VibrentHealth/ancillary-study/stats/contributors
https://api.github.com/repos/VibrentHealth/study-in-a-box/stats/contributors
https://api.github.com/repos/VibrentHealth/vrp-pipelines-api/stats/contributors
https://api.github.com/repos/VibrentHealth/ehr-service/stats/contributors
https://api.github.com/repos/VibrentHealth/drc-hos-microservice/stats/contributors
https://api.github.com/repos/VibrentHealth/insights/stats/contributors
https://api.github.com/repos/VibrentHealth/smp-hos-ui/stats/contributors
https://api.github.com/repos/VibrentHealth/dx-communicator/stats/contributors
https://api.github.com/repos/VibrentHealth/vxp-communication-microservice/stats/contributors
https://api.github.com/repos/VibrentHealth/vxp-hos-microservice/stats/contributors
https://api.github.com/repos/Vibren

In [86]:
def github_user_info(row, summary_labels=("Totals",)):
    """Add the profile fields 'Name', 'Company' and 'Email' to one row.

    Intended for ``DataFrame.apply(github_user_info, axis=1)``: ``row.name`` (the
    index label) is used as the login in the ``/users/<login>`` request. The
    module-level ``githuburl``, ``headers`` and ``auth`` are used for the request.

    Args:
        row: mapping-like row with a ``name`` attribute holding the login.
        summary_labels: index labels that are not logins (this notebook adds a
            "Totals" row); no request is sent for them.

    Returns:
        The same ``row`` object with 'Name', 'Company' and 'Email' set. Each field
        is ``None`` when the row is a summary row, the request did not answer 200,
        or the profile does not contain the field.
    """
    profile = {}

    if row.name not in summary_labels:
        url = githuburl + '/users/' + str(row.name)

        response = requests.request(
            "GET",
            url,
            headers=headers,
            auth=auth
        )

        if response.status_code == 200:
            profile = json.loads(response.text)
        else:
            # e.g. an unknown or deleted account; keep the row and leave the fields empty
            print('no profile for {} (HTTP {})'.format(row.name, response.status_code))

    for column, key in (('Name', 'name'), ('Company', 'company'), ('Email', 'email')):
        row[column] = profile.get(key)

    return row

In [87]:
from datetime import datetime

# Outer keys (repos) become the columns, inner keys (author logins) become the rows.
contribStatsDict_df = pd.DataFrame(data=contribStatsDict)

# Bottom "Totals" row: the sum of every repo column.
per_repo_sum = contribStatsDict_df.sum(axis="index")
contribStatsDict_df.loc["Totals"] = per_repo_sum

# Right-hand "Totals" column: the sum of every row. It is computed after the
# "Totals" row was added, so that row's entry sums the per-repo sums.
per_row_sum = contribStatsDict_df.sum(axis="columns")
contribStatsDict_df['Totals'] = per_row_sum

In [88]:
# Add the Name / Company / Email columns by calling github_user_info on every row.
contribStatsDict_df = contribStatsDict_df.apply(github_user_info, axis=1)

# Put the profile columns first; every other column keeps its relative order.
leading_columns = ['Name', 'Email', 'Company']
other_columns = [c for c in contribStatsDict_df.columns if c not in leading_columns]
contribStatsDict_df = contribStatsDict_df[leading_columns + other_columns]

#dump the output into xls
# The context manager saves and closes the workbook on exit. ExcelWriter.save()
# was removed in pandas 2.0, so the explicit save() call fails on current pandas.
with pd.ExcelWriter('git-analysis.xlsx', mode='w') as writer:
    contribStatsDict_df.to_excel(writer, sheet_name=datetime.today().strftime('%Y-%m-%d'))