# GitHub lab

This project (lab):
- https://github.com/mtulio/github-insights-reports

Doc ref:
- https://docs.github.com/en/rest/reference/pulls#list-pull-requests
- rate limiting https://docs.github.com/en/rest/overview/resources-in-the-rest-api#rate-limiting

Report:
- https://datastudio.google.com/reporting/bb1170df-e8a5-479f-b75e-79b1908e9041/page/llcYC/edit

Datasources:
- https://docs.google.com/spreadsheets/d/1jjLZ-G16RJp4R1mqsdnvlS7y1dlNLdu-eDN-WvNygCA/edit

In [None]:
# Install dependencies (TODO: add them to requirements.txt when done)
!pip install --upgrade requests gspread google-api-python-client google-auth-httplib2 google-auth-oauthlib pandas

In [None]:
import os
import requests
from pprint import pprint

In [None]:
# gspread
import gspread

#!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

from google.oauth2 import service_account
import pandas as pd

In [None]:
# Root URL that every GitHub API request in this notebook is built on.
base_url = "https://api.github.com"

# Credentials come from the environment; each is None when the variable
# is not set.
client_id = os.environ.get('GH_CLIENT_ID')
client_secret = os.environ.get('GH_CLIENT_SECRET')

# Pre-rendered "key=value" query-string fragments carrying the credentials.
a_cid = "client_id=" + str(client_id)
a_csec = "client_secret=" + str(client_secret)

In [None]:
# Path of the service-account key file loaded by sheet_login().
gce_sheet_credentials = "/home/splat/secrets/credentials.json"

# Scopes applied to the service-account credentials in sheet_login().
gce_sheet_scopes = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive",
]

In [None]:
def sheet_login():
    """Return a gspread client authorized with the service account.

    The key file at ``gce_sheet_credentials`` is loaded, the scopes in
    ``gce_sheet_scopes`` are applied to it, and the scoped credentials are
    handed to ``gspread.authorize``.
    """
    base_creds = service_account.Credentials.from_service_account_file(
        gce_sheet_credentials
    )
    return gspread.authorize(base_creds.with_scopes(gce_sheet_scopes))

In [None]:
def sheet_reader():
    """Read the "repoContribRole" tab into a pandas DataFrame.

    Opens the "splat-github-insights" spreadsheet with a freshly authorized
    client and builds the frame from ``get_all_records()``.
    """
    client = sheet_login()
    worksheet = client.open("splat-github-insights").worksheet("repoContribRole")
    return pd.DataFrame(worksheet.get_all_records())

In [None]:
#sheet_reader().T.reset_index().T.values.tolist()

In [None]:
def sheet_writer(data):
    """Write ``data`` to the "repoContribRole" tab of "splat-github-insights".

    Args:
        data: Values passed straight to the worksheet's ``update`` call;
            the callers in this notebook pass a list of rows (lists).
    """
    client = sheet_login()
    worksheet = client.open("splat-github-insights").worksheet("repoContribRole")
    # 'USER_ENTERED' is forwarded unchanged as the value input option.
    worksheet.update(data, value_input_option='USER_ENTERED')

In [None]:
def find_next_page(link_header):
    """Extract the ``page`` number of the ``rel="next"`` entry of a Link header.

    The header is a comma-separated list of entries shaped like
    ``<URL>; rel="next"``. The URL's query string is parsed properly, so
    other parameters (``per_page=...``, or anything following ``page``)
    cannot leak into the result.

    Args:
        link_header: Raw value of the HTTP ``Link`` response header.

    Returns:
        The value of the ``page`` query parameter of the "next" link as a
        string, or ``None`` when the header is empty, has no "next" entry,
        or the "next" URL carries no ``page`` parameter.
    """
    from urllib.parse import parse_qs, urlsplit

    if not link_header:
        return None

    for entry in link_header.split(','):
        # Split "<URL>" from its ";"-separated parameters at the closing
        # bracket; skip segments that are not a well-formed link entry.
        target, closing, params = entry.partition('>')
        if not closing:
            continue

        # Accept both rel="next" and the unquoted rel=next spelling.
        relations = {p.strip().replace('"', '') for p in params.split(';')}
        if 'rel=next' not in relations:
            continue

        url = target.strip().lstrip('<')
        pages = parse_qs(urlsplit(url).query).get('page')
        return pages[0] if pages else None

    return None

In [None]:
def get_pulls_from_repo(org_name=None, repo_name=None, page=None):
    """Fetch one page of pull requests for ``org_name/repo_name``.

    Args:
        org_name: Owner (organization or user) of the repository.
        repo_name: Repository name.
        page: Page to request; when falsy no ``page`` parameter is sent.

    Returns:
        A ``(data, next_page)`` tuple. ``data`` is the decoded JSON body,
        returned as-is whatever the HTTP status, so error payloads reach the
        caller. ``next_page`` is the page found by ``find_next_page`` in the
        ``Link`` response header, or ``None`` when that header is absent or
        empty.

    Raises:
        requests.RequestException: The HTTP request failed or timed out.
        ValueError: The response body is not valid JSON.
    """
    url = f"{base_url}/repos/{org_name}/{repo_name}/pulls"
    params = {"page": page} if page else None

    # Send the OAuth app credentials as HTTP basic auth rather than in the
    # query string: GitHub removed query-parameter authentication, and this
    # also keeps the secret out of URLs. Fall back to an unauthenticated
    # request when the credentials are not configured.
    auth = (client_id, client_secret) if client_id and client_secret else None

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        resp = requests.get(url, params=params, auth=auth, timeout=30)
    except requests.RequestException:
        print("get_pulls_from_repo() e1")
        raise

    try:
        respData = resp.json()
    except ValueError:
        print("get_pulls_from_repo() e2")
        raise

    # The Link header is what announces further pages.
    link_header = resp.headers.get('Link')
    if not link_header:
        return respData, None
    return respData, find_next_page(link_header)

In [None]:
def get_repo(org_name=None, repo_name=None):
    """Collect pull requests of a repository across all result pages.

    Calls ``get_pulls_from_repo`` repeatedly, following the page it returns,
    until no further page is announced or a non-list payload (an API error)
    is received.

    Args:
        org_name: Owner (organization or user) of the repository.
        repo_name: Repository name.

    Returns:
        A list with the items of every page fetched. When an error payload
        is received, the error is printed and the items gathered so far are
        returned.
    """
    repo_pulls = []
    page = None
    while True:
        print(f"repo_pulls {len(repo_pulls)}, page={page}")
        pulls, page = get_pulls_from_repo(org_name, repo_name, page)

        # A successful page is a list; anything else (typically a dict with
        # a 'message' key) is an error payload and must not be concatenated
        # onto the results.
        if not isinstance(pulls, list):
            message = str(pulls.get('message', '')) if isinstance(pulls, dict) else ''
            if 'API rate limit exceeded' in message:
                print("ERROR - rate limited: ")
            else:
                print("ERROR - unexpected response: ")
            print(pulls)
            break

        repo_pulls.extend(pulls)

        if page is None:
            break

    return repo_pulls

In [None]:
def get_users_asoc(results):
    """Group pull-request authors by their ``author_association``.

    Args:
        results: Iterable of pull-request dicts, each providing
            ``author_association`` and ``user['login']``.

    Returns:
        A ``(user_assoc, user_assoc_sum)`` tuple:

        * ``user_assoc`` maps each association to the list of author logins,
          one entry per pull request (logins may repeat).
        * ``user_assoc_sum`` maps each association to a dict with ``name``,
          ``total`` (number of pull requests) and ``perc`` (share of all
          entries in ``results``, in percent).

    Raises:
        KeyError: An entry lacks ``author_association`` or ``user['login']``.
    """
    user_assoc = {}
    for pull in results:
        user_assoc.setdefault(pull['author_association'], []).append(
            pull['user']['login']
        )

    # Percentages are relative to the pull requests passed in. The sum of
    # the group sizes equals the number of entries processed, and the
    # comprehension below only runs when there is at least one group, so
    # the division cannot hit zero.
    total_pulls = sum(len(logins) for logins in user_assoc.values())
    user_assoc_sum = {
        assoc: {
            "name": assoc,
            "total": len(logins),
            "perc": (len(logins) / total_pulls) * 100,
        }
        for assoc, logins in user_assoc.items()
    }
    return user_assoc, user_assoc_sum

## Getting data

In [None]:
# Rows to be written to the sheet; starts with the header row only and is
# appended to by collect_repo_pulls_open().
sheet_data = [["ORG", "PROJECT", "ROLE", "USERNAME"]]

In [None]:
def collect_repo_pulls_open(org_name, repo_name):
    """Fetch a repository's pull requests and append their authors to ``sheet_data``.

    The pull requests returned by ``get_repo`` are grouped with
    ``get_users_asoc``; the per-association summary is pretty-printed, and
    one ``[org, repo, association, login]`` row per pull-request author is
    appended to the module-level ``sheet_data`` list.

    Args:
        org_name: Owner (organization or user) of the repository.
        repo_name: Repository name.
    """
    pulls = get_repo(org_name, repo_name)
    logins_by_role, role_summary = get_users_asoc(pulls)
    pprint(role_summary)
    for role, logins in logins_by_role.items():
        sheet_data.extend(
            [org_name, repo_name, role, login] for login in logins
        )

In [None]:
# Collect pull-request authors for openshift/installer.
org_name, repo_name = "openshift", "installer"
collect_repo_pulls_open(org_name, repo_name)

In [None]:
# Collect pull-request authors for openshift/openshift-docs.
org_name, repo_name = "openshift", "openshift-docs"
collect_repo_pulls_open(org_name, repo_name)

In [None]:
# Collect pull-request authors for kubernetes-sigs/cloud-provider-azure.
org_name, repo_name = "kubernetes-sigs", "cloud-provider-azure"
collect_repo_pulls_open(org_name, repo_name)

In [None]:
# Collect pull-request authors for kubernetes/kubernetes.
org_name, repo_name = "kubernetes", "kubernetes"
collect_repo_pulls_open(org_name, repo_name)

In [None]:
# Collect pull-request authors for kubernetes-sigs/kustomize.
org_name, repo_name = "kubernetes-sigs", "kustomize"
collect_repo_pulls_open(org_name, repo_name)

In [None]:
# Collect pull-request authors for openshift/console.
org_name, repo_name = "openshift", "console"
collect_repo_pulls_open(org_name, repo_name)

In [None]:
# Push the header row plus every collected row to the Google Sheet.
sheet_writer(data=sheet_data)