In [None]:
import requests
import os
from pprint import pprint
import numpy as np
import pandas as pd
import xlsxwriter
import requests
from requests.auth import HTTPBasicAuth
import json
import base64
import re

In [None]:
def get_repos(userid, token, path, org, ref='develop'):
    """Return the repository names listed in a repos/charts file fetched over HTTP.

    The file is requested from ``path`` with HTTP basic auth. The JSON reply is
    expected to carry the file body base64-encoded under the ``content`` key.
    Every line of the decoded file matching ``git: <anything><org>/<name>``
    contributes one entry; entries keep the ``<org>/`` prefix.

    Args:
        userid: user name for HTTP basic auth.
        token: password / access token for HTTP basic auth.
        path: full URL of the file to fetch.
        org: organisation name that must precede each repository name.
        ref: value sent as the ``ref`` query parameter (defaults to 'develop').

    Returns:
        list of ``<org>/<name>`` strings in file order, with a trailing
        ``.git`` removed. Duplicates are not removed here.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON reply has no ``content`` field.
    """
    response = requests.request(
        "GET",
        path,
        headers={"Accept": "application/json"},
        auth=HTTPBasicAuth(userid, token),
        params={'ref': ref},
    )
    # Surface the HTTP status instead of an obscure KeyError('content') below.
    response.raise_for_status()

    allfields = json.loads(response.text)
    # utf-8 is a superset of ascii, so files with non-ASCII bytes no longer crash.
    reposfile = base64.b64decode(allfields['content']).decode('utf-8')

    # Escape org so regex metacharacters in it are matched literally.
    pattern = r'git: .+(' + re.escape(org) + r'/.+)'

    suffix = '.git'
    reponames = []
    for name in re.findall(pattern, reposfile):
        # '.' also matches '\r', so drop stray whitespace from CRLF files.
        name = name.strip()
        # Only strip a trailing '.git'; names that merely contain '.git'
        # (e.g. 'org/my.github.io') must stay intact.
        if name.endswith(suffix):
            name = name[:-len(suffix)]
        reponames.append(name)

    return reponames

In [None]:
def prepare_reponames(userid, token, org):
    """Return the de-duplicated, sorted repository names from all repos files.

    Currently a single file is read through get_repos(); to add another file,
    fetch it the same way and extend the list before de-duplication.

    Args:
        userid: user name for HTTP basic auth.
        token: password / access token for HTTP basic auth.
        org: organisation name, used in the file URL and passed on to get_repos().

    Returns:
        sorted list of unique repository names as produced by get_repos().

    Reads the module-level ``githuburl`` for the API base URL.
    """
    # NOTE(review): this URL has no repository or '/contents/' segment --
    # confirm it points at the intended file.
    reponames = get_repos(userid, token, githuburl+'/repos/'+org+'/reposfile.yaml', org)
    #more repos can be inserted by pulling in data from other files as well.
    #reponames2 = ...
    #reponames.extend(reponames2)

    # Remove duplicates; sort so the result (and the spreadsheet column order
    # derived from it) is the same on every run.
    return sorted(set(reponames))

In [None]:
#The following variables must be found in the environment:
#  GIT_USERID - user name for HTTP basic auth
#  GIT_TOKEN  - access token for HTTP basic auth
#  GIT_URL    - base URL of the API
#  GIT_ORG    - organisation name
# A missing variable is reported by name; the corresponding Python variable is
# then left undefined, so later cells that need it will fail.
import os

try:
    userid = os.environ['GIT_USERID']
except KeyError:
    print("Please set the environment variable GIT_USERID")

try:
    token = os.environ['GIT_TOKEN']
except KeyError:
    print("Please set the environment variable GIT_TOKEN")

try:
    githuburl = os.environ['GIT_URL']
except KeyError:
    print("Please set the environment variable GIT_URL")

try:
    org = os.environ['GIT_ORG']
except KeyError:
    print("Please set the environment variable GIT_ORG")

In [None]:
# Build the list of repositories to inspect; userid, token and org come from
# the environment-variable cell.
reponames = prepare_reponames(userid, token, org)

In [None]:
# One independent, initially empty mapping per permission level; each is
# filled per repository by the collaborators cell.
cAdminDict, cMaintDict, cPushDict, cPullDict = dict(), dict(), dict(), dict()

In [None]:
from datetime import datetime

# Collect, per repository, the logins of non-bot collaborators, split by the
# permission flags the API reports for each of them.
headers = {
    "Accept": "application/json"
}

params = {
    'per_page':100,
}

auth = HTTPBasicAuth(userid, token)

# Flag name in each collaborator's 'permissions' object -> dict collecting the
# logins that hold that flag.
permDicts = {
    'admin': cAdminDict,
    'maintain': cMaintDict,
    'push': cPushDict,
    'pull': cPullDict,
}

for repo in reponames:

    # get_repos() returns names that already carry the '<org>/' prefix; only
    # prepend the organisation when it is missing, so it is never doubled.
    fullName = repo if '/' in repo else org + '/' + repo
    url = githuburl + '/repos/' + fullName + '/collaborators'
    print(url)

    params['page'] = 1

    for permDict in permDicts.values():
        permDict[repo] = {}

    # Page through the results until an empty page is returned.
    while True:
        response = requests.request(
            "GET",
            url,
            headers=headers,
            auth=auth,
            params=params
        )
        # An error reply is a JSON object, not a list of users; stop with the
        # HTTP status rather than failing later while iterating over it.
        response.raise_for_status()

        uList = json.loads(response.text)
        if not uList:
            break

        for user in uList:

            if user['type'] == 'Bot':
                continue

            # Record the login only under the permissions the user holds.
            perms = user.get('permissions') or {}
            for flag, permDict in permDicts.items():
                if perms.get(flag):
                    permDict[repo][user['login']] = user['login']

        params['page'] = params['page'] + 1
       

In [None]:
#need to get the teams, team members and team projects
#/orgs/{org}/teams
#/orgs/{org}/teams/{team_slug}/repos
#/orgs/{org}/teams/{team_slug}/members

# This cell lists the organisation's teams; teamsDict maps each team slug to
# an (initially empty) dict that a later cell fills with repository names.
teamsDict = {}

headers = {
    "Accept": "application/json"
}

params = {
    'per_page':100,
}

auth = HTTPBasicAuth(userid, token)

# Use the configured organisation instead of a hard-coded name.
url = githuburl + '/orgs/' + org + '/teams'
print(url)

params['page'] = 1

# Page through the results until an empty page is returned.
while True:
    response = requests.request(
        "GET",
        url,
        headers=headers,
        auth=auth,
        params=params
    )
    # An error reply is a JSON object, not a list of teams; stop with the
    # HTTP status rather than failing later while iterating over it.
    response.raise_for_status()

    tList = json.loads(response.text)
    if not tList:
        break

    for team in tList:
        teamsDict[team['slug']] = {}

    params['page'] = params['page'] + 1
       

In [None]:
import copy

# membersDict starts with the same team-slug keys as teamsDict. The deep copy
# gives each slug its own inner dict, so filling one mapping does not affect
# the other.
membersDict = copy.deepcopy(teamsDict)

In [None]:
#/orgs/{org}/teams/{team_slug}/repos

# For every team slug in teamsDict, record the names of the repositories
# returned for that team: teamsDict[team][repo_name] = repo_name.
headers = {
    "Accept": "application/json"
}

params = {
    'per_page':100,
}

auth = HTTPBasicAuth(userid, token)

for team in teamsDict:

    # Use the configured organisation instead of a hard-coded name.
    url = githuburl + '/orgs/' + org + '/teams/' + team + '/repos'
    print(url)

    params['page'] = 1

    # Page through the results until an empty page is returned.
    while True:
        response = requests.request(
            "GET",
            url,
            headers=headers,
            auth=auth,
            params=params
        )
        # An error reply is a JSON object, not a list of repositories; stop
        # with the HTTP status rather than failing later while iterating.
        response.raise_for_status()

        rList = json.loads(response.text)
        if not rList:
            break

        for repo in rList:
            teamsDict[team][repo['name']] = repo['name']

        params['page'] = params['page'] + 1


https://api.github.com/orgs/VibrentHealth/teams/test-automation-genius-bar/repos
https://api.github.com/orgs/VibrentHealth/teams/test-automation/repos
https://api.github.com/orgs/VibrentHealth/teams/test-automation_rw/repos
https://api.github.com/orgs/VibrentHealth/teams/tff-android/repos
https://api.github.com/orgs/VibrentHealth/teams/tff-automation/repos
https://api.github.com/orgs/VibrentHealth/teams/tff-ios/repos
https://api.github.com/orgs/VibrentHealth/teams/tff-server/repos
https://api.github.com/orgs/VibrentHealth/teams/the-little-rascals/repos
https://api.github.com/orgs/VibrentHealth/teams/tigers-qa_read-access/repos
https://api.github.com/orgs/VibrentHealth/teams/tigers-writeaccess/repos
https://api.github.com/orgs/VibrentHealth/teams/titans/repos
https://api.github.com/orgs/VibrentHealth/teams/unison-contractor/repos
https://api.github.com/orgs/VibrentHealth/teams/user-milestone-admin/repos
https://api.github.com/orgs/VibrentHealth/teams/vajra-qa_read-access/repos
https://a

In [None]:
#/orgs/{org}/teams/{team_slug}/members

# For every team slug in teamsDict, record the logins of the members returned
# for that team: membersDict[team][login] = login.
headers = {
    "Accept": "application/json"
}

params = {
    'per_page':100,
}

auth = HTTPBasicAuth(userid, token)

for team in teamsDict:

    # Use the configured organisation instead of a hard-coded name.
    url = githuburl + '/orgs/' + org + '/teams/' + team + '/members'
    print(url)

    params['page'] = 1

    # Page through the results until an empty page is returned.
    while True:
        response = requests.request(
            "GET",
            url,
            headers=headers,
            auth=auth,
            params=params
        )
        # An error reply is a JSON object, not a list of members; stop with
        # the HTTP status rather than failing later while iterating.
        response.raise_for_status()

        mList = json.loads(response.text)
        if not mList:
            break

        for member in mList:
            membersDict[team][member['login']] = member['login']

        params['page'] = params['page'] + 1


https://api.github.com/orgs/VibrentHealth/teams/techops/members
https://api.github.com/orgs/VibrentHealth/teams/techops-leads/members
https://api.github.com/orgs/VibrentHealth/teams/techops-pm/members
https://api.github.com/orgs/VibrentHealth/teams/test-automation-genius-bar/members
https://api.github.com/orgs/VibrentHealth/teams/test-automation/members
https://api.github.com/orgs/VibrentHealth/teams/test-automation_rw/members
https://api.github.com/orgs/VibrentHealth/teams/tff-android/members
https://api.github.com/orgs/VibrentHealth/teams/tff-automation/members
https://api.github.com/orgs/VibrentHealth/teams/tff-ios/members
https://api.github.com/orgs/VibrentHealth/teams/tff-server/members
https://api.github.com/orgs/VibrentHealth/teams/the-little-rascals/members
https://api.github.com/orgs/VibrentHealth/teams/tigers-qa_read-access/members
https://api.github.com/orgs/VibrentHealth/teams/tigers-writeaccess/members
https://api.github.com/orgs/VibrentHealth/teams/titans/members
https://

In [None]:
def github_user_info(row):
    """Fill a row with profile fields fetched from ``<githuburl>/users/<row.name>``.

    ``row.name`` is appended to the URL as the user identifier. The request
    uses the module-level ``headers`` and ``auth``. The ``name``, ``company``,
    ``email`` and ``type`` fields of the JSON reply are stored in the row as
    'Name', 'Company', 'Email' and 'Type'.

    Returns the same row object, updated in place. Raises KeyError if the
    reply lacks one of the four fields.
    """
    reply = requests.request(
        "GET",
        githuburl + '/users/' + row.name,
        headers=headers,
        auth=auth
    )
    profile = json.loads(reply.text)

    # (row column, reply field) pairs, assigned in this order
    column_sources = (
        ('Name', 'name'),
        ('Company', 'company'),
        ('Email', 'email'),
        ('Type', 'type'),
    )
    for column, field in column_sources:
        row[column] = profile[field]

    return row

In [None]:
#cDict_df = cDict_df.apply(github_user_info, axis=1)

#col = cDict_df.pop('Company')
#cDict_df.insert(0, col.name, col)

#col = cDict_df.pop('Email')
#cDict_df.insert(0, col.name, col)

#col = cDict_df.pop('Name')
#cDict_df.insert(0, col.name, col)

#col = cDict_df.pop('Type')
#cDict_df.insert(0, col.name, col)

#dump the output into xls

# One DataFrame per collected mapping: the outer keys become columns and the
# inner keys become the index.
cAdminDict_df = pd.DataFrame(cAdminDict)
cMaintDict_df = pd.DataFrame(cMaintDict)
cPushDict_df = pd.DataFrame(cPushDict)
cPullDict_df = pd.DataFrame(cPullDict)
membersDict_df = pd.DataFrame(membersDict)
teamsDict_df = pd.DataFrame(teamsDict)

# (sheet name, frame) pairs in the order the sheets appear in the workbook.
sheetFrames = [
    ('AdminPerms', cAdminDict_df),
    ('MaintPerms', cMaintDict_df),
    ('PushPerms', cPushDict_df),
    ('PullPerms', cPullDict_df),
    ('ReposTeams', teamsDict_df),
    ('MembersTeams', membersDict_df),
]

# The formatting calls below (add_format / set_column) are xlsxwriter APIs, so
# the engine is requested explicitly. The context manager writes and closes
# the file on exit, replacing writer.save(), which pandas 2.0 removed.
with pd.ExcelWriter('git-cleanup.xlsx', engine='xlsxwriter', mode='w') as writer:
    workbook = writer.book
    header_format = workbook.add_format({'text_wrap': True})

    for sheet_name, frame in sheetFrames:
        frame.to_excel(writer, sheet_name=sheet_name, freeze_panes=(1,1))

        #format this
        worksheet = writer.sheets[sheet_name]
        (max_row, max_col) = frame.shape
        # Columns 0..max_col cover the index column plus every data column:
        # width 25 with wrapped text.
        worksheet.set_column(0, max_col, 25, header_format)