## GitHub credentials

In [75]:
# GitHub account name sent with every API request in this notebook
gh_username = "pamelarussell"
# Text file holding the OAuth token; only its first line is read during setup
gh_oauth_file = "gh_oauth_token.txt"

## Imports

In [76]:
from time import sleep
import chardet
from github3 import login
from pycurl import Curl
from io import BytesIO
import json
from json.decoder import JSONDecodeError
import pycurl

## Setup

In [77]:
# Read the OAuth token: the first line of the token file, surrounding whitespace removed
with open(gh_oauth_file) as token_file:
    gh_oauth_key = token_file.readline().strip()

# Hourly request budget and the resulting spacing (in seconds) between requests
api_rate_limit_per_hour = 5000
sec_between_requests = 3600 / api_rate_limit_per_hour

# Base URL for repository endpoints of the GitHub API
url_repos = "https://api.github.com/repos"

## Utility functions for GitHub API

In [78]:
def gh_userpwd(gh_username, gh_oauth_key):
    """Build the credentials string that is passed to the GitHub API.

    Args:
        gh_username: GitHub username for GitHub API
        gh_oauth_key: (String) GitHub oauth key

    Returns:
        The two values joined by a colon, i.e. "<username>:<oauth key>".
    """
    credential_parts = (str(gh_username), str(gh_oauth_key))
    return ":".join(credential_parts)

def sleep_gh_rate_limit():
    """Pause between GitHub API requests so that a full hour of requests stays
    within the API rate limit.

    The pause is the per-request interval plus a 0.05 second margin.
    """
    pause_seconds = sec_between_requests + 0.05
    sleep(pause_seconds)
    
def add_page_num(url, page_num):
    """Append a page number parameter to a GitHub API request URL.

    The parameter is joined with "&" when the URL already contains a "?",
    otherwise it starts the query string with "?".
    """
    separator = "&" if "?" in url else "?"
    return "%s%spage=%s" % (url, separator, page_num)
    
def validate_response_found(parsed, message = ""):
    """ Check that the GitHub API returned a valid response
    Raises ValueError if response was not found

    Only dict responses are inspected; any other parsed JSON value (for example
    a list of records) is accepted as is.

    Args:
        parsed: Parsed JSON response (a dict, or a list for multi-record responses)
        message: Extra info to print

    Raises:
        ValueError: if the response is a dict whose "message" entry is "Not Found"
    """
    # Guard on dict first: on a list, `"message" in parsed` is an element-membership
    # test, and indexing the list with a string key would raise TypeError.
    if isinstance(parsed, dict) and parsed.get("message") == "Not Found":
        raise ValueError("Parsed response has message: Not Found. Further information:\n%s" %message)

def gh_curl_response(url, gh_username, gh_oauth_key):
    """
    Returns the parsed curl response from the GitHub API
    Combines pages if applicable

    Pages are requested one at a time, starting at page 1, with a rate-limit
    pause before each request. Paging stops at the first empty page or when a
    page repeats the previous one.

    params:
        url: URL e.g. 'https://api.github.com/repos/samtools/samtools'
        gh_username: GitHub username for GitHub API
        gh_oauth_key: (String) GitHub oauth key

    returns:
        Parsed API response. Returns a list of dicts, one for each record, or just one
        dict if the response was a single dict. Returns an empty list if a response
        body could not be decoded as JSON.

    raises:
        pycurl.error: if the request itself fails (the URL is printed first)
        PermissionError: if the API reports that the rate limit was exceeded
        ValueError: if the API reports "Not Found"
        TypeError: if the response is valid JSON but neither a dict nor a list

    """
    page_num = 1
    results = []
    prev_response = None
    while True:
        page_url = add_page_num(url, page_num)
        buffer = BytesIO()
        c = pycurl.Curl()
        try:
            c.setopt(c.URL, page_url)
            c.setopt(c.USERPWD, gh_userpwd(gh_username, gh_oauth_key))
            c.setopt(c.WRITEDATA, buffer)
            sleep_gh_rate_limit()
            c.perform()
        except pycurl.error:
            print(url)
            raise
        finally:
            # Release the curl handle on every path, including failed requests
            c.close()
        body = buffer.getvalue()
        try:
            parsed = json.loads(body.decode())
        except JSONDecodeError:
            print("Caught JSONDecodeError. Returning empty list for URL %s" % url)
            return []
        if isinstance(parsed, dict):
            # Error markers are only looked for in dict responses; on a list,
            # `"message" in parsed` would be an element-membership test.
            api_message = parsed.get("message")
            if isinstance(api_message, str) and "API rate limit exceeded" in api_message:
                raise PermissionError(api_message)
            validate_response_found(parsed, page_url)
            # NOTE(review): a dict response ends paging immediately, even when
            # earlier pages were already collected - confirm this is intended.
            return parsed
        if not isinstance(parsed, list):
            # Scalars / null cannot be combined across pages
            raise TypeError("Unexpected JSON response type %s for URL %s"
                            % (type(parsed).__name__, page_url))
        if len(parsed) == 0:
            break
        if parsed == prev_response:
            # Sometimes GitHub API will return the same response for any provided page num
            break
        prev_response = parsed
        results.extend(parsed)
        page_num = page_num + 1
    return results

## Ping the API

In [79]:
# Request the API root with the configured credentials; the parsed response is shown as the cell output
gh_curl_response(
    url='https://api.github.com',
    gh_username=gh_username,
    gh_oauth_key=gh_oauth_key,
)

{'current_user_url': 'https://api.github.com/user',
 'current_user_authorizations_html_url': 'https://github.com/settings/connections/applications{/client_id}',
 'authorizations_url': 'https://api.github.com/authorizations',
 'code_search_url': 'https://api.github.com/search/code?q={query}{&page,per_page,sort,order}',
 'commit_search_url': 'https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}',
 'emails_url': 'https://api.github.com/user/emails',
 'emojis_url': 'https://api.github.com/emojis',
 'events_url': 'https://api.github.com/events',
 'feeds_url': 'https://api.github.com/feeds',
 'followers_url': 'https://api.github.com/user/followers',
 'following_url': 'https://api.github.com/user/following{/target}',
 'gists_url': 'https://api.github.com/gists{/gist_id}',
 'hub_url': 'https://api.github.com/hub',
 'issue_search_url': 'https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}',
 'issues_url': 'https://api.github.com/issues',
 'keys_url': '