In [277]:
from datetime import datetime, timezone
from os import getenv
from time import sleep

from dotenv import load_dotenv
from pandas import DataFrame
from requests import Session

# Load variables from a local .env file into the process environment
# (presumably where GITHUB_OAUTH_TOKEN, read with getenv further down, is kept).
# The trailing ';' keeps the call's return value out of the cell output.
load_dotenv();

In [29]:
class NotFoundError(Exception):
    '''Signals that the requested repository does not exist.'''

class ApiRateLimitError(Exception):
    '''Signals that the API rate limit has been exceeded.'''

class BadCredentialsError(Exception):
    '''Signals that the credentials supplied to the API were rejected.'''


In [352]:
class Downloader:
    '''Small GitHub REST API client that loads repository data into DataFrames.

    Every request goes through `_get`, so an unsuccessful response raises the
    same exceptions as the constructor instead of being parsed as if it were data.

    Class attributes (override on the class or an instance to tune):
        API_ROOT: base URL of the GitHub REST API.
        PER_PAGE: page size requested from paginated list endpoints.
        STATS_ATTEMPTS: how many times a statistics endpoint is polled while
            GitHub answers 202 (statistics still being computed).
        STATS_RETRY_DELAY: seconds to wait between those polls.
    '''

    API_ROOT = 'https://api.github.com'
    PER_PAGE = 100
    STATS_ATTEMPTS = 5
    STATS_RETRY_DELAY = 2

    def __init__(self, owner, repo, token=''):
        '''Create a session for `owner/repo` and verify that the repository is reachable.

        Args:
            owner: account or organisation that owns the repository.
            repo: repository name.
            token: optional OAuth token; any falsy value (including None) means
                unauthenticated access.

        Raises:
            ApiRateLimitError: the API answered 403 or 429.
            NotFoundError: the API answered 404.
            BadCredentialsError: the API answered 401.
            Exception: any other unsuccessful response.
        '''
        self.url = '{}/repos/{}/{}'.format(self.API_ROOT, owner, repo)
        self.session = Session()

        if token:
            self.session.headers.update({'Authorization': 'token {}'.format(token)})

        # checking if the requested repository exists or not
        response = self._get(
            self.url,
            not_found_message="Repository '{}' of user '{}' not found.".format(repo, owner))

        print('The maximum number of requests you are permitted to make per hour: {}'.format(response.headers['X-RateLimit-Limit']))
        print('The number of requests remaining in the current rate limit window: {}'.format(response.headers['X-RateLimit-Remaining']))

    @staticmethod
    def _raise_for_error(response, not_found_message):
        '''Translate an unsuccessful response into the matching exception.'''
        status = response.status_code
        if status in (403, 429):
            raise ApiRateLimitError('API rate limit exceeded. Try to specify an OAuth token to increase your rate limit.')
        if status == 404:
            raise NotFoundError(not_found_message)
        if status == 401:
            raise BadCredentialsError('Bad credentials were provided for the API.')

        # The error body is not guaranteed to be JSON with a 'message' key.
        try:
            message = response.json()['message']
        except (ValueError, KeyError, TypeError):
            message = 'GitHub API request failed with HTTP status {}.'.format(status)
        raise Exception(message)

    def _get(self, url, not_found_message=None, **kwargs):
        '''GET `url` through the session and return the response, raising on failure.'''
        response = self.session.get(url, **kwargs)
        if not response.ok:
            self._raise_for_error(
                response, not_found_message or "Resource '{}' not found.".format(url))
        return response

    def _get_stats(self, name):
        '''Return the parsed payload of the `stats/<name>` endpoint.

        Polls again while the API answers 202. Returns an empty list when the
        API answers 204 or when the statistics are still not ready after
        STATS_ATTEMPTS polls.
        '''
        url = '{}/stats/{}'.format(self.url, name)
        for attempt in range(self.STATS_ATTEMPTS):
            if attempt:
                sleep(self.STATS_RETRY_DELAY)
            response = self._get(url)
            if response.status_code == 202:
                continue
            if response.status_code == 204:
                return []
            return response.json()

        print('GitHub is still computing the {} statistics. Try again later.'.format(name))
        return []

    def _paginate(self, path, params=None, headers=None):
        '''Yield every item of the list endpoint `<repo url>/<path>`, page by page.

        Prints one dot per request as a progress indicator and a final dot with
        a newline once an empty page marks the end of the listing.
        '''
        url = '{}/{}'.format(self.url, path)
        base_query = dict(params or {}, per_page=self.PER_PAGE)
        page = 1

        while True:
            print('.', end='')
            batch = self._get(url, params=dict(base_query, page=page), headers=headers).json()
            if not batch:
                break
            yield from batch
            page += 1

        print('.')

    @staticmethod
    def _week_date(unix_ts):
        '''Convert a week-start Unix timestamp to a date, interpreting it as UTC.'''
        # Converting in UTC keeps the result independent of the machine's time zone.
        return datetime.fromtimestamp(int(unix_ts), tz=timezone.utc).date()

    @staticmethod
    def _parse_date(timestamp):
        '''Return the date part of a 'YYYY-MM-DDTHH:MM:SSZ' timestamp string.'''
        return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ').date()

    def get_contributors_statistic(self):
        '''Get contributors list with additions, deletions, and commit counts

        Returns:
            (total_contributions, weekly_contributions): a frame with columns
            user/commits and a frame with columns
            user/week_unix_ts/date/additions/deletions/commits. Both are also
            stored on the instance under the same names. `user` is None for
            entries that carry no author.
        '''
        data = self._get_stats('contributors')

        total_contributions = []
        weekly_contributions = []

        for item in data:
            # 'author' may be null in the payload; keep the row with user=None.
            login = (item.get('author') or {}).get('login')

            total_contributions.append({
                'commits': item['total'],
                'user': login
            })

            for week in item['weeks']:
                weekly_contributions.append({
                    'user': login,
                    'week_unix_ts': week['w'],
                    'date': self._week_date(week['w']),
                    'additions': week['a'],
                    'deletions': week['d'],
                    'commits': week['c'],
                })

        self.total_contributions = DataFrame(total_contributions, columns=['user', 'commits'])
        self.weekly_contributions = DataFrame(weekly_contributions, columns=['user', 'week_unix_ts', 'date', 'additions', 'deletions', 'commits'])

        return self.total_contributions, self.weekly_contributions

    def get_code_frequency_statistic(self):
        '''Returns a weekly aggregate of the number of additions and deletions pushed to a repository

        Returns:
            A frame with columns week_unix_ts/additions/deletions/date, also
            stored as `self.code_frequency`. It is empty (with the same
            columns) when no statistics are available.
        '''
        data = self._get_stats('code_frequency')

        frame = DataFrame(data, columns=['week_unix_ts', 'additions', 'deletions'])
        # A plain list works for an empty frame too, unlike apply(axis=1).
        frame['date'] = [self._week_date(ts) for ts in frame['week_unix_ts']]

        self.code_frequency = frame
        return self.code_frequency

    def get_user_data(self, username):
        '''Provides publicly available information about someone with a GitHub account

        Raises the same exceptions as the constructor when the request fails
        (NotFoundError for an unknown user).
        '''
        return self._get(
            '{}/users/{}'.format(self.API_ROOT, username),
            not_found_message="User '{}' not found.".format(username)).json()

    def get_issues(self, state='open'):
        '''List issues in a repository

        Args:
            state: value for the API's `state` filter: 'open' (default, the
                API's own default), 'closed' or 'all'.

        Returns:
            A frame with columns id/state/created_at, also stored as
            `self.issues`. Note that the GitHub issues endpoint also returns
            pull requests.
        '''
        print('Fetching repository issues ', end='')

        issues = [
            {
                'id': issue['id'],
                'state': issue['state'],
                'created_at': self._parse_date(issue['created_at']),
            }
            for issue in self._paginate('issues', params={'state': state})
        ]

        self.issues = DataFrame(issues, columns=['id', 'state', 'created_at'])

        return self.issues

    def get_stargazers(self):
        '''Lists the people that have starred the repository

        Returns:
            A frame with columns user/starred_at, also stored as
            `self.stargazers`.
        '''
        print('Fetching stargazers ', end='')

        # This media type makes the API include the 'starred_at' timestamp.
        star_headers = {'Accept': 'application/vnd.github.v3.star+json'}

        stargazers = [
            {
                'user': stargazer['user']['login'],
                'starred_at': self._parse_date(stargazer['starred_at']),
            }
            for stargazer in self._paginate('stargazers', headers=star_headers)
        ]

        self.stargazers = DataFrame(stargazers, columns=['user', 'starred_at'])

        return self.stargazers


In [353]:
# Repository to analyse, addressed on GitHub as <owner>/<repo>.
owner, repo = 'pandas-dev', 'pandas'

# OAuth token taken from the environment; None when the variable is not set.
github_token = getenv('GITHUB_OAUTH_TOKEN')

# The constructor issues a request to check that the repository exists.
downloader = Downloader(owner=owner, repo=repo, token=github_token)

The maximum number of requests you are permitted to make per hour: 5000
The number of requests remaining in the current rate limit window: 4553


In [324]:
# Fetch every stargazer with the date they starred the repository (one request per page of 100).
downloader.get_stargazers()

Fetching stargazers .......................................................................................................................................................................................................................................................


Unnamed: 0,user,starred_at
0,sbusso,2010-08-24
1,auser,2010-08-24
2,pfig,2010-08-24
3,ludwig,2010-08-24
4,bryanveloso,2010-08-24
...,...,...
24453,Larzuwa,2020-04-19
24454,kostekci,2020-04-19
24455,lihuanpeng,2020-04-19
24456,yunbow,2020-04-19


In [325]:
# Fetch the repository's issues (id, state, creation date); the output below shows only open ones.
downloader.get_issues()

Fetching repository issues ......................................


Unnamed: 0,id,state,created_at
0,602804860,open,2020-04-19
1,602784606,open,2020-04-19
2,602750949,open,2020-04-19
3,602747636,open,2020-04-19
4,602736456,open,2020-04-19
...,...,...,...
3531,5217005,open,2012-06-22
3532,4863349,open,2012-06-01
3533,4647887,open,2012-05-18
3534,4485088,open,2012-05-09


In [354]:
# Download contributor statistics: overall commit totals plus a week-by-week breakdown.
total_contributions, weekly_contributions = downloader.get_contributors_statistic()

In [355]:
# Total number of commits per contributor.
total_contributions

Unnamed: 0,user,commits
0,xhochy,10
1,alexander-ponomaroff,10
2,ArtificialQualia,10
3,AlexKirko,10
4,funnycrab,11
...,...,...
95,TomAugspurger,661
96,jorisvandenbossche,690
97,jbrockmendel,1732
98,jreback,2758


In [356]:
# One row per contributor per week with additions, deletions and commits.
weekly_contributions

Unnamed: 0,user,week_unix_ts,date,additions,deletions,commits
0,xhochy,1249171200,2009-08-02,0,0,0
1,xhochy,1249776000,2009-08-09,0,0,0
2,xhochy,1250380800,2009-08-16,0,0,0
3,xhochy,1250985600,2009-08-23,0,0,0
4,xhochy,1251590400,2009-08-30,0,0,0
...,...,...,...,...,...,...
55995,wesm,1584835200,2020-03-22,0,0,0
55996,wesm,1585440000,2020-03-29,0,0,0
55997,wesm,1586044800,2020-04-05,0,0,0
55998,wesm,1586649600,2020-04-12,0,0,0


In [329]:
# Weekly additions and deletions for the whole repository (deletions appear as negative numbers below).
downloader.get_code_frequency_statistic()

Unnamed: 0,week_unix_ts,additions,deletions,date
0,1249171200,21659,-4,2009-08-02
1,1249776000,0,0,2009-08-09
2,1250380800,0,0,2009-08-16
3,1250985600,0,0,2009-08-23
4,1251590400,1707,-819,2009-08-30
...,...,...,...,...
555,1584835200,7830,-5951,2020-03-22
556,1585440000,5418,-4602,2020-03-29
557,1586044800,6709,-3022,2020-04-05
558,1586649600,3052,-2322,2020-04-12
