## USEFUL LINKS

Github Auth Token Instructions: https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line  
Github API: https://developer.github.com/v3/  
PyGithub: https://pygithub.readthedocs.io/en/latest/


## PROGRAM

In [1]:
from github import Github
import pandas as pd
import json

In [None]:
import os

# Credentials and targets for the GitHub queries in this notebook.
# Export GITHUB_TOKEN in the environment so a real token is never saved into
# the notebook; the placeholder below is used only when the variable is unset.
github_token = os.environ.get('GITHUB_TOKEN', 'YOUR_GITHUB_TOKEN')
repo_name = 'THE_ORG/THE_REPO'  # '<organisation>/<repository>'
login = 'USER_NAME'  # GitHub user name whose activity is looked up

In [7]:
# PyGithub client, constructed with the personal access token set above.
gh = Github(github_token)

In [8]:
# Look up the repository and the user that the rest of the notebook works with.
# The repository is fetched via `repo_name` (the 'ORG/REPO' string); `repo`
# itself does not exist until this line has run.
repo = gh.get_repo(repo_name)
issues_by_creator = repo.get_issues(creator=login)  # issues opened by `login`
# 5450 is a sample issue number -- replace with one that exists in the repo.
issue_from_repo = repo.get_issue(number=5450)
user = gh.get_user(login=login)

### GET EVENTS FROM USER AND SORT THEM INTO LISTS OF CATEGORIES

In [2]:
# Buckets that collect the user's events, keyed by category name.
event_objects = {'issue_comments': [], 'created_issues': [], 'pr_review_comments': [],
                 'pull_requests': [], 'forks': [], 'push': []}

# GitHub event type -> name of the bucket in `event_objects`.
event_types = {'IssueCommentEvent': 'issue_comments', 'IssuesEvent': 'created_issues', 'ForkEvent': 'forks',
               'PullRequestReviewCommentEvent': 'pr_review_comments', 'PullRequestEvent': 'pull_requests', 'PushEvent': 'push'}

for i, event in enumerate(user.get_events()):
    event_type = event_types.get(event.type)
    if event_type is None:
        # WatchEvent, CreateEvent and any other type without a bucket are
        # reported and skipped rather than raising KeyError on the lookup.
        print('%s %s' % (i, event))  # .created_at, event.type, event.id
    else:
        event_objects[event_type].append(event)
print('')

# Summary: number of events collected per category.
for event_type, event_obj_list in event_objects.items():
    print('%s: %s' % (event_type, len(event_obj_list)))

# Example output from one run:
# created_issues: 10
# pull_requests: 9
# pr_review_comments: 2
# issue_comments: 81
# push: 20
# forks: 1

### PARSE PAYLOADS FOR DESIRED DATA

In [305]:
# Parser used for each event category; each one turns a single event into a
# flat dict of the fields to export.
# 'forks' events are collected but no parser for them is defined in this
# notebook, so that category is not exported.
payload_parsers = {
    'created_issues': get_issue_data,
    'issue_comments': get_issue_comment_data,
    'pr_review_comments': get_pr_review_comment_data,
    'pull_requests': get_pull_request_data,
    'push': get_push_data,
}

# category name -> list of row dicts, one per event
event_data = {}
for event_type, parser in payload_parsers.items():
    event_data[event_type] = [parser(event) for event in event_objects[event_type]]

### WRITE TO CSV

In [3]:
import os

output_dir = '../data/github/'
# to_csv does not create missing directories, so make sure the target exists.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# Write one CSV file per event category, named after the category.
for event_type, events in event_data.items():
    print(event_type)
    # `events` is a list of row dicts, which the DataFrame constructor accepts
    # directly (one row per dict, one column per key).
    event_df = pd.DataFrame(events)
    event_df.to_csv(output_dir + event_type + '.csv', index=False, encoding='utf-8')

### URLS (NOT NECESSARY, JUST FYI)

In [None]:
USER: u'https://github.com/:user'
ISSUES:   u'https://github.com/:org/:repo/issues/:issue_no',
COMMENTS: u'https://github.com/:org/:repo/issues/:issue_no#issuecomment-:comment_id',
PULL REQUEST: u'https://github.com/:org/:repo/pull/:pull_id'
PR REVIEW COMMENT: u'https://github.com/:org/:repo/pull/:pull_id#discussion_r:comment_id'

## DATA DESIRED FROM PAYLOAD

### CREATING ISSUES

In [None]:
# ISSUE EVENT
u'id': 123456, # USER THAT CREATED ISSUE
u'login': u'the_user', # USER NAME OF USER THAT CREATED ISSUE
u'https://github.com/the_user', # USER HOME

u'title': u'Issue Title', # ISSUE TITLE
u'body': 'Opening text...' # TEXT OF ORIGINAL COMMENT THAT BEGAN THE ISSUE
u'id': 123456789, # ID OF THE INITIAL COMMENT
    
u'created_at': u'2019-04-26T01:58:48Z',
u'updated_at': u'2019-04-29T13:51:04Z',
u'closed_at': u'2019-04-29T13:51:03Z',

u'number': 1234 # ISSUE NUMBER
u'html_url': u'https://github.com/org/repo/issues/issue_no', # URL OF ISSUE
u'comments': 11 # TOTAL NUMBER OF COMMENTS ON THE ISSUE

### COMMENTS

In [64]:
# IssueCommentEvent

# COMMENT
u'comment': {
u'body': u'Comment text...'
u'created_at': u'2019-05-02T15:32:48Z',
u'html_url': u'https://github.com/org/repo/issues/issue_no#issuecomment-comment_id'
u'id': 123456789
u'updated_at': u'2019-05-02T15:32:48Z'}

# USER
u'user': {
u'html_url': u'https://github.com/user',
u'id': 123456,
u'login': u'user',
u'comments': 4,
u'created_at': u'2019-04-29T15:39:42Z',

# ISSUE
u'created_at': u'2019-04-29T15:39:42Z'
u'comments': 4,
u'id': 123456789, # ISSUE ID
u'number': 123,
u'title': u'Title of the issue',
u'updated_at': u'2019-05-02T15:32:48Z',
u'id': 12345678, # ISSUE CREATOR ID
u'login': u'HarshKhandeparkar',
u'html_url': u'https://github.com/publiclab/spectral-workbench/issues/451'}

### FORK EVENT

In [81]:
# FORK INFO
u'created_at': u'2019-04-10T15:59:42Z',
u'updated_at': u'2019-04-10T02:18:44Z',
u'full_name': u'user/repo',
        
# USER INFO
u'html_url': u'https://github.com/user',
u'id': 123456,
u'login': u'user',

### PULL REQUEST

In [None]:
u'number': 1234, # ISSUE OF PULL REQUEST
u'html': {u'href': u'https://github.com/publiclab/plots2/pull/5459'},
u'additions': 25,
u'created_at': u'2013-06-05T14:36:10Z', # ORIGINAL REPO CREATED AT
u'id': 12345678,
u'name': u'plots2',
u'html_url': u'https://github.com/publiclab',
u'id': 1234567,
u'login': u'repo_org',
        
u'pushed_at': u'2019-04-11T13:21:53Z',
u'changed_files': 3,
u'comments': 2,
u'commits': 3,
u'label': u'user:fork-name',
u'ref': u'user:fork-name',
u'created_at': u'2019-04-10T15:59:42Z',
u'id': 123456789,
u'id': 123456, # USER ID
u'login': u'user',                          

## FUNCTIONS

### GET ISSUE DATA

In [119]:
def get_issue_data(issue):
    """Flatten the payload of an issue event into a single-level dict.

    Everything is read from ``issue.payload['issue']``: the author's id and
    login, the body text, the created/updated/closed timestamps, the comment
    count, the issue id (stored under ``initial_comment_id``), the issue
    number, title and state. ``repo`` is the fifth '/'-separated piece of the
    issue's ``html_url``.
    """
    details = issue.payload['issue']
    author = details['user']
    url_parts = details['html_url'].split('/')

    return {
        'user_id': author['id'],
        'user_name': author['login'],
        'text': details['body'],
        'created_at': details['created_at'],
        'updated_at': details['updated_at'],
        'closed_at': details['closed_at'],
        'no_comments': details['comments'],
        'initial_comment_id': details['id'],
        'issue_no': details['number'],
        'title': details['title'],
        'state': details['state'],
        'repo': url_parts[4],
    }

# get_issue_data(event_objects['created_issues'][0])

### GET ISSUE COMMENT DATA

In [168]:
def get_issue_comment_data(issue_comment):
    """Flatten the payload of an issue-comment event into a single-level dict.

    The commenter's id and login, the comment text, ``created_at`` and
    ``comment_id`` come from ``payload['comment']``. The remaining fields
    (issue number, title, ``updated_at``, ``closed_at``, comment count, state
    and the issue id stored as ``initial_comment_id``) come from
    ``payload['issue']``. ``repo`` is the fifth '/'-separated piece of the
    issue's ``html_url``.
    """
    payload = issue_comment.payload
    comment = payload['comment']
    commenter = comment['user']
    parent_issue = payload['issue']

    return {
        # comment-level fields
        'user_id': commenter['id'],
        'user_name': commenter['login'],
        'text': comment['body'],
        'created_at': comment['created_at'],
        'comment_id': comment['id'],
        # fields of the issue the comment belongs to
        'issue_no': parent_issue['number'],
        'title': parent_issue['title'],
        'updated_at': parent_issue['updated_at'],
        'closed_at': parent_issue['closed_at'],
        'no_comments': parent_issue['comments'],
        'state': parent_issue['state'],
        'initial_comment_id': parent_issue['id'],
        'repo': parent_issue['html_url'].split('/')[4],
    }

    



# get_issue_comment_data(event_objects['issue_comments'][0])

### GET PULL REQUEST DATA

In [203]:
def get_pull_request_data(pull_request):
    """Flatten the payload of a pull-request event into a single-level dict.

    ``pull_no`` is the top-level ``payload['number']``. Every other field is
    read from ``payload['pull_request']``: author id and login, title,
    timestamps, additions, changed-file and commit counts, plus the id and
    name of the base repository.
    """
    payload = pull_request.payload
    pr = payload['pull_request']
    author = pr['user']
    base_repo = pr['base']['repo']

    return {
        'user_id': author['id'],
        'user_name': author['login'],
        'pull_no': payload['number'],
        'title': pr['title'],
        'created_at': pr['created_at'],
        'updated_at': pr['updated_at'],
        'additions': pr['additions'],
        'changed_files': pr['changed_files'],
        'closed_at': pr['closed_at'],
        'commits': pr['commits'],
        'repo_id': base_repo['id'],
        'repo': base_repo['name'],
    }

# get_pull_request_data(event_objects['pull_requests'][0])

### GET PR REVIEW COMMENT DATA

In [232]:
def get_pr_review_comment_data(pr_review):
    """Flatten the payload of a PR review-comment event into a single-level dict.

    The commenter's id and login, the comment id, timestamps, text and
    ``pull_request_review_id`` are read from ``payload['comment']``. The base
    repository's id and name and the pull request state are read from
    ``payload['pull_request']``.
    """
    payload = pr_review.payload
    comment = payload['comment']
    commenter = comment['user']
    pull = payload['pull_request']
    base_repo = pull['base']['repo']

    return {
        'user_id': commenter['id'],
        'user_name': commenter['login'],
        'comment_id': comment['id'],
        'created_at': comment['created_at'],
        'updated_at': comment['updated_at'],
        'text': comment['body'],
        'pr_review_id': comment['pull_request_review_id'],
        'repo_id': base_repo['id'],
        'repo_name': base_repo['name'],
        'state': pull['state'],
    }

# get_pr_review_comment_data(event_objects['pr_review_comments'][0])

### GET PUSH DATA

In [297]:
def get_push_data(push_event):
    """Flatten the payload of a push event into a single-level dict.

    Only the first entry of ``payload['commits']`` is used. Note that
    ``user_id`` holds that commit's author *name* (the key is kept as-is so
    the exported column name does not change). When the push carries no
    commits, ``user_id`` and ``message`` are ``None`` instead of the lookup
    raising IndexError.

    Returns a dict with the keys ``user_id``, ``message`` and ``push_id``.
    """
    payload = push_event.payload
    commits = payload['commits']
    # A push event may arrive with an empty commit list.
    first_commit = commits[0] if commits else None

    data = {}
    data['user_id'] = first_commit['author']['name'] if first_commit else None
    data['message'] = first_commit['message'] if first_commit else None
    data['push_id'] = payload['push_id']

    return data

# get_push_data(event_objects['push'][14])