# Github Metrics Collection
This notebook is used to collect github metrics about each RMW. 

In [1]:
import datetime
import sys
import time
import github
import keyring
import requests
import pandas as pd

In [2]:
def rate_limit_to_string(rl):
    rl_reset = datetime.datetime.fromtimestamp(rl.raw_data['core']['reset'])
    return "Rate limit: %d, remaining %d, reset %s" % (rl.raw_data['core']['limit'], rl.raw_data['core']['remaining'], rl_reset.strftime('%c'))


def extract_pull_data(pull):
    """
    Given an API pull request object extract the relevant information and put it in a dict.
    """
    ret_val = {}
    ret_val["state"] = pull.state # open or closed
    ret_val["opened_on"] = pull.created_at
    if pull.state != 'open':
        if pull.merged:
            closed = pull.merged_at
            ret_val["time_to_close"] = (closed - pull.created_at).total_seconds()
            ret_val["closed_at"] = closed
    else:
        ret_val["closed"] = None
        ret_val["time_to_close"] = None
    ret_val["commits"] = pull.commits
    ret_val["number"] = pull.number
    ret_val["title"] = pull.title
    ret_val["url"] = pull.url
    return ret_val

def extract_issue_data(issue):
    """
    Given a GitHub API issue data object cherry pick the data we want and drop it in a dictionary. 
    """
    ret_val = {}
    ret_val["closed_at"] = issue.closed_at
    ret_val["created_at"] = issue.created_at
    ret_val["labels"] = issue.labels
    ret_val["id"] = issue.id
    ret_val["number"] = issue.number
    ret_val["state"] = issue.state
    ret_val["title"] = issue.title
    ret_val["url"] = issue.url
    ret_val["last_mod"] = issue.last_modified
    ret_val["closed_at"] = None
    ret_val["comments"] = issue.comments
    if issue.state != 'open':
         ret_val["turn_round"] =  (issue.closed_at - issue.created_at).total_seconds()
    return ret_val

def extract_commit_stats(commit):
    """
    Given a github API commit object extract the relevant data and put it in a dictionary. 
    """
    ret_val = {}
    try:        
        ret_val["last_modified"] =  datetime.datetime.strptime(commit.last_modified,"%a, %d %b %Y %H:%M:%S %Z")    
        ret_val["author"] =  commit.author.login
        ret_val["sha"] =  commit.sha
        ret_val["changes"] =  commit.stats.total
        ret_val["commiter"] =  commit.committer.login
    except:
        return ret_val
    return ret_val


In [3]:
def pulldown_repo_stats(repository,key,date_cutoff):
    """
    Given a Github repo name and a Github API key, and a date cutoff will pull down all of the data 
    PR, Issue, and Commit data from the current time until the date_cuttoff and 
    return that data as a pandas dataframe
    """
    gh = github.Github(key)
    repo = gh.get_repo(repository)
    print('Fetching pull requests')
    time_to_close_pull_sum = 0
    num_closed_prs = 0
    pulls = repo.get_pulls(state='all')
    raw_list = []
        
    for i_pull, pull in enumerate(pulls):
        try:
            if pull.created_at.date() > date_cutoff:
                dp = extract_pull_data(pull)
                raw_list.append(dp)
            else: # this should be most recent first so we exceed creation time
                # bail.
                break 
        except requests.exceptions.ConnectionError:
            print('Failed reading data for pull request, continuing')
            
        if (i_pull % 100) == 0 :
            print(rate_limit_to_string(gh.get_rate_limit()))

    pr_df = pd.DataFrame(data=raw_list)

    print('Fetching issues')
    issue_list = []
    
    issues = repo.get_issues(state='all')
    for i_issue, issue in enumerate(issues):
        try:
            if issue.created_at.date() > date_cutoff:                      
                issue_data = extract_issue_data(issue)
                issue_list.append(issue_data)
            else: # Dates should be squential so if we get an old one stop processing
                break
        except requests.exceptions.ConnectionError:
            print('Failed reading data for issue, continuing')

        if (i_issue % 100) == 0:
            print(rate_limit_to_string(gh.get_rate_limit()))

    issue_df = pd.DataFrame(data=issue_list)            

    print('Fetching Commits')
    commit_list = []
    
    commits = repo.get_commits()
    for i_commit, commit in enumerate(commits):
        try:

            mod_date = datetime.datetime.strptime(commit.last_modified,"%a, %d %b %Y %H:%M:%S %Z")
            if mod_date.date()> date_cutoff:                      
                commit_data = extract_commit_stats(commit)
                commit_list.append(commit_data)
            else: # Dates should be squential so if we get an old one stop processing
                break
        except requests.exceptions.ConnectionError:
            print('Failed reading data for commit, continuing')

        if (i_commit % 100) == 0:
            print(rate_limit_to_string(gh.get_rate_limit()))

    commit_df = pd.DataFrame(data=commit_list)            

    return commit_df, issue_df, pr_df 


In [11]:
# Get github key
key = keyring.get_password('github-token', 'may-read-repositories')
if key is None:
    print('Failed to get GitHub API key')
else:
    print("Got GitHub Key")
# set the cutoff six months in the past
date_cutoff = datetime.date.today()-datetime.timedelta(days = 30*6)
print(date_cutoff)


Got GitHub Key
2021-04-10


In [6]:
fast_commit, fast_issue, fast_pr = pulldown_repo_stats("eProsima/Fast-DDS",key,date_cutoff)    
fast_commit.to_csv("./data/git_metrics/fast_commit.csv")
fast_issue.to_csv("./data/git_metrics/fast_issue.csv")
fast_pr.to_csv("./data/git_metrics/fast_pr.csv")
fast_pr.head()

Fetching pull requests
Rate limit: 5000, remaining 4601, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 4498, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 4395, reset Thu Oct  7 13:53:11 2021
Fetching issues
Rate limit: 5000, remaining 4375, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 4372, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 4369, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 4365, reset Thu Oct  7 13:53:11 2021
Fetching Commits
Rate limit: 5000, remaining 4362, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 4259, reset Thu Oct  7 13:53:11 2021


Unnamed: 0,state,opened_on,closed,time_to_close,commits,number,title,url,closed_at
0,open,2021-10-05 14:50:05,,,1,2249,Do not filter Error messages category,https://api.github.com/repos/eProsima/Fast-DDS...,NaT
1,open,2021-10-05 11:35:40,,,2,2248,[foxy] Fix: gap not being sent,https://api.github.com/repos/eProsima/Fast-DDS...,NaT
2,open,2021-10-04 14:44:57,,,10,2247,Allow Discovery Server without Custom Listenin...,https://api.github.com/repos/eProsima/Fast-DDS...,NaT
3,open,2021-10-04 12:25:31,,,8,2246,Discovery Server fix reconnection [12522],https://api.github.com/repos/eProsima/Fast-DDS...,NaT
4,open,2021-10-04 09:46:50,,,2,2245,Adds profile loading from XML string,https://api.github.com/repos/eProsima/Fast-DDS...,NaT


In [7]:
fast_rmw_commit, fast_rmw_issue, fast_rmw_pr = pulldown_repo_stats("ros2/rmw_fastrtps",key,date_cutoff)    
fast_rmw_commit.to_csv("./data/git_metrics/fast_rmw_commit.csv")
fast_rmw_issue.to_csv("./data/git_metrics/fast_rmw_issue.csv")
fast_rmw_pr.to_csv("./data/git_metrics/fast_rmw_pr.csv")
fast_rmw_pr.head()

Fetching pull requests
Rate limit: 5000, remaining 4205, reset Thu Oct  7 13:53:11 2021
Fetching issues
Rate limit: 5000, remaining 4181, reset Thu Oct  7 13:53:11 2021
Fetching Commits
Rate limit: 5000, remaining 4178, reset Thu Oct  7 13:53:11 2021


Unnamed: 0,state,opened_on,closed,time_to_close,commits,number,title,url,closed_at
0,open,2021-10-07 13:23:53,,,2,565,Add support for PKCS#11 in security files,https://api.github.com/repos/ros2/rmw_fastrtps...,NaT
1,closed,2021-10-02 03:13:58,,391467.0,1,564,fix QoS depth settings for clients/service ign...,https://api.github.com/repos/ros2/rmw_fastrtps...,2021-10-06 15:58:25
2,open,2021-09-24 16:41:17,,,1,560,Add client/service QoS getters,https://api.github.com/repos/ros2/rmw_fastrtps...,NaT
3,closed,2021-09-08 18:24:46,,523299.0,2,558,Update rmw_context_impl_t definition,https://api.github.com/repos/ros2/rmw_fastrtps...,2021-09-14 19:46:25
4,closed,2021-09-08 13:12:58,,193983.0,2,557,Clarify rule for selecting a XML profile using...,https://api.github.com/repos/ros2/rmw_fastrtps...,2021-09-10 19:06:01


In [8]:
cyclone_commit, cyclone_issue, cyclone_pr = pulldown_repo_stats("eclipse-cyclonedds/cyclonedds",key,date_cutoff)    
cyclone_commit.to_csv("./data/git_metrics/cyclone_commit.csv")
cyclone_issue.to_csv("./data/git_metrics/cyclone_issue.csv")
cyclone_pr.to_csv("./data/git_metrics/cyclone_pr.csv")
cyclone_pr.head()

Fetching pull requests
Rate limit: 5000, remaining 4145, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 4042, reset Thu Oct  7 13:53:11 2021
Fetching issues
Rate limit: 5000, remaining 3999, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 3996, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 3993, reset Thu Oct  7 13:53:11 2021
Fetching Commits
Rate limit: 5000, remaining 3990, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 3887, reset Thu Oct  7 13:53:11 2021
Rate limit: 5000, remaining 3784, reset Thu Oct  7 13:53:11 2021


Unnamed: 0,state,opened_on,time_to_close,closed_at,commits,number,title,url,closed
0,closed,2021-10-07 12:09:55,15529.0,2021-10-07 16:28:44,1,979,remove grammars from legal notice,https://api.github.com/repos/eclipse-cyclonedd...,
1,open,2021-10-07 08:50:59,,NaT,24,978,Update for 0.8.1,https://api.github.com/repos/eclipse-cyclonedd...,
2,open,2021-10-05 08:44:15,,NaT,3,975,Win32 stack trace of current thread,https://api.github.com/repos/eclipse-cyclonedd...,
3,closed,2021-10-04 12:08:38,71673.0,2021-10-05 08:03:11,1,974,Fix use of non-existent paths when building as...,https://api.github.com/repos/eclipse-cyclonedd...,
4,open,2021-10-01 14:13:50,,NaT,1,973,Add checks on inheritance,https://api.github.com/repos/eclipse-cyclonedd...,


In [9]:
cyclone_rmw_commit, cyclone_rmw_issue, cyclone_rmw_pr = pulldown_repo_stats("ros2/rmw_cyclonedds",key,date_cutoff)    
cyclone_rmw_commit.to_csv("./data/git_metrics/cyclone_rmw_commit.csv")
cyclone_rmw_issue.to_csv("./data/git_metrics/cyclone_rmw_issue.csv")
cyclone_rmw_pr.to_csv("./data/git_metrics/cyclone_rmw_pr.csv")
cyclone_rmw_pr.head()

Fetching pull requests
Rate limit: 5000, remaining 3678, reset Thu Oct  7 13:53:11 2021
Fetching issues
Rate limit: 5000, remaining 3652, reset Thu Oct  7 13:53:11 2021
Fetching Commits
Rate limit: 5000, remaining 3649, reset Thu Oct  7 13:53:11 2021


Unnamed: 0,state,opened_on,closed,time_to_close,commits,number,title,url,closed_at
0,open,2021-10-02 03:07:27,,,1,340,Fix QoS depth settings for clients/service ign...,https://api.github.com/repos/ros2/rmw_cycloned...,NaT
1,closed,2021-09-08 18:24:32,,523333.0,2,337,Update rmw_context_impl_t definition,https://api.github.com/repos/ros2/rmw_cycloned...,2021-09-14 19:46:45
2,closed,2021-09-08 15:34:24,,92626.0,1,336,Fix use of deprecated is_loan_available,https://api.github.com/repos/ros2/rmw_cycloned...,2021-09-09 17:18:10
3,closed,2021-09-03 14:37:56,,859996.0,1,335,Add quality declaration for rmw_cyclonedds_cpp,https://api.github.com/repos/ros2/rmw_cycloned...,2021-09-13 13:31:12
4,closed,2021-08-30 16:06:19,,3168.0,1,334,rmw_cyclonedds_cpp/CMakeLists.txt: add -latomi...,https://api.github.com/repos/ros2/rmw_cycloned...,2021-08-30 16:59:07


In [10]:
cyclone_rmw_pr.head()

Unnamed: 0,state,opened_on,closed,time_to_close,commits,number,title,url,closed_at
0,open,2021-10-02 03:07:27,,,1,340,Fix QoS depth settings for clients/service ign...,https://api.github.com/repos/ros2/rmw_cycloned...,NaT
1,closed,2021-09-08 18:24:32,,523333.0,2,337,Update rmw_context_impl_t definition,https://api.github.com/repos/ros2/rmw_cycloned...,2021-09-14 19:46:45
2,closed,2021-09-08 15:34:24,,92626.0,1,336,Fix use of deprecated is_loan_available,https://api.github.com/repos/ros2/rmw_cycloned...,2021-09-09 17:18:10
3,closed,2021-09-03 14:37:56,,859996.0,1,335,Add quality declaration for rmw_cyclonedds_cpp,https://api.github.com/repos/ros2/rmw_cycloned...,2021-09-13 13:31:12
4,closed,2021-08-30 16:06:19,,3168.0,1,334,rmw_cyclonedds_cpp/CMakeLists.txt: add -latomi...,https://api.github.com/repos/ros2/rmw_cycloned...,2021-08-30 16:59:07
