<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# GitHub - Create leaderboard of contributors
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/GitHub/GitHub_Get_commits_ranking_from_repository.ipynb" target="_parent">
       <img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/></a><br><br><a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=&template=template-request.md&title=Tool+-+Action+of+the+notebook+">Template request</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=GitHub+-+Get+commits+ranking+from+repository:+Error+short+description">Bug report</a>

**Tags:** #github #repos #commits #stats #naas_drivers #plotly #linechart #operations #analytics #dataframe #html

**Author:** [Suhas B](https://www.linkedin.com/in/suhasbrao/)


## Input

In [1]:
import pandas as pd
import plotly.express as px
from naas_drivers import github
import naas

import requests
from urllib.parse import urlencode

Note: Before executing the below line, please do add your GitHub token as a naas secret using the below line

naas.secret.add("Git_Token", <i> Put your token here</i>)

In [2]:
# Github repository url
REPO_URL = "https://github.com/jupyter-naas/awesome-notebooks"

# Github token
GITHUB_TOKEN = naas.secret.get("Git_Token")

In [3]:
# write a function to get the actor for closing an issue
def get_actor_for_closed_issue(events_url, git_obj):
    
    url = events_url
    res = requests.get(url, headers=git_obj.headers)
    try:
        res.raise_for_status()
    except requests.HTTPError as e:
        raise(e)
    
    res_json = res.json()
    if len(res_json) == 0:
        return
    
#     print(len(res_json))
    
    actor = None
    for events in res_json:
        if events["event"] == "closed":
#             print(events["actor"]["login"])
            actor= events["actor"]["login"]
    
    return actor
# get_actor_for_closed_issue("https://api.github.com/repos/jupyter-naas/awesome-notebooks/issues/1392/events")

In [5]:
df_commits = github.connect(GITHUB_TOKEN).repos.get_commits(REPO_URL)
df_commits

Unnamed: 0,ID,MESSAGE,AUTHOR_DATE,AUTHOR_NAME,AUTHOR_EMAIL,COMMITTER_DATE,COMMITTER_NAME,COMMITTER_EMAIL,COMMENTS_COUNT,VERIFICATION_REASON,VERIFICATION_STATUS
0,02e137410f099e604d5d6e270bcc16d1ed8deed0,Merge pull request #1393 from jupyter-naas/138...,2023-01-31 15:18:52,FlorentLvr,48032461+FlorentLvr@users.noreply.github.com,2023-01-31 15:18:52,GitHub,noreply@github.com,0,valid,True
1,2c562eaf6c33c85c06004d4a3cffc69e7229698b,feat: create treemap with plotly graph objects,2023-01-31 15:12:01,FlorentLvr,florent.ravenel@cashstory.com,2023-01-31 15:12:01,FlorentLvr,florent.ravenel@cashstory.com,0,unsigned,False
2,aa3730ab736e655755781b00e492fb4f3f73b579,feat: update references,2023-01-31 15:11:12,FlorentLvr,florent.ravenel@cashstory.com,2023-01-31 15:11:12,FlorentLvr,florent.ravenel@cashstory.com,0,unsigned,False
3,c2cf7dad63e258a97864af7c35f343648ea81c62,feat: create treemaps with plotly express,2023-01-31 15:01:56,FlorentLvr,florent.ravenel@cashstory.com,2023-01-31 15:01:56,FlorentLvr,florent.ravenel@cashstory.com,0,unsigned,False
4,8e2b1a60fecf60d1feff7b53d5ba90ee6b47bf1e,feat: rename outputs,2023-01-31 14:53:54,FlorentLvr,florent.ravenel@cashstory.com,2023-01-31 14:53:54,FlorentLvr,florent.ravenel@cashstory.com,0,unsigned,False
...,...,...,...,...,...,...,...,...,...,...,...
2101,0ea23b89ce2a6066c7109e5ee4114d812378e4e2,Update README.md,2020-10-29 08:36:19,BobCashStory,47117399+BobCashStory@users.noreply.github.com,2020-10-29 08:36:19,GitHub,noreply@github.com,0,valid,True
2102,83ecdbdfbd26bb9ac13b0735d7cc134e38d3b860,Update README.md,2020-10-29 08:35:09,BobCashStory,47117399+BobCashStory@users.noreply.github.com,2020-10-29 08:35:09,GitHub,noreply@github.com,0,valid,True
2103,0a52defaf0c3f9b34f264c48da73d5ed2e40aca8,Update README.md,2020-10-29 08:34:28,BobCashStory,47117399+BobCashStory@users.noreply.github.com,2020-10-29 08:34:28,GitHub,noreply@github.com,0,valid,True
2104,58cf4de85a2c375b3699abf1db312bd300a8eb1c,Update README.md,2020-10-29 08:34:04,BobCashStory,47117399+BobCashStory@users.noreply.github.com,2020-10-29 08:34:04,GitHub,noreply@github.com,0,valid,True


In [85]:
def get_commits(df):
    # Exclude Github commits
    df = df[(df.COMMITTER_EMAIL.str[-10:] != "github.com")]
    
    # Groupby and count
    df = df.groupby(["AUTHOR_NAME"], as_index=False).agg({"ID": "count"})
    
    # Cleaning
    df = df.rename(columns={"ID": "NB_COMMITS"})
    return df.sort_values(by="NB_COMMITS", ascending=False).reset_index(drop=True)

df_commits_by_contributor = get_commits(df_commits)
df_commits_by_contributor.head()

Unnamed: 0,AUTHOR_NAME,NB_COMMITS
0,FlorentLvr,575
1,Maxime Jublou,114
2,jravenel,82
3,SanjuEpic,73
4,tparente-ui,63


In [4]:
def get_all_issues(repo_url):
    '''
    This function retrives all the issues of a repository and returns a 
    dataframe with the following columns:
    
    link_to_the_issue
    issue_title
    issue_number
    issue_state
    issue_creator
    last_created_date
    last_created_time
    last_updated_date
    last_updated_time
    

    Parameters
    ----------
    repository: str:
        Repository url from Github.
        Example : "https://github.com/jupyter-naas/awesome-notebooks"
    '''
    
    git_obj = github.connect(GITHUB_TOKEN)
    repository = git_obj.get_repository_url(repo_url)
    df = pd.DataFrame()
    page, idx = 1, 0
    while True:
        params = {
                "per_page": "100",
                "page": page,
            }
        
        # Api to get open issues from github
        url = f"https://api.github.com/repos/{repository}/issues?state=all&{urlencode(params, safe='(),')}"
        res = requests.get(url, headers=git_obj.headers)
        try:
            res.raise_for_status()
        except requests.HTTPError as e:
            raise(e)
        
        res_json = res.json()
        if len(res_json) == 0:
            break
        
#         print(len(res_json))
        for issue in res_json:
            # Fetch all the issues, check node_id to see if it is an issue or PR
            
            if(issue["node_id"].startswith("I_")):
                
                df.loc[idx,'link_to_the_issue'], df.loc[idx, 'issue_number'] = issue['html_url'], issue['number']
                df.loc[idx, 'issue_title'], df.loc[idx, 'issue_state'] = issue['title'], issue['state']
   
                df.loc[idx, "issue_creator"] = issue["user"].get("login")
                
                # Create a cloumn that stores the name of the person who closed the issue
                # else if the issue is Open the column value will be None
                df.loc[idx, "issue_closed_by"] = get_actor_for_closed_issue( issue["events_url"], git_obj )
                
                df.loc[idx, 'last_created_date'] = issue.get('created_at').strip('Z').split('T')[0]
                df.loc[idx, 'last_created_time'] = issue.get('created_at').strip('Z').split('T')[-1]
                df.loc[idx, 'last_updated_date'] = issue.get('updated_at').strip('Z').split('T')[0]
                df.loc[idx, 'last_updated_time'] = issue.get('updated_at').strip('Z').split('T')[-1]
                
                
                idx +=1
        page+=1
    return df



In [8]:
df_all_issues = get_all_issues(REPO_URL)
print("Total open Issues fetched:", len(df_all_issues))

Total open Issues fetched: 827


In [9]:
df_all_issues.head()

Unnamed: 0,link_to_the_issue,issue_number,issue_title,issue_state,issue_creator,issue_closed_by,last_created_date,last_created_time,last_updated_date,last_updated_time
0,https://github.com/jupyter-naas/awesome-notebo...,1394.0,Azure - Download files in blob storage,open,FlorentLvr,,2023-01-31,15:29:14,2023-01-31,15:30:32
1,https://github.com/jupyter-naas/awesome-notebo...,1392.0,Azure - Upload files in blob storage,open,FlorentLvr,,2023-01-31,10:47:27,2023-01-31,10:49:00
2,https://github.com/jupyter-naas/awesome-notebo...,1391.0,LinkedIn - Get profile information: Error shor...,open,JJayasri,,2023-01-31,09:20:41,2023-01-31,09:20:41
3,https://github.com/jupyter-naas/awesome-notebo...,1389.0,FEC - Display Balance Sheet with Treemap,closed,FlorentLvr,FlorentLvr,2023-01-30,16:20:26,2023-01-31,09:59:19
4,https://github.com/jupyter-naas/awesome-notebo...,1388.0,Plotly - Create Treemap,closed,FlorentLvr,FlorentLvr,2023-01-30,16:18:33,2023-01-31,15:18:54


In [48]:
# Need to get df for issue comments and PR comments

In [86]:
def get_contributors_details(repo_url):
    git_obj = github.connect(GITHUB_TOKEN)
    repository = git_obj.get_repository_url(repo_url)
    df = pd.DataFrame()
    page, idx = 1, 0
    while True:
        params = {
                "per_page": "100",
                "page": page,
            }
        url = f"https://api.github.com/repos/{repository}/contributors?{urlencode(params, safe='(),')}"
        res = requests.get(url, headers=git_obj.headers)
        try:
            res.raise_for_status()
        except requests.HTTPError as e:
            raise(e)
        
        res_json = res.json()
        if len(res_json) == 0:
            break
        
        for contributor in res_json:
#             print(contributor)
            df.loc[idx, "contributor"] = contributor["login"]
            # create a column to store number of issues created by a contributor
            df.loc[idx, "issues_created"] = len(df_all_issues.loc[df_all_issues["issue_creator"]==contributor["login"]])
            # create a column to store number of issues closed by a contributor
            
            df.loc[idx, "issues_closed"] = len(df_all_issues.loc[df_all_issues["issue_closed_by"]==contributor["login"]])
#             print(contributor["login"])
            # add a column to store number of commits made by a contributor
#          
#             df.loc[idx, "commits"] = len(df_commits.loc[df_commits["AUTHOR_NAME"] == contributor["login"]])
            commits = df_commits_by_contributor.loc[df_commits_by_contributor["AUTHOR_NAME"] == contributor["login"]]["NB_COMMITS"]
            
            df.loc[idx, "commits"] = commits.values[0] if commits.empty==False else 0
            idx +=1
        
        page += 1
    
    return df

In [87]:
df_commits_by_contributor.loc[df_commits_by_contributor["AUTHOR_NAME"] == "FlorentLvr"]["NB_COMMITS"].empty

False

In [88]:
df = get_contributors_details(REPO_URL)

In [89]:
df

Unnamed: 0,contributor,issues_created,issues_closed,commits
0,FlorentLvr,350.0,160.0,575.0
1,fravenel,0.0,0.0,0.0
2,jravenel,105.0,64.0,82.0
3,Dr0p42,293.0,14.0,0.0
4,tparente-ui,7.0,0.0,63.0
...,...,...,...,...
66,Rayryu,1.0,0.0,0.0
67,arimbr,0.0,0.0,1.0
68,Arun-kc,0.0,0.0,0.0
69,delasalle-sio-martin,0.0,0.0,0.0


In [6]:
def get_issues_leaders(df):
    df = df.groupby(["issue_creator"], as_index=False).agg({"issue_title":"count"})
    df = df.rename(columns={"issue_title":"Issues_Created", "issue_creator":"Creator"})
    
    return df.sort_values(by="Issues_Created", ascending=False).reset_index(drop=True)


In [7]:
# df = get_commits(df_commits)
df = get_issues_leaders(df_all_issues)
df.head()

Unnamed: 0,Creator,Issues_Created
0,FlorentLvr,349
1,Dr0p42,293
2,jravenel,105
3,Valpiquard,17
4,SanjuEpic,11


In [17]:
len(df_all_issues.loc[df_all_issues["issue_creator"]=="FlorentLvr"])

349