<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# GitHub - Create leaderboard of contributors
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/GitHub/GitHub_Get_commits_ranking_from_repository.ipynb" target="_parent">
       <img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/></a><br><br><a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=&template=template-request.md&title=Tool+-+Action+of+the+notebook+">Template request</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=GitHub+-+Get+commits+ranking+from+repository:+Error+short+description">Bug report</a>

**Tags:** #github #repos #commits #stats #naas_drivers #plotly #linechart #operations #analytics #dataframe #html

**Author:** [Suhas B](https://www.linkedin.com/in/suhasbrao/)


## Input

In [1]:
import pandas as pd
import plotly.express as px
from naas_drivers import github
import naas

import requests
from urllib.parse import urlencode

Note: Before executing the below line, please do add your GitHub token as a naas secret using the below line

naas.secret.add("Git_Token", <i> Put your token here</i>)

In [2]:
# Github repository url
REPO_URL = "https://github.com/jupyter-naas/awesome-notebooks"

# Github token
GITHUB_TOKEN = naas.secret.get("Git_Token")

## Model

In [5]:
# write a function to get the actor for closing an issue
def get_actor_for_closed_issue(events_url, git_obj):
    '''
    This function is used to get actor for a closed issues
    It gives the name of a person who closed the issue.
    
    Parameters
    ----------
    events_url: str:
        events_url from Github.
        Example : "https://api.github.com/repos/jupyter-naas/awesome-notebooks/issues/1401/events"
    
    git_obj: object of naas_driver.github
    '''
    
    url = events_url
    res = requests.get(url, headers=git_obj.headers)
    try:
        res.raise_for_status()
    except requests.HTTPError as e:
        raise(e)
    
    res_json = res.json()
    if len(res_json) == 0:
        return
    
    
    actor = None
    for events in res_json:
        if events["event"] == "closed":
            actor= events["actor"]["login"]
    
    return actor
# get_actor_for_closed_issue("https://api.github.com/repos/jupyter-naas/awesome-notebooks/issues/1392/events")

In [6]:
def get_all_issues(repo_url):
    '''
    This function retrives all the issues of a repository and returns a 
    dataframe with the following columns:
    
    link_to_the_issue
    issue_title
    issue_number
    issue_state
    issue_creator
    issue_closed_by
    last_created_date
    last_created_time
    last_updated_date
    last_updated_time
    

    Parameters
    ----------
    repo_url: str:
        Repository url from Github.
        Example : "https://github.com/jupyter-naas/awesome-notebooks"
    '''
    
    git_obj = github.connect(GITHUB_TOKEN)
    repository = git_obj.get_repository_url(repo_url)
    df = pd.DataFrame()
    page, idx = 1, 0
    while True:
        params = {
                "per_page": "100",
                "page": page,
            }
        
        # Api to get open issues from github
        url = f"https://api.github.com/repos/{repository}/issues?state=all&{urlencode(params, safe='(),')}"
        res = requests.get(url, headers=git_obj.headers)
        try:
            res.raise_for_status()
        except requests.HTTPError as e:
            raise(e)
        
        res_json = res.json()
        if len(res_json) == 0:
            break
        
#         print(len(res_json))
        for issue in res_json:
            # Fetch all the issues, check node_id to see if it is an issue or PR
            
            if(issue["node_id"].startswith("I_")):
                
                df.loc[idx,'link_to_the_issue'], df.loc[idx, 'issue_number'] = issue['html_url'], issue['number']
                df.loc[idx, 'issue_title'], df.loc[idx, 'issue_state'] = issue['title'], issue['state']
   
                df.loc[idx, "issue_creator"] = issue["user"].get("login")
                
                # Create a cloumn that stores the name of the person who closed the issue
                # else if the issue is Open the column value will be None
                df.loc[idx, "issue_closed_by"] = get_actor_for_closed_issue( issue["events_url"], git_obj )
                
                df.loc[idx, 'last_created_date'] = issue.get('created_at').strip('Z').split('T')[0]
                df.loc[idx, 'last_created_time'] = issue.get('created_at').strip('Z').split('T')[-1]
                df.loc[idx, 'last_updated_date'] = issue.get('updated_at').strip('Z').split('T')[0]
                df.loc[idx, 'last_updated_time'] = issue.get('updated_at').strip('Z').split('T')[-1]
                
                
                idx +=1
        page+=1
    return df



In [7]:
df_all_issues = get_all_issues(REPO_URL)
print("Total open Issues fetched:", len(df_all_issues))

Total open Issues fetched: 834


In [8]:
df_all_issues.head()

Unnamed: 0,link_to_the_issue,issue_number,issue_title,issue_state,issue_creator,issue_closed_by,last_created_date,last_created_time,last_updated_date,last_updated_time
0,https://github.com/jupyter-naas/awesome-notebo...,1401.0,Tool - Action of the notebook,open,mustafavi11,,2023-02-04,08:52:55,2023-02-04,08:52:55
1,https://github.com/jupyter-naas/awesome-notebo...,1400.0,SpaCy - Tokenize a text corpus,open,jravenel,,2023-02-02,16:59:56,2023-02-02,17:01:41
2,https://github.com/jupyter-naas/awesome-notebo...,1399.0,OpenBB - Create template with SPY,open,FlorentLvr,,2023-02-02,16:47:12,2023-02-02,16:47:43
3,https://github.com/jupyter-naas/awesome-notebo...,1398.0,FEC - Create Barline chart,open,FlorentLvr,,2023-02-02,16:34:58,2023-02-02,16:35:52
4,https://github.com/jupyter-naas/awesome-notebo...,1397.0,FEC - Create double line chart,open,FlorentLvr,,2023-02-02,16:33:46,2023-02-02,16:35:47


In [9]:
def get_issue_pr_comments(repo_url):
    '''
    The function retrieves all the issue comments and pr comments for a given r
    repository url.
    The function returns a dataframe with following columns for an issue comment/pr comment
    
    comment_id
    issue_url
    comment_by
    comment_body
    '''
    git_obj = github.connect(GITHUB_TOKEN)
    repository = git_obj.get_repository_url(repo_url)
    df = pd.DataFrame()
    page, idx = 1, 0
    while True:
        params = {
                "per_page": "100",
                "page": page,
            }
        
        # Api to get open issues from github
       
        url = f"https://api.github.com/repos/{repository}/issues/comments?{urlencode(params, safe='(),')}"
        res = requests.get(url, headers=git_obj.headers)
        try:
            res.raise_for_status()
        except requests.HTTPError as e:
            raise(e)
        
        res_json = res.json()
        if len(res_json) == 0:
            break
        
#         print(len(res_json))
        for issue_comment in res_json:
            df.loc[idx, "comment_id"] = issue_comment["id"]
            df.loc[idx, "issue_url"] = issue_comment["issue_url"]
            df.loc[idx, "comment_by"] = issue_comment["user"]["login"]
            df.loc[idx, "comment_body"] = issue_comment["body"]
            
            idx +=1
        page += 1
        
    return df

In [11]:
df_all_comments = get_issue_pr_comments(REPO_URL)
df_all_comments.head()

Unnamed: 0,comment_id,issue_url,comment_by,comment_body
0,718561230.0,https://api.github.com/repos/jupyter-naas/awes...,BobCashStory,https://github.com/jupyter-naas/awesome-notebo...
1,718561597.0,https://api.github.com/repos/jupyter-naas/awes...,BobCashStory,https://github.com/jupyter-naas/awesome-notebo...
2,718562018.0,https://api.github.com/repos/jupyter-naas/awes...,BobCashStory,https://github.com/jupyter-naas/awesome-notebo...
3,721256997.0,https://api.github.com/repos/jupyter-naas/awes...,jravenel,Input Laden \r\n- app : https://app.landen.co/...
4,746005918.0,https://api.github.com/repos/jupyter-naas/awes...,sanjaysabu4205,Snowflake using CRUD method


In [12]:
def get_contributors_details(repo_url):
    '''
    The function retrieves contributors for a given repository url
    This function returns a dataframe of contributors with following columns
    
    contributor_name
    issues_created
    issues_closed
    issue_pr_comments
    commits
    
    '''
    git_obj = github.connect(GITHUB_TOKEN)
    repository = git_obj.get_repository_url(repo_url)
    df = pd.DataFrame()
    page, idx = 1, 0
    while True:
        params = {
                "per_page": "100",
                "page": page,
            }
        url = f"https://api.github.com/repos/{repository}/contributors?{urlencode(params, safe='(),')}"
        res = requests.get(url, headers=git_obj.headers)
        try:
            res.raise_for_status()
        except requests.HTTPError as e:
            raise(e)
        
        res_json = res.json()
        if len(res_json) == 0:
            break
        
        for contributor in res_json:
            df.loc[idx, "contributor_name"] = contributor["login"]
            # create a column to store number of issues created by a contributor
            df.loc[idx, "issues_created"] = len(df_all_issues.loc[df_all_issues["issue_creator"]==contributor["login"]])
            # create a column to store number of issues closed by a contributor
            
            df.loc[idx, "issues_closed"] = len(df_all_issues.loc[df_all_issues["issue_closed_by"]==contributor["login"]])
            
            # Need to get df for issue comments and PR comments
            df.loc[idx, "issue_pr_comments"] = len(df_all_comments.loc[df_all_comments["comment_by"] == contributor["login"]])
            
            # add a column to store number of commits made by a contributor
            df.loc[idx, "commits"] = len(git_obj.repos.get_commits(REPO_URL, contributor["login"]))
            idx +=1
        
        page += 1
    
    return df

In [13]:
df_contributors = get_contributors_details(REPO_URL)

In [15]:
# Add a column of points for contributors df
df_contributors["pts"] = 1*df_contributors["issues_created"] + 1*df_contributors["issues_closed"] + 0.5*df_contributors["issue_pr_comments"]

In [17]:
df_contributors.head()

Unnamed: 0,contributor_name,issues_created,issues_closed,issue_pr_comments,commits,pts
0,FlorentLvr,354.0,160.0,285.0,265.0,656.5
1,fravenel,0.0,0.0,0.0,280.0,0.0
2,jravenel,107.0,64.0,507.0,142.0,424.5
3,Dr0p42,293.0,14.0,114.0,93.0,364.0
4,tparente-ui,7.0,0.0,2.0,98.0,8.0


In [18]:
def get_contributors_leaders(df):
    
    return df.sort_values(by="pts", ascending=False).reset_index(drop=True)


In [20]:
# df = get_commits(df_commits)
df_leaderboard = get_contributors_leaders(df_contributors)
df_leaderboard.head()

Unnamed: 0,contributor_name,issues_created,issues_closed,issue_pr_comments,commits,pts
0,FlorentLvr,354.0,160.0,285.0,265.0,656.5
1,jravenel,107.0,64.0,507.0,142.0,424.5
2,Dr0p42,293.0,14.0,114.0,93.0,364.0
3,SanjuEpic,11.0,0.0,59.0,75.0,40.5
4,SuhasBRao,0.0,0.0,62.0,11.0,31.0
