# Github - Get weekly commits activity

This notebook fetches the commits of a GitHub repository, counts them per week and plots the result, so that commit activity can be tracked over time.

Tags: #github #commits #plotly #barchart

## Input

In [None]:
import pandas as pd
import requests
import plotly.express as px
import pydash as _pd
from urllib.parse import urlencode

## Setup Github
**How to find your personal access token on Github?**

- First, create a personal access token at https://github.com/settings/tokens. It is needed to read the details of our organization through the GitHub API.
- You will be asked to select scopes for the token. The scopes you choose determine what information you can access and which actions you can perform against the API.
- You should be careful with the ones prefixed with write:, delete: and admin: as these might be quite destructive.
- You can find a description of each scope in the docs: https://docs.github.com/en/developers/apps/building-oauth-apps/scopes-for-oauth-apps

In [None]:
# GitHub repository URL in its REST API form (api.github.com/repos/<owner>/<repo>),
# not the browser URL; "/commits" is appended to it when fetching.
REPO_URL = "https://api.github.com/repos/jupyter-naas/awesome-notebooks"

# GitHub personal access token, sent in the Authorization header of every request.
# The value below is a placeholder: replace it with your own token and never
# commit a real token to version control.
GITHUB_TOKEN = "ghp_NvAPoU0R5dkVL2GIxxxxxxxxxxxxxxxx"

## Model

### Create a function to return commits from repository url

In [None]:
def get_commits(token, repository):
    """Return the commits of a GitHub repository as a DataFrame.

    Pages through ``{repository}/commits`` (100 commits per request) until
    the API returns a page that is shorter than the page size.

    Args:
        token: GitHub personal access token, sent in the ``Authorization``
            header.
        repository: Repository API URL, e.g.
            ``https://api.github.com/repos/<owner>/<repo>``.

    Returns:
        pandas.DataFrame with one row per commit and the columns
        AUTHOR_NAME, AUTHOR_EMAIL, COMMIT_DATE (timezone-naive datetime),
        COMMIT_MESSAGE and COMMIT_ID. The columns are present even when no
        commit was returned.

    Raises:
        requests.HTTPError: If GitHub answers with a 4xx/5xx status.
        requests.Timeout: If a request exceeds the timeout.
    """
    columns = [
        "AUTHOR_NAME",
        "AUTHOR_EMAIL",
        "COMMIT_DATE",
        "COMMIT_MESSAGE",
        "COMMIT_ID",
    ]
    per_page = 100
    headers = {'Authorization': f'token {token}'}
    commits = []
    page = 1
    while True:
        res = requests.get(
            f"{repository}/commits",
            headers=headers,
            params={"per_page": per_page, "page": page},
            # Without a timeout a stalled connection would hang the notebook.
            timeout=30,
        )
        res.raise_for_status()
        res_json = res.json()

        for r in res_json:
            # Turn the ISO 8601 timestamp ("2022-01-03T10:00:00Z") into a
            # naive "YYYY-MM-DD HH:MM:SS" string; tolerate a missing date.
            commit_date = _pd.get(r, "commit.author.date")
            if commit_date is not None:
                commit_date = commit_date.replace("T", " ").replace("Z", "")
            commits.append(
                {
                    "AUTHOR_NAME": _pd.get(r, "author.login"),
                    "AUTHOR_EMAIL": _pd.get(r, "commit.author.email"),
                    "COMMIT_DATE": commit_date,
                    "COMMIT_MESSAGE": _pd.get(r, "commit.message"),
                    "COMMIT_ID": _pd.get(r, "sha"),
                }
            )

        # A short (or empty) page is the last one: no need to request another.
        if len(res_json) < per_page:
            break
        page += 1

    # Explicit columns keep the frame well-formed when no commit was found,
    # so the COMMIT_DATE conversion below cannot fail with a KeyError.
    df = pd.DataFrame(commits, columns=columns)
    df["COMMIT_DATE"] = pd.to_datetime(df["COMMIT_DATE"])
    return df

# Fetch the commits of the configured repository (performs network requests).
df_commits = get_commits(GITHUB_TOKEN, REPO_URL)
# Bare expression so the notebook renders the DataFrame as the cell output.
df_commits

## Output

### Get weekly commits

In [None]:
def get_weekly_commits(df):
    """Count commits per calendar week.

    Args:
        df: DataFrame with a datetime ``COMMIT_DATE`` column and a
            ``COMMIT_ID`` column, one row per commit.

    Returns:
        DataFrame with one row per weekly bin (pandas ``'W'`` frequency) and
        the columns ``COMMIT_DATE`` (bin label) and ``WEEKLY_COMMITS``
        (number of non-null commit ids in that bin).
    """
    # Bucket the rows into weekly bins keyed on the commit timestamp
    weekly_bins = pd.Grouper(key="COMMIT_DATE", freq="W")
    counts = df.groupby(weekly_bins).agg({"COMMIT_ID": "count"})

    # Expose the bin label as a regular column and give the count a clear name
    return counts.reset_index().rename(columns={"COMMIT_ID": "WEEKLY_COMMITS"})

# Aggregate the fetched commits into one row per week.
df_weekly = get_weekly_commits(df_commits)
# Bare expression so the notebook renders the DataFrame as the cell output.
df_weekly

### Plot a bar chart of weekly commit activity

In [None]:
# Bar chart: one bar per week, bar height = number of commits in that week
fig = px.bar(
    df_weekly,
    x="COMMIT_DATE",
    y="WEEKLY_COMMITS",
    title="Weekly Commit Report",
    labels={'COMMIT_DATE': 'Weeks committed', 'WEEKLY_COMMITS': "Nb. commits"},
)

# White background and fixed size; as the last expression of the cell, the
# figure returned by update_layout is what the notebook displays
fig.update_layout(plot_bgcolor="#ffffff", width=800, height=500)