In [None]:
#################################################################################
# A set of helper utilities for Bitbucket to get list of commits by individuals across repos
# Getting started
# 1. Setup a bitbucket application password
# 2. Set the environment variables within your Python notebook, for example by running the lines below (do not enter quotes)
# ```
# %env BITBUCKET_USER=<your username>
# %env BITBUCKET_PASS=<application password>
# %env BITBUCKET_URL=https://api.bitbucket.org/
# %env BITBUCKET_WORKSPACE=inspiredbytech
# %env PROJECT_KEY=ABC
# %env USER_ID={xxxx-xxxx-xxxx-xxx-xx}
# ```
#################################################################################

from atlassian.bitbucket.cloud import Cloud
import os, json
import requests
import pandas as pd
%load_ext dotenv
%dotenv
# Connection settings are read from the process environment (presumably
# populated from a .env file by the %dotenv magic above -- confirm).
# Any variable that is not set resolves to None.
username = os.environ.get('BITBUCKET_USER')
password = os.environ.get('BITBUCKET_PASS')
url = os.environ.get('BITBUCKET_URL')
workspace = os.environ.get('BITBUCKET_WORKSPACE')
project_key = os.environ.get('PROJECT_KEY')
user_id = os.environ.get('USER_ID')

In [None]:
def get_repo(repo_name):
    """Fetch one repository's metadata from Bitbucket and pretty-print it.

    Sends a GET to the 2.0 ``repositories/{workspace}/{repo_name}`` endpoint
    using the module-level ``workspace``, ``headers``, ``username`` and
    ``password``, then prints the JSON body with sorted keys and a 4-space
    indent. Nothing is returned.

    Args:
        repo_name: Repository name/slug inside the configured workspace.
    """
    endpoint = f'https://api.bitbucket.org/2.0/repositories/{workspace}/{repo_name}'
    response = requests.get(endpoint, headers=headers, auth=(username, password))
    payload = response.json()
    print(json.dumps(payload, sort_keys=True, indent=4))

def get_permissions(repo_name):
    """Print the permissions configured on a repository.

    The previous implementation queried the plain repository endpoint (the
    same URL as ``get_repo``), so it printed repository metadata rather than
    permissions. This version queries the workspace-level repository
    permissions endpoint instead.

    NOTE(review): the endpoint path follows the Bitbucket Cloud REST API
    ("List a repository permissions for a workspace"); it is expected to
    require admin rights on the repository -- confirm the credentials in use
    have them.

    Args:
        repo_name: Repository name/slug inside the configured workspace.
    """
    url = (
        'https://api.bitbucket.org/2.0/workspaces/'
        f'{workspace}/permissions/repositories/{repo_name}'
    )
    rsp = requests.get(url, headers=headers, auth=(username, password))
    print(json.dumps(rsp.json(), sort_keys=True, indent=4))
    
def get_commits(repo_name, max_pages=10):
    """Return the commits of a repository, following Bitbucket pagination.

    The commits endpoint is paginated: each response carries a ``values``
    list and, when more results exist, a ``next`` URL. Previously only the
    first page was returned, which silently truncated busy repositories.
    This version keeps following ``next`` and merges every page's ``values``
    into the first response.

    Args:
        repo_name: Repository name/slug inside the configured workspace.
        max_pages: Upper bound on the number of pages requested, so a long
            history cannot trigger an unbounded number of calls. ``None``
            removes the bound; ``1`` reproduces the old single-page behavior.

    Returns:
        The decoded JSON of the first page with ``values`` extended by the
        following pages. If a ``next`` key is still present, the result was
        cut short (page limit reached or a follow-up request failed). A
        response without a ``values`` list (e.g. an error payload) is
        returned unchanged.
    """
    url = f'https://api.bitbucket.org/2.0/repositories/{workspace}/{repo_name}/commits/'
    result = requests.get(url, headers=headers, auth=(username, password)).json()

    # Nothing to page through when the first response is not a normal
    # paginated payload; hand it back exactly as it was received.
    if not isinstance(result, dict) or not isinstance(result.get('values'), list):
        return result

    pages_fetched = 1
    while result.get('next') and (max_pages is None or pages_fetched < max_pages):
        page = requests.get(
            result['next'], headers=headers, auth=(username, password)
        ).json()
        pages_fetched += 1
        if not isinstance(page, dict) or not isinstance(page.get('values'), list):
            # Follow-up request failed: keep what was collected and leave
            # 'next' in place so the caller can see the data is incomplete.
            break
        result['values'].extend(page['values'])
        if page.get('next'):
            result['next'] = page['next']
        else:
            result.pop('next', None)
    return result

In [None]:
# Listing endpoint for the workspace's repositories, filtered by project key.
url = (
    f'https://api.bitbucket.org/2.0/repositories/{workspace}/'
    f'?q=project.key="{project_key}"&limit=1000&pagelen=100'
)

# NOTE: `headers` is assigned again further down in this cell, before the
# first request is sent, so this value is never actually used for a call.
headers = {'Content-Type': 'application/json'}

# Placeholder query parameters; not referenced anywhere in the visible cells.
params = (('key1', 'mykeyhere'),)

# Maps a short report field name to the flattened (dot-separated) column
# that pd.json_normalize produces for a commit record.
column_names = {
    'author': 'author.user.nickname',
    'author2': 'author.raw',
    'hash': 'hash',
    'date': 'date',
    'summary': 'summary.raw',
    'repo_name': 'repository.name',
    'message': 'message',
    'summary.raw': 'summary.raw'
}

# Two keys above point at 'summary.raw'. Taking the raw values would list that
# column twice and every frame selected with this list would carry a duplicated
# column, so de-duplicate while keeping first-seen order.
lst_colmns = list(dict.fromkeys(column_names.values()))

print(lst_colmns)
# Headers shared by every Bitbucket request in this notebook (the helper
# functions read this module-level name when they are called).
headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
repos_request = requests.get(url, headers=headers, auth=(username, password))

repos_json = repos_request.json()

# The repository listing is paginated; follow the 'next' links so projects
# with more repositories than fit on one page are listed completely.
repo_records = list(repos_json['values'])
next_page_url = repos_json.get('next')
while next_page_url:
    page_json = requests.get(
        next_page_url, headers=headers, auth=(username, password)
    ).json()
    repo_records.extend(page_json.get('values', []))
    next_page_url = page_json.get('next')
df_repos = pd.DataFrame.from_dict(repo_records)

# Empty report frame with the expected columns. The earlier
# `df_report.set_index('hash')` call discarded its result (set_index returns a
# new frame), so it had no effect and has been dropped.
df_report = pd.DataFrame(columns=lst_colmns)

# Scratch check for a single repository, kept for reference. It used to be a
# bare string literal, which the notebook echoed as the cell's output.
# commits_json = get_commits('infra-compliance')
# df_nested_list = pd.json_normalize(commits_json, record_path =['values'])
# print(df_nested_list.size)
# print(df_nested_list.columns)
# df_nested_list

In [None]:

import datetime

# Build one frame per repository and concatenate once at the end. Growing
# df_report inside the loop copies the accumulated data on every iteration,
# and rebuilding it from scratch here makes the cell safe to re-run without
# duplicating rows.
commit_frames = []
for repo in df_repos['name']:
    commits_json = get_commits(repo)
    # An error payload (or an empty repository) has no usable 'values' list.
    commit_records = commits_json.get('values') if isinstance(commits_json, dict) else None
    if not commit_records:
        continue
    df_nested_list = pd.json_normalize(commit_records)
    #print(json.dumps(commits_json, sort_keys=True, indent=4))
    # Only repositories whose commits resolve to a Bitbucket user are reported.
    if column_names['author'] not in df_nested_list.columns:
        continue
    # reindex fills report columns that are absent for this repository with
    # NaN instead of raising KeyError.
    repo_commits = df_nested_list.reindex(columns=lst_colmns)
    commit_frames.append(repo_commits)
    # len() is the number of commits; .size would be rows * columns.
    print(f'appending {len(repo_commits)} records')

if commit_frames:
    df_report = pd.concat(commit_frames)
else:
    df_report = pd.DataFrame(columns=lst_colmns)
print(df_report)

# utc=True normalises every timestamp to UTC. With utc=False, timestamps that
# carry different offsets come back as plain objects and the .dt accessor fails.
df_report['date_only'] = pd.to_datetime(df_report[column_names['date']], utc=True).dt.date
df_report = df_report.fillna('')
#print(df_report[[column_names['author'], column_names['repo_name'], 'date_only']])


In [None]:
#################################################################################
# Excel report
#################################################################################
author_field = 'author'

# Only commits from the last 30 days are considered "recent".
report_cutoff = (datetime.datetime.now() - datetime.timedelta(30)).date()
df_report_recent = df_report[df_report['date_only'] > report_cutoff]

# Every selected column is a grouping key, so sum() has nothing to aggregate:
# the result is one row per distinct (date, author, repository) combination.
group_columns = ['date_only', column_names[author_field], column_names['repo_name']]
df_groups_authorrepo = df_report_recent[group_columns].groupby(group_columns, dropna=True)
df_groups_authorrepo = df_groups_authorrepo.sum().reset_index()

df_recent_commits = df_report_recent[
    ['date_only', column_names[author_field], column_names['repo_name'], column_names['message']]
].sort_values('date_only', ascending=False)

df_author_activity = df_report[
    [column_names['author'], column_names['author2'], column_names['repo_name'], 'date_only']
].sort_values('date_only', ascending=False)

# Calling to_excel('export.xlsx', ...) three times rewrites the file each
# time, leaving only the last sheet. A single ExcelWriter keeps all three
# sheets in one workbook.
# NOTE(review): the first sheet is named '2weeks' although the window above
# is 30 days -- confirm which one is intended.
with pd.ExcelWriter('export.xlsx', engine='xlsxwriter') as writer:
    df_groups_authorrepo.sort_values(by=group_columns, ascending=False).to_excel(
        writer, sheet_name='2weeks'
    )
    df_recent_commits.head(36).to_excel(writer, sheet_name='commits')
    df_author_activity.to_excel(writer, sheet_name='author_activity')

In [None]:
#################################################################################
# Graph
#################################################################################

# Commits from the last 30 days, each tagged with count=1 so that summing per
# group yields the number of commits. assign() returns a new frame; writing
# the column into the filtered slice directly triggers pandas'
# SettingWithCopyWarning.
graph_cutoff = (datetime.datetime.now() - datetime.timedelta(30)).date()
df_report1 = df_report[df_report['date_only'] > graph_cutoff].assign(count=1)
author_field = 'author2'

# Commits per author per day.
df_groups_author = df_report1[['date_only', column_names[author_field], 'count']].groupby(
    ['date_only', column_names[author_field]], dropna=True
)
df_groups_author = df_groups_author.sum().reset_index()
#print(df_groups_author)
import plotly.express as px
import plotly.tools as tls
from plotly.offline import iplot
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot

# One line per author: x is the commit date, y the number of commits that day.
fig = px.line(
        df_groups_author,
        x='date_only', y='count', color=column_names[author_field] , width=1200, log_y=False)
fig.show()