# Load libraries

In [1]:
import numpy as np
import re
import pandas as pd
import os
from datetime import datetime

# Specify git executable file for GitPython in Jupyter Notebook (In IDE, it can still work without this line.)
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = "C:\Program Files\Git\cmd\git.exe"

import git
from git import RemoteProgress

from git import Repo
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Clone repo from GitHub

Link: https://git-scm.com/docs/git-clone <br>
<b>Note:</b> In case too-long file path issue occurs in Windows, set <code>git config --system core.longpaths true</code>

In [2]:
class Progress(RemoteProgress):
    def update(self, op_code, cur_count, max_count=None, message=''):
        print(self._cur_line)

In [3]:
remote_link = "https://github.com/apache/atlas"
local_link = "C:\\Secure Software\\atlas"
# Uncomment to clone
# Repo.clone_from(remote_link, local_link, progress=Progress())

In [4]:
repo = Repo(local_link)
fixing_commit = "0dcfd21bbfaac6f037f46b7aaaab0e5546fd2a7"
affected_file = "webapp/src/main/java/org/apache/atlas/web/util/Servlets.java"

# A) Message and Title of the Fixing Commit
Link: https://git-scm.com/docs/git-show
You can explore different parameters.

In [5]:
show_data = repo.git.show("-s", fixing_commit).splitlines()
show_data.reverse()
print("Message and title of the commit:")
print(show_data[0])

Message and title of the commit:
    (cherry picked from commit ac80b8b61b122abe57a644cbfe3868f07c301914)


# B) Total Files Affected

In [6]:
total_files_affected = repo.git.show("--stat",fixing_commit).splitlines()
print("Files Afftected:")
total_files_affected = total_files_affected[6:]
for line in total_files_affected:
    print(line)

Files Afftected:
    Signed-off-by: Madhan Neethiraj <madhan@apache.org>
    (cherry picked from commit ac80b8b61b122abe57a644cbfe3868f07c301914)

 .../src/main/java/org/apache/atlas/web/util/Servlets.java   | 13 +------------
 .../apache/atlas/web/resources/EntityJerseyResourceIT.java  |  3 ---
 2 files changed, 1 insertion(+), 15 deletions(-)


# C) Total Directories Affected

In [7]:
total_directories_affected = repo.git.show("--dirstat",fixing_commit).splitlines()
total_directories_affected = total_directories_affected[6:]
print("Directories Affected:")
for lines in total_directories_affected:
    print(lines)

Directories Affected:
    Signed-off-by: Madhan Neethiraj <madhan@apache.org>
    (cherry picked from commit ac80b8b61b122abe57a644cbfe3868f07c301914)

  69.3% webapp/src/main/java/org/apache/atlas/web/util/
  30.6% webapp/src/test/java/org/apache/atlas/web/resources/


# D) Total lines of code (including comments and blank lines) Deleted

In [8]:
lines_del_inc_comm = repo.git.show("--shortstat",fixing_commit).splitlines()
lines_del_inc_comm.reverse()
lines_del_inc_comm = lines_del_inc_comm[0].split(',')
lines_del_inc_comm = lines_del_inc_comm[2].split(" ")
print("Total lines of code deleted(including comments and blank lines): "+lines_del_inc_comm[1])

Total lines of code deleted(including comments and blank lines): 15


# E) Total lines of code (including comments and blank lines) Added

In [9]:
lines_add_inc_comm = repo.git.show("--shortstat",fixing_commit).splitlines()
lines_add_inc_comm.reverse()
lines_add_inc_comm = lines_add_inc_comm[0].split(',')
lines_add_inc_comm = lines_add_inc_comm[1].split(" ")
print("Total lines of code Added(including comments and blank lines): "+lines_add_inc_comm[1])

Total lines of code Added(including comments and blank lines): 1


# F) Total lines of code (excluding comments and blank lines) Deleted

In [10]:
lines_del_exc_comm = repo.git.show("-U0","--pretty=""",fixing_commit).splitlines()
lines_added = []
tot = 0
for line in lines_del_exc_comm:
    if re.search("^\-",line):
        l = line.split(" ")
        if(len(l)>1):
            if(len(l[0]) == 1):
                if(l[1][:1] != '*' or l[1][:1] != '/'):
                    tot += 1
print("Total line of code(excluding comments and blank lines) Deleted:" + str(tot))

Total line of code(excluding comments and blank lines) Deleted:15


# G) Total lines of code (excluding comments and blank lines) Added

In [11]:
lines_add_exc_comm = repo.git.show("-U0","--pretty=""",fixing_commit).splitlines()
lines_added = []
tot = 0
for line in lines_add_exc_comm:
    if re.search("^\+",line):
        l = line.split(" ")
        if(len(l)>1):
            if(len(l[0]) == 1):
                if(l[1][:1] != '*' or l[1][:1] != '/'):
                    tot += 1
print("Total line of code(excluding comments and blank lines) Added:" + str(tot))


Total line of code(excluding comments and blank lines) Added:1


# H) Days between the current fixing commit and the previous commit

In [12]:
days = repo.git.log("--format=%cd",fixing_commit,"--",affected_file,"-2").splitlines()
list_size = len(days)
if list_size<2:
    print("0 Days")
else:
    last_commit_date = datetime.strptime(days[0],"%a %b %d %X %Y %z")
    last_prev_commit_date = datetime.strptime(days[1],"%a %b %d %X %Y %z")
    date_diff = (abs(last_commit_date-last_prev_commit_date))
    print("No of days:" + str(date_diff))

No of days:242 days, 9:43:30


# I) No of times each affected file of the current fixing commit been modified in the past since their creation

In [13]:
no_of_times = repo.git.log("--follow","--format=%h",fixing_commit,"--",affected_file).splitlines()
length = len(no_of_times)
print("No of times each affected file of the current fixing commit been modified in the past since their creation:"+ str(length))

No of times each affected file of the current fixing commit been modified in the past since their creation:21


# J) Developers who has modified each affected file since its creation

In [14]:
no_of_times = repo.git.log("--format=%an",fixing_commit,"--",affected_file).splitlines()
s = set(no_of_times)
print("List of Developers:")
for line in s:
    print(line)

List of Developers:
Venkatesh Seetharam
Shwetha GS
nixonrodrigues


# K) Commits of each Developer

In [15]:
commit_authors = repo.git.shortlog("-sn", "--all").splitlines()
author_commits = []

for commit_author in commit_authors:
    commit, author = commit_author.split("\t")
    for authors in s:
        if(authors==author):
            author_commits.append((int(commit), authors))

In [16]:
df = pd.DataFrame(author_commits, columns=['Commit','Author'])

In [17]:
df.head()

Unnamed: 0,Commit,Author
0,410,Shwetha GS
1,178,nixonrodrigues
2,155,Venkatesh Seetharam
