# Load libraries

In [2]:
import numpy as np
import re
import pandas as pd
import os
from datetime import datetime

# Specify git executable file for GitPython in Jupyter Notebook (In IDE, it can still work without this line.)
os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = "C:\Program Files\Git\cmd\git.exe"

import git
from git import RemoteProgress

from git import Repo
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Clone repo from GitHub

Link: https://git-scm.com/docs/git-clone <br>
<b>Note:</b> In case too-long file path issue occurs in Windows, set <code>git config --system core.longpaths true</code>

In [3]:
class Progress(RemoteProgress):
    def update(self, op_code, cur_count, max_count=None, message=''):
        print(self._cur_line)

In [4]:
remote_link = "https://github.com/jenkinsci/jenkins"
local_link = "C:\\Secure Software\\jenkins"
# Uncomment to clone
# Repo.clone_from(remote_link, local_link, progress=Progress())

In [5]:
repo = Repo(local_link)
fixing_commit = "47f38d714c99e1841fb737ad1005618eb26ed852"
affected_file = "core/src/main/java/jenkins/security/stapler/DoActionFilter.java"

# A) Message and Title of the Fixing Commit
Link: https://git-scm.com/docs/git-show
You can explore different parameters.

In [22]:
show_data = repo.git.show("-s", fixing_commit).splitlines()
show_data.reverse()
print("Message and title of the commit:")
print(show_data[2])

Message and title of the commit:
 [SECURITY-595]


# B) Total Files Affected

In [7]:
total_files_affected = repo.git.show("--stat",fixing_commit).splitlines()
print("Files Afftected:")
total_files_affected = total_files_affected[6:]
for line in total_files_affected:
    print(line)

Files Afftected:
    Co-Authored-By: Wadeck Follonier <wadeck.follonier@gmail.com>

 core/pom.xml                                       |   7 +-
 core/src/main/java/hudson/ProxyConfiguration.java  |   2 +
 .../main/java/hudson/TcpSlaveAgentListener.java    |   2 +
 .../hudson/diagnosis/ReverseProxySetupMonitor.java |   2 +
 core/src/main/java/hudson/model/Computer.java      |   3 +
 core/src/main/java/hudson/model/ModelObject.java   |   3 +
 .../src/main/java/hudson/model/ParameterValue.java |   2 +
 core/src/main/java/hudson/model/Queue.java         |   2 +
 core/src/main/java/hudson/model/UpdateCenter.java  |   4 +
 core/src/main/java/hudson/model/View.java          |   2 +
 .../hudson/security/AuthorizationStrategy.java     |   2 +
 .../java/hudson/security/csrf/CrumbIssuer.java     |   2 +
 .../main/java/jenkins/diagnosis/HsErrPidList.java  |   2 +
 .../main/java/jenkins/install/InstallState.java    |   2 +
 core/src/main/java/jenkins/model/Jenkins.java      |  19 +
 .../jenkins/se

# C) Total Directories Affected

In [8]:
total_directories_affected = repo.git.show("--dirstat",fixing_commit).splitlines()
total_directories_affected = total_directories_affected[6:]
print("Directories Affected:")
for lines in total_directories_affected:
    print(lines)

Directories Affected:
    Co-Authored-By: Wadeck Follonier <wadeck.follonier@gmail.com>

  17.1% core/src/main/java/jenkins/security/stapler/
   3.4% core/src/main/resources/jenkins/security/stapler/
   3.1% core/src/test/java/jenkins/security/stapler/
  68.6% test/src/test/java/jenkins/security/stapler/
   5.5% test/src/test/resources/plugins/


# D) Total lines of code (including comments and blank lines) Deleted

In [24]:
lines_del_inc_comm = repo.git.show("--shortstat",fixing_commit).splitlines()
lines_del_inc_comm.reverse()
lines_del_inc_comm = lines_del_inc_comm[0].split(',')
lines_del_inc_comm = lines_del_inc_comm[2].split(" ")
print("Total lines of code deleted(including comments and blank lines): "+lines_del_inc_comm[1])

Total lines of code deleted(including comments and blank lines): 3


# E) Total lines of code (including comments and blank lines) Added

In [23]:
lines_add_inc_comm = repo.git.show("--shortstat",fixing_commit).splitlines()
lines_add_inc_comm.reverse()
lines_add_inc_comm = lines_add_inc_comm[0].split(',')
lines_add_inc_comm = lines_add_inc_comm[1].split(" ")
print("Total lines of code Added(including comments and blank lines): "+lines_add_inc_comm[1])

Total lines of code Added(including comments and blank lines): 5044


# F) Total lines of code (excluding comments and blank lines) Deleted

In [25]:
lines_del_exc_comm = repo.git.show("-U0","--pretty=""",fixing_commit).splitlines()
lines_added = []
tot = 0
for line in lines_del_exc_comm:
    if re.search("^\-",line):
        l = line.split(" ")
        if(len(l)>1):
            if(len(l[0]) == 1):
                if(l[1][:1] != '*' or l[1][:1] != '/'):
                    tot += 1
print("Total line of code(excluding comments and blank lines) Deleted:" + str(tot))

Total line of code(excluding comments and blank lines) Deleted:2


# G) Total lines of code (excluding comments and blank lines) Added

In [26]:
lines_add_exc_comm = repo.git.show("-U0","--pretty=""",fixing_commit).splitlines()
lines_added = []
tot = 0
for line in lines_add_exc_comm:
    if re.search("^\+",line):
        l = line.split(" ")
        if(len(l)>1):
            if(len(l[0]) == 1):
                if(l[1][:1] != '*' or l[1][:1] != '/'):
                    tot += 1
print("Total line of code(excluding comments and blank lines) Added:" + str(tot))


Total line of code(excluding comments and blank lines) Added:4198


# H) Days between the current fixing commit and the previous commit

In [21]:
days = repo.git.log("--format=%cd",fixing_commit,"--",affected_file,"-2").splitlines()
list_size = len(days)
if list_size<2:
    print("0 Days")
else:
    last_commit_date = datetime.strptime(days[0],"%a %b %d %X %Y %z")
    last_prev_commit_date = datetime.strptime(days[1],"%a %b %d %X %Y %z")
    date_diff = (abs(last_commit_date-last_prev_commit_date))
    print("No of days:" + str(date_diff))

0 Days


# I) No of times each affected file of the current fixing commit been modified in the past since their creation

In [13]:
no_of_times = repo.git.log("--follow","--format=%h",fixing_commit,"--",affected_file).splitlines()
length = len(no_of_times)
print("No of times each affected file of the current fixing commit been modified in the past since their creation:"+ str(length))

No of times each affected file of the current fixing commit been modified in the past since their creation:1


# J) Developers who has modified each affected file since its creation

In [14]:
no_of_times = repo.git.log("--format=%an",fixing_commit,"--",affected_file).splitlines()
s = set(no_of_times)
print("List of Developers:")
for line in s:
    print(line)

List of Developers:
Daniel Beck


# K) Commits of each Developer

In [15]:
commit_authors = repo.git.shortlog("-sn", "--all").splitlines()
author_commits = []

for commit_author in commit_authors:
    commit, author = commit_author.split("\t")
    for authors in s:
        if(authors==author):
            author_commits.append((int(commit), authors))

In [16]:
df = pd.DataFrame(author_commits, columns=['Commit','Author'])

In [17]:
df.head()

Unnamed: 0,Commit,Author
0,1210,Daniel Beck
