# GitPython Parts

In [2]:
from git import Repo
import os

In [3]:
repo = Repo(os.path.join(os.environ.get("HOME"), "commons-math"))

assert not repo.bare


In [4]:
repo.head.commit.message

'MATH-1672: Remove stat.ranking package\n\nFunctionality has been transferred to the Commons Statistics ranking\nmodule.\n'

In [11]:
repo.head.set_commit("HEAD~1").commit.message

'MATH-1670: Remove stat.inference package\n\nFunctionality has been transferred to the Commons Statistics inference\nmodule.\n'

In [None]:
repo.head.commit.message

# Head has already set to HEAD~1, seems like we already checkouted to HEAD~1

'MATH-1670: Remove stat.inference package\n\nFunctionality has been transferred to the Commons Statistics inference\nmodule.\n'

In [15]:
repo.head.set_commit(repo.refs["origin/master"])
repo.head.commit.message


'MATH-1672: Remove stat.ranking package\n\nFunctionality has been transferred to the Commons Statistics ranking\nmodule.\n'

In [124]:
diff = repo.head.commit.diff(repo.head.commit.parents[0], create_patch=True)[0]

diff.a_path


'commons-math-legacy/pom.xml'

In [116]:


import re

def diff_header_parser(diff: str) -> list[int]:
    # print(diff)
    regex = re.compile(r"(@@) ([\-\+][0-9]+,[0-9]+ )+(@@)")

    m = re.search(regex, diff)
    
    if m == None:
        return [0, 0]

    group: list[str] = re.findall(r"[\-\+][1-9]+,[1-9]+", m.string)

    
    minus = 0
    plus = 0

    for x in group:
        d = re.search(r"([\-\+])([0-9]+),([0-9]+)", x)
        # print(d.groups())
        if d.groups()[0] == '-':
            minus += int(d.groups()[2])
        else:
            plus += int(d.groups()[2])



    return [minus, plus]
        


    
    

diff_header_parser(diff=diff.diff.__str__())



[11, 6]

In [127]:
import pandas as pd

repo_dict = dict.fromkeys(['commit', 'commit message'])
repo_dict['commit'] = []
repo_dict['commit message'] = []
repo_dict['commit changed file'] = []
repo_dict['commit minus line'] = []
repo_dict['commit plus line'] = []



for x in repo.iter_commits():
    repo_dict['commit'].append(x.hexsha)
    repo_dict['commit message'].append(x.message)
    changed_file = []
    try:
        diff = x.diff(x.parents[0], create_patch=True)
        plus = 0
        minus = 0
        for x in diff:
            if x.a_path != None:
                changed_file.append(x.a_path)
            else:
                changed_file.append("")
            num = diff_header_parser(diff=x.diff.__str__())
            plus += num[1]
            minus += num[0]
    except IndexError as e:
        pass
    finally:
        repo_dict['commit minus line'].append(minus)
        repo_dict['commit plus line'].append(plus)
        repo_dict['commit changed file'].append(changed_file)

    
    
repo_dict

{'commit': ['e195ee2d084af99e53c9a8e47df8abbec85edc67',
  '95847b773741bf66d7a98c18f71596542921c56a',
  '6c6ffb5952ebbe921be7dbf7d1d601b8b7d67fa5',
  'f7ca94625de99adb2ad7e07e696a327ba7b887b8',
  'eb9cfebe960ff19b2b51f67dc46c21a4959080fc',
  'aed06726224122b483bdd31b60b1bc62c1873d98',
  'f83ac78c27c3c34732517e118ff20ab24b8fa862',
  'c0c0de40a7e0405c9d02d7432e82b3c480b5a735',
  'dea6eaed863c4cb59eee92a8e696a9f94a88d525',
  'b945a2d921adbd1abd46c6d47232d33711a7ed93',
  '0a9211791eb8ad855b2b164faf1e91b5eb14defc',
  '49664cf826b5d3a49a05e93e8c4e2f59d6ab7689',
  '64f1847ae9bef4c72dbf280a05ff5dd6a2eb0705',
  'b71393922c1c53f792c615670e2960d1964a2387',
  'a27cd3cab06b0ea3dd8a488f116ee0f13a221d27',
  '484ff99a6ec5d42c34f55f4fab81234a1e2766a7',
  '01a5a62e565692c6a00a7a65deea9e7aaaa7cfb0',
  '8dad3b8b5fad8b13f23843b93c36b4ffc9f097eb',
  'b0e7c4678fb2c0d16d65bbaaafcc22fd18483dfa',
  '03edd659394ecbe44e7903007366fca4814cdd6b',
  'd77ef654a0a5d991c8e77d794a166585499e59dd',
  '61249a552ced95345623e

In [128]:

df = pd.DataFrame.from_dict(repo_dict)
df

Unnamed: 0,commit,commit message,commit changed file,commit minus line,commit plus line
0,e195ee2d084af99e53c9a8e47df8abbec85edc67,MATH-1672: Remove stat.ranking package\n\nFunc...,"[commons-math-legacy/pom.xml, commons-math-leg...",79,1008
1,95847b773741bf66d7a98c18f71596542921c56a,MATH-1670: Remove stat.inference package\n\nFu...,"[commons-math-legacy/pom.xml, , , , , , , , , ...",73,8458
2,6c6ffb5952ebbe921be7dbf7d1d601b8b7d67fa5,Merge pull request #259 from apache/dependabot...,"[.github/workflows/codeql-analysis.yml, .githu...",24,24
3,f7ca94625de99adb2ad7e07e696a327ba7b887b8,Bump github/codeql-action from 3.28.12 to 3.28...,"[.github/workflows/codeql-analysis.yml, .githu...",24,24
4,eb9cfebe960ff19b2b51f67dc46c21a4959080fc,Javadoc @since tag should be after @throws\n,[commons-math-legacy/src/main/java/org/apache/...,18,18
...,...,...,...,...,...
7241,9b7cfb86b26842a3a1a33b5142485cc19d8f1213,Added myself to STATUS and PROPOSAL\n\n\ngit-s...,"[PROPOSAL.html, STATUS.html]",15,24
7242,352f134f0ed563bdfd979b1b78fe8d48a18888e8,A maven project.xml was added for conv\n\n\ngi...,[project.xml],94,0
7243,e4694325bc52209c0700294437d4e260991e9f82,Starting source code - basic matrix operations...,"[build.xml, src/java/org/apache/commons/math/F...",1558,14
7244,925847780e3c03b04936f41a1739efe16702184d,added new commons math component\n\n\ngit-svn-...,"[.cvsignore, PROPOSAL.html, RELEASE-NOTES.txt,...",246,0
