In [1]:
from pydriller import RepositoryMining

In [2]:
config = {
    "url": "../data/react-vis",
    "url_test_01": "../data/test_repo"
}

In [3]:
import os.path

class Directory:
    
    ## class Directory maintains a static snapshot of a directory structure
    def __init__(self, name=''):
        self.sub_directories = {}
        self.files = {}
        self.size = 0
        self.freeze = False
        self.name = name
        
    def add_file(self, path, file, size = 1):
        
        dirname = os.path.dirname(path)
        
        if dirname == '':
            self.files[path] = file
        else:
            sub_directory = path.split('/')[0]
            if sub_directory not in self.sub_directories: #create new directory
                self.sub_directories[sub_directory] = Directory(name=sub_directory)
            self.sub_directories[sub_directory].add_file('/'.join(path.split('/')[1:]), file, size = size)
        
        self.size += size
    
    def remove_file(self, path, size = 1):
        
        dirname = os.path.dirname(path)
        
        if dirname == '':
            del self.files[path]
        else:
            sub_directory = path.split('/')[0]
            self.sub_directories[sub_directory].remove_file('/'.join(path.split('/')[1:]), size = size)
            
        self.size -= size
    
    def rename_file(self, old_path, new_path, file, size = 1):
        
        self.remove_file(old_path, size = size)
        self.add_file(new_path, file, size = size)
        
    def modify_file(self, path, size_change = 0):
    
        self.size += size_change
        
        dirname = os.path.dirname(path)
        if dirname != '':
            sub_directory = path.split('/')[0]
            self.sub_directories[sub_directory].modify_file('/'.join(path.split('/')[1:]), size_change = size_change)
        
    def clean_print(self, level=0):
        
        ## print a depth first tranversal of the tree
        print(' ' * level + self.name)
        for directoryname in self.sub_directories:
            self.sub_directories[directoryname].clean_print(level = level + 1)
        for filename in self.files:
            print(' ' * (level + 1) + filename)
        
    def to_json_tree(self):
        
        pass



In [4]:
#test directory functions

root = Directory()

root.add_file('a', None)
root.add_file('b/c/d', None)
root.add_file('b/c/d', None)
root.add_file('b/c/e', None)

root.remove_file('b/c/d')

root.clean_print()



 b
  c
   e
 a


In [6]:
## only for textfiles

import re
from collections import Counter

class File:
    
    def __init__(self, name='undefined'):
        self.name = name
        self.authors = None
        self.authors_lines = None
        self.creation_time = None
        self.content = [] #content contains lines in the files each line {'last_author': xx, 'string': line_content}
        
    def update(self, diff, author='undefined'):
        
        lines = diff.split('\n')
        
        i = 0
        while i < len(lines):
            if lines[i].startswith('@@'):
                idx = re.search('[0-9]+', lines[i])[0] #-1
                idx = int(idx)
                p_idx = idx
                i += 1
                while i < len(lines) and not lines[i].startswith('@@'):
                    if lines[i].startswith('+'): #update content
                        self.content.insert(p_idx, {
                            'last_author': author,
                            'string': lines[i][1:]
#                             'timestamp':  #TODO: add timestamp to the line
                        })
                        idx += 1
                        p_idx += 1
                    elif lines[i].startswith('-'):
                        self.content.pop(p_idx)
                    else:
                        p_idx = idx
                        idx += 1
                    
                    i += 1
                    
        self.authors = None #none marks the field as dirty
        self.authors_lines = None 
                    
    def authors(self):
        # return the authors of the file
        if self.authors is None:
            self.authors = list(set([x['last_author'] for x in self.content]))
        return self.authors
        
    def lines(self):
        return len(self.content)
    
    def authors_lines(self):
        if self.authors_lines is None:j
            self.authors_lines = dict(Counter([x['last_author'] for x in self.content]))       
        return self.authors_lines



IndentationError: unexpected indent (<ipython-input-6-e76cfd47697e>, line 57)

In [168]:
## test file class
## use readme.md in test_repo for analysis

file = File(name='readme.md')

from pydriller.domain.commit import ModificationType

i, limit = 0, 10
for commit in RepositoryMining(path_to_repo=["../data/test_repo"]).traverse_commits():
    if i < limit:
        print("Project {}, commit {}, author {}, date {}".format(
            commit.project_path, commit.hash, commit.author.email, commit.author_date, commit.msg))
        i += 1
        for x in file.content:
            print(x)
        for modification in commit.modifications:
            print("File {}: change type {} added {}, removed {} ".format(modification.new_path, modification.change_type , modification.added, modification.removed))
            print(modification.diff)
            file.update(modification.diff, author = commit.author.email)


Project ../data/test_repo, commit 91502f0b0c67131cd442a8815f93977e52be275c, author xpp2007@gmail.com, date 2019-06-02 16:27:18-07:00
File readme.md: change type ModificationType.ADD added 7, removed 0 
@@ -0,0 +1,7 @@
+a
+b
+c
+d
+e
+f
+g

Project ../data/test_repo, commit 87754c302c374db2e0e1b8f7390dd27bea8cd863, author xpp2007@gmail.com, date 2019-06-02 16:28:00-07:00
{'last_author': 'xpp2007@gmail.com', 'string': 'a'}
{'last_author': 'xpp2007@gmail.com', 'string': 'b'}
{'last_author': 'xpp2007@gmail.com', 'string': 'c'}
{'last_author': 'xpp2007@gmail.com', 'string': 'd'}
{'last_author': 'xpp2007@gmail.com', 'string': 'e'}
{'last_author': 'xpp2007@gmail.com', 'string': 'f'}
{'last_author': 'xpp2007@gmail.com', 'string': 'g'}
File readme.md: change type ModificationType.MODIFY added 1, removed 2 
@@ -1,7 +1,6 @@
 a
-b
 c
 d
-e
 f
+xx
 g

Project ../data/test_repo, commit f6ad043b287cd88e2866ac4d7cca6f90042ecd12, author xpp2007@gmail.com, date 2019-06-05 20:51:23-07:00
{'last_author': 