### Extract directory structure from commits and summarize rate of change

In [2]:
from config import repo_url, export_directory
from common import collect_commits

In [68]:
import os.path

class Directory:
    
    ## class Directory maintains a static snapshot of a directory structure
    def __init__(self, name=''):
        self.sub_directories = {}
        self.files = {}
        self.size = 0
        self.name = name
        
    def add_file(self, path, file, size = 1):
                
        dirname = os.path.dirname(path)
        
        if dirname == '':
            self.files[path] = file
        else:
            sub_directory = path.split('/')[0]
            if sub_directory not in self.sub_directories: #create new directory
                self.sub_directories[sub_directory] = Directory(name=sub_directory)
            self.sub_directories[sub_directory].add_file('/'.join(path.split('/')[1:]), file, size = size)
        
        self.size += size
    
    def remove_file(self, path, size = 1):
                
        dirname = os.path.dirname(path)
        
        if dirname == '':
            if path in self.files:
                del self.files[path]
        else:
            sub_directory = path.split('/')[0]
            if sub_directory in self.sub_directories:
                self.sub_directories[sub_directory].remove_file('/'.join(path.split('/')[1:]), size = size)
            
        self.size -= size
    
    def rename_file(self, old_path, new_path, file, size = 1):
        
        self.remove_file(old_path, size = size)
        self.add_file(new_path, file, size = size)
        
    def modify_file(self, path, size_change = 0):
                   
        self.size += size_change
        
        dirname = os.path.dirname(path)
        if dirname == '':
            if path not in self.files:
                self.files[path] = ''
        else:
            sub_directory = path.split('/')[0]
            if sub_directory not in self.sub_directories: #create new directory if sub_directory not exists
                self.sub_directories[sub_directory] = Directory(name=sub_directory)
            self.sub_directories[sub_directory].modify_file('/'.join(path.split('/')[1:]), size_change = size_change)
        

    def clean_empty_folder(self):

        for k, v in self.sub_directories.items():
            v.clean_empty_folder()

        to_remove = []            
        for k, v in self.sub_directories.items():
            if len(v.files) == 0 and len(v.sub_directories) == 0:
                to_remove.append(k)

        for k in to_remove:
            del self.sub_directories[k]

        return self
        
        
    def clean_print(self, level=0):
        
        ## print a depth first tranversal of the tree
        print('\t' * level + self.name)
        for directoryname in self.sub_directories:
            self.sub_directories[directoryname].clean_print(level = level + 1)
        for filename in self.files:
            print('\t' * (level + 1) + filename)
            
            
            
    def update_with_commits(self, commits):
        # four change types Added, Deleted, Modified, or Renamed.
        for commit in commits:
            for modification in commit.modifications:
                if modification.change_type.name == 'ADD':
                    self.add_file(modification.new_path, '')
                if modification.change_type.name == 'DELETE':
                    self.remove_file(modification.old_path)
                if modification.change_type.name == 'RENAME':
                    self.rename_file(modification.old_path, modification.new_path, '')
                if modification.change_type.name == 'MODIFY':
                    self.modify_file(modification.old_path)

        return self
        
        
    def to_json_tree(self):
        
        pass



In [72]:
branch="master"
root = Directory()
root_copy = Directory()
from pydriller import RepositoryMining


for commit in RepositoryMining(
        path_to_repo=repo_url,
        only_in_branch=branch,
).traverse_commits():
    root.update_with_commits([commit])
    root.clean_empty_folder()    
    root_copy.update_with_commits([commit])


In [73]:
root.clean_empty_folder()

<__main__.Directory at 0x7fde37cc7d60>

In [74]:
root.clean_print()


	src
		utils
			actionTypes.ts
			isPlainObject.ts
			symbol-observable.ts
		types
			store.ts
			actions.ts
			reducers.ts
			middleware.ts
		index.ts
		bindActionCreators.ts
		applyMiddleware.ts
		compose.ts
		createStore.ts
		combineReducers.ts
	examples
		todomvc
			src
				actions
					index.js
					index.spec.js
				components
					Footer.js
					Footer.spec.js
					MainSection.js
					MainSection.spec.js
					TodoItem.js
					TodoItem.spec.js
					TodoTextInput.js
					TodoTextInput.spec.js
					App.js
					App.spec.js
					Link.js
					Link.spec.js
					TodoList.js
					TodoList.spec.js
					Header.js
					Header.spec.js
				constants
					ActionTypes.js
					TodoFilters.js
				containers
					FilterLink.js
					Header.js
					MainSection.js
					VisibleTodoList.js
				reducers
					index.js
					todos.js
					todos.spec.js
					visibilityFilter.js
				selectors
					index.js
				index.js
			public
				index.html
			package.json
			.gitignore
			README.md
			package-lock.json


In [100]:
import os.path

class Directory:
    
    ## class Directory maintains a static snapshot of a directory structure
    def __init__(self, name=''):
        self.sub_directories = {}
        self.files = {}
        self.nfiles = 0
        self.name = name
        
    def add_file(self, path, file, lines = None):
                
        dirname = os.path.dirname(path)
        
        if dirname == '':
            self.files[path] = {
                "file": file,
                "lines": lines
            }
        else:
            sub_directory = path.split('/')[0]
            if sub_directory not in self.sub_directories: #create new directory
                self.sub_directories[sub_directory] = Directory(name=sub_directory)
            self.sub_directories[sub_directory].add_file('/'.join(path.split('/')[1:]), file, lines = lines)
        
        self.nfiles += 1
    
    def remove_file(self, path):
                
        dirname = os.path.dirname(path)
        
        if dirname == '':
            if path in self.files:
                self.nfiles -= 1
                file_data = self.files[path]
                del self.files[path]
                return file_data
            else:
                return None
        else:
            sub_directory = path.split('/')[0]
            if sub_directory in self.sub_directories:
                self.nfiles -= 1
                return self.sub_directories[sub_directory].remove_file('/'.join(path.split('/')[1:]))
            else:
                return None
        
    
    def rename_file(self, old_path, new_path, file, lines=None):
        
        file_data = self.remove_file(old_path)
        if file_data is not None:
            self.add_file(new_path, file_data['file'], file_data['lines'])
        else:
            self.add_file(new_path, file, lines=lines)
        

    def modify_file(self, path, file, lines=None):
                           
        dirname = os.path.dirname(path)
        if dirname == '':
            if path not in self.files:
                self.files[path] = {
                    'file': file,
                    'lines': None
                }
            self.files[path]['lines'] = lines
        else:
            sub_directory = path.split('/')[0]
            if sub_directory not in self.sub_directories: #create new directory if sub_directory not exists
                self.sub_directories[sub_directory] = Directory(name=sub_directory)
            self.sub_directories[sub_directory].modify_file('/'.join(path.split('/')[1:]), file, lines = lines)
        

    def clean_empty_folder(self):

        for k, v in self.sub_directories.items():
            v.clean_empty_folder()

        to_remove = []            
        for k, v in self.sub_directories.items():
            if len(v.files) == 0 and len(v.sub_directories) == 0:
                to_remove.append(k)

        for k in to_remove:
            del self.sub_directories[k]

        return self
        
        
    def clean_print(self, level=0):
        
        ## print a depth first tranversal of the tree
        print('\t' * level + self.name + ' ' + str(self.nfiles))
        for directoryname in self.sub_directories:
            self.sub_directories[directoryname].clean_print(level = level + 1)
        for filename in self.files:
            print('\t' * (level + 1) + filename + ' ' + str(self.files[filename]['lines']))
            
            
    def update_with_commits(self, commits):
        # four change types Added, Deleted, Modified, or Renamed.
        for commit in commits:
            for modification in commit.modifications:
                if modification.change_type.name == 'ADD':
                    self.add_file(modification.new_path, '', lines=modification.nloc)
                if modification.change_type.name == 'DELETE':
                    self.remove_file(modification.old_path)
                if modification.change_type.name == 'RENAME':
                    self.rename_file(modification.old_path, modification.new_path, '', lines=modification.nloc)
                if modification.change_type.name == 'MODIFY':
                    self.modify_file(modification.old_path, '', lines=modification.nloc)

        return self
        
        
    def to_json_tree(self):

        root = {}
        root['children'] = []
        for k, v in self.sub_directories.items():
            root['children'].append({
                'name': k,
                **v.to_json_tree()})

        for f, v in self.files.items():
            root['children'].append({
                'name': f,
                'lines': v['lines'],
                'value': 1
            })

        return root


In [97]:
branch="master"
root = Directory()
root_copy = Directory()
from pydriller import RepositoryMining


for commit in RepositoryMining(
        path_to_repo=repo_url,
        only_in_branch=branch,
).traverse_commits():
    root.update_with_commits([commit])
    root.clean_empty_folder()    
    root_copy.update_with_commits([commit])


In [98]:
root.clean_empty_folder()
root.clean_print()

 395
	src 14
		utils 4
			actionTypes.ts 12
			isPlainObject.ts 8
			symbol-observable.ts 4
		types 4
			store.ts 66
			actions.ts 12
			reducers.ts 28
			middleware.ts 14
		index.ts 53
		bindActionCreators.ts 54
		applyMiddleware.ts 60
		compose.ts 32
		createStore.ts 200
		combineReducers.ts 170
	examples 227
		todomvc 35
			src 30
				actions 2
					index.js 8
					index.spec.js 39
				components 16
					Footer.js 41
					Footer.spec.js 77
					MainSection.js 35
					MainSection.spec.js 91
					TodoItem.js 57
					TodoItem.spec.js 95
					TodoTextInput.js 47
					TodoTextInput.spec.js 66
					App.js 9
					App.spec.js 27
					Link.js 19
					Link.spec.js 35
					TodoList.js 19
					TodoList.spec.js 50
					Header.js 20
					Header.spec.js 39
				constants 2
					ActionTypes.js 7
					TodoFilters.js 3
				containers 4
					FilterLink.js 15
					Header.js 4
					MainSection.js 16
					VisibleTodoList.js 16
				reducers 4
					index.js 8
					todos.js 54
					todos.spec.js 276
					vis

In [101]:
root.to_json_tree()

{'children': [{'name': 'src',
   'children': [{'name': 'utils',
     'children': [{'name': 'actionTypes.ts',
       'lines': 12,
       'value': 1,
       'children': []},
      {'name': 'isPlainObject.ts', 'lines': 8, 'value': 1, 'children': []},
      {'name': 'symbol-observable.ts',
       'lines': 4,
       'value': 1,
       'children': []}]},
    {'name': 'types',
     'children': [{'name': 'store.ts',
       'lines': 66,
       'value': 1,
       'children': []},
      {'name': 'actions.ts', 'lines': 12, 'value': 1, 'children': []},
      {'name': 'reducers.ts', 'lines': 28, 'value': 1, 'children': []},
      {'name': 'middleware.ts', 'lines': 14, 'value': 1, 'children': []}]},
    {'name': 'index.ts', 'lines': 53, 'value': 1, 'children': []},
    {'name': 'bindActionCreators.ts', 'lines': 54, 'value': 1, 'children': []},
    {'name': 'applyMiddleware.ts', 'lines': 60, 'value': 1, 'children': []},
    {'name': 'compose.ts', 'lines': 32, 'value': 1, 'children': []},
    {'name': 

In [90]:
# branch="master"
# from pydriller import RepositoryMining

# for commit in RepositoryMining(
#         path_to_repo=repo_url,
#         only_in_branch=branch,
# ).traverse_commits():
#     for modification in commit.modifications:
#         if modification.change_type.name == 'ADD':
#             print(modification.added, modification.nloc)
#             break
#         if modification.change_type.name == 'DELETE':
#             print(modification.removed, modification.nloc)
#             break
#         if modification.change_type.name == 'RENAME':
#             print(modification.added, modification.removed, modification.nloc)
#             break
#         if modification.change_type.name == 'MODIFY':
#             print(modification.added, modification.removed, modification.nloc)
#             break
            


1 1 12
17 32 54
0 0 None
5 2 15
1 1 25
2 3 26
0 0 None
2 2 65
1 1 5
5 5 15
1 7 15


KeyboardInterrupt: 