Skip to content

Commit

Permalink
Refactor DeltaCode.determine_moved() #4
Browse files Browse the repository at this point in the history
  * Convert utility function 'index_delta_files()' to DeltaCode method
    'index_deltas()'.
  * Index Delta objects rather than their respective File objects.
  * Create new test codebases and add new tests.
  * Modify existing tests to include new 'moved' Delta as necessary.

Signed-off-by: John M. Horan <johnmhoran@gmail.com>
  • Loading branch information
johnmhoran committed Jan 15, 2018
1 parent 82dec05 commit be39ca0
Show file tree
Hide file tree
Showing 9 changed files with 3,863 additions and 70 deletions.
84 changes: 42 additions & 42 deletions src/deltacode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,49 +132,49 @@ def determine_delta(self):

def determine_moved(self):
"""
Modify the OrderedDict of Delta objects by comparing the 'sha1', 'name'
and 'size' attributes of the 'removed' and 'added' files and converting
each unique matching pair to a Delta object with the category attribute
'moved'. This method currently excludes groups of matching 'removed'
and 'added' files comprising more than one 'removed' and one 'added' file.
Modify the OrderedDict of Delta objects by creating an index of
'removed' Delta objects and an index of 'added' Delta objects indexed
by their 'sha1' attribute, finding Deltas in both indices with the same
'sha1', and converting the 'added' and 'removed' Deltas to a 'moved'
Delta if (1) there is only one such 'added' and one such 'moved'
Delta and (2) these two Deltas have the same 'name' attribute.
"""
list_old_removed_files = [i.old_file for i in self.deltas['removed']]
old_removed_index = utils.index_delta_files(list_old_removed_files, 'sha1')

list_new_added_files = [i.new_file for i in self.deltas['added']]
new_added_index = utils.index_delta_files(list_new_added_files, 'sha1')

unique_removed_sha1 = []
unique_added_sha1 = []

for key, value in old_removed_index.iteritems():
if len(value) == 1:
for file in value:
unique_removed_sha1.append({'sha1': file.sha1, 'name': file.name, 'size': file.size})

for key, value in new_added_index.iteritems():
if len(value) == 1:
for file in value:
unique_added_sha1.append({'sha1': file.sha1, 'name': file.name, 'size': file.size})

for added_dict in unique_added_sha1:
for removed_dict in unique_removed_sha1:
if added_dict['sha1'] == removed_dict['sha1'] and \
added_dict['name'] == removed_dict['name'] and \
added_dict['size'] == removed_dict['size']:

for delta_added in self.deltas['added'][:]:
if added_dict['sha1'] == delta_added.new_file.sha1 and \
added_dict['name'] == delta_added.new_file.name and \
added_dict['size'] == delta_added.new_file.size:
self.deltas['moved'].append(Delta(delta_added.new_file, None, 'moved'))
self.deltas['added'].remove(delta_added)

for delta_removed in self.deltas['removed'][:]:
for delta_moved in self.deltas['moved']:
if added_dict['sha1'] == delta_removed.old_file.sha1 and added_dict['sha1'] == delta_moved.new_file.sha1:
delta_moved.old_file = delta_removed.old_file
self.deltas['removed'].remove(delta_removed)
removed = self.index_deltas([i for i in self.deltas['removed']], 'sha1')
added = self.index_deltas([i for i in self.deltas['added']], 'sha1')

for k, v in removed.iteritems():

This comment has been minimized.

Copy link
@steven-esser

steven-esser Jan 15, 2018

Contributor

We should probably be more explicit here and use sha1 as k and deltas as v in the loop

if (k in added and

This comment has been minimized.

Copy link
@steven-esser

steven-esser Jan 15, 2018

Contributor

This is a long and confusing if statement. We should probably move this logic out into some helper function that handles all the correct checks and simply returns a boolean value.

len(removed[k]) == 1 and
len(added[k]) == 1 and
removed[k][0].old_file.name == added[k][0].new_file.name):
self.deltas['moved'].append(Delta(added[k][0].new_file, removed[k][0].old_file, 'moved'))
self.deltas['added'].remove(added[k][0])
self.deltas['removed'].remove(removed[k][0])

This comment has been minimized.

Copy link
@steven-esser

steven-esser Jan 15, 2018

Contributor

We should probably avoid referencing specific delta objects from the original dict. Using values like added[k][0] in the lines above make this block of code very incoherent. If we need to operate or use the value added[k][0] multiple times, we should assign that value to a variable that has a meaningful name


def index_deltas(self, delta_list, index_key='path'):

This comment has been minimized.

Copy link
@steven-esser

steven-esser Jan 15, 2018

Contributor

We should probably change the order of the arguments here. Having index_key be the first argument makes more sense here and is easier to read when called.

"""
Return a dictionary of a list of Delta objects indexed by the key
passed via the 'index_key' variable. If no 'index_key' variable is
passed, the dict is keyed by the Delta object's 'path' variable. For a
'removed' Delta object, use the variable from the 'old_file'; for all
other Delta objects (e.g., 'added'), use the 'new_file'. This function
does not currently catch the AttributeError exception.
"""
index = {}

for delta in delta_list:
if delta.category == 'removed':
key = getattr(delta.old_file, index_key)
else:
key = getattr(delta.new_file, index_key)

if index.get(key) is None:
index[key] = []
index[key].append(delta)
else:
index[key].append(delta)

return index

def get_stats(self):
"""
Expand Down
21 changes: 0 additions & 21 deletions src/deltacode/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,24 +101,3 @@ def fix_trees(a_files, b_files):
for b_file in b_files:
b_file.original_path = b_file.path
b_file.path = '/'.join(paths.split(b_file.path)[b_offset:])


def index_delta_files(self, index_key='path'):
"""
Return a dictionary of a list of File objects indexed by the key passed via
the 'key' variable. If no 'key' variable is passed, the dict is
keyed by the File object's 'path' variable. This function does not
currently catch the AttributeError exception.
"""
index = {}

for f in self:
key = getattr(f, index_key)

if index.get(key) is None:
index[key] = []
index[key].append(f)
else:
index[key].append(f)

return index
Loading

0 comments on commit be39ca0

Please sign in to comment.