diff --git a/README.md b/README.md index 7fb5dae3..fd5e5d01 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,47 @@ which will print you: 'new_repeat': 2}}} ``` +## Extract keys from iterables + +Sometimes a nested iterable can actually be considered as a dictionary. You can define a function that will attempt to extract the key from the items themselves. + +```.py +def name_is_key(item, **kwargs): + if not hasattr(item, 'get'): + return None + + return item.get('name') + +t1 = [ + {'name': 'bim', 'value': 3}, + {'name': 'bam', 'value': 4}, +] +t2 = [ + {'name': 'bam', 'value': 4}, + {'name': 'bim', 'value': 4}, +] +ddiff = DeepDiff(t1, t2, key_extractor=name_is_key) +print(ddiff) +``` + +will print out: + +```.py +{ + "values_changed": { + "root['bim']['value']": { + 'new_value': 4, + 'old_value': 3 + } + } +} +``` + +The extractor function must return `None` when it cannot find a key. + +If the key of any item cannot be extracted, or if two items yield the same key, the default diffing strategy (or the ignore-order strategy, if so configured) is applied to the iterable instead. + + ## Exclude types or paths ### Exclude certain types from comparison: diff --git a/deepdiff/diff.py b/deepdiff/diff.py index c14e5b28..0adc2ea2 100644 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -72,6 +72,10 @@ class DeepDiff(ResultDict): For Decimals, Python's format rounds 2.5 to 2 and 3.5 to 4 (to the closest even number) + key_extractor(item, index, significant_digits): function, attempts to transform iterables into dicts. + When defined, it is run on all items in an iterable. If all the extracted keys are not None and + distinct, the iterable will be diffed as a dict. + verbose_level : int >= 0, default = 1. Higher verbose level shows you more details. For example verbose level 1 shows what dictionary item are added or removed. @@ -193,8 +197,8 @@ class DeepDiff(ResultDict): >>> >>> print (ddiff['values_changed']["root[4]['b']"]["diff"]) - --- - +++ + --- + +++ @@ -1,5 +1,4 @@ -world! -Goodbye!
@@ -613,6 +617,7 @@ def __init__(self,
                  t2,
                  ignore_order=False,
                  report_repetition=False,
+                 key_extractor=None,
                  significant_digits=None,
                  exclude_paths=set(),
                  exclude_types=set(),
@@ -626,6 +631,7 @@ def __init__(self,
                 "exclude_paths, exclude_types, verbose_level and view.") % ', '.join(kwargs.keys()))
 
         self.ignore_order = ignore_order
+        self.key_extractor = key_extractor
         self.report_repetition = report_repetition
         self.exclude_paths = set(exclude_paths)
         self.exclude_types = set(exclude_types)
@@ -1065,6 +1071,32 @@ def __diff_types(self, level):
         level.report_type = 'type_changes'
         self.__report_result('type_changes', level)
 
+    def __create_dict(self, iterable):
+        """Build a ``{extracted key: item}`` dict from ``iterable``.
+
+        Returns None when the items cannot be keyed: the iterable is a
+        one-shot iterator (reading it here would leave nothing for the
+        fallback diff), or an extracted key is None, unhashable or
+        duplicated.
+        """
+        if iter(iterable) is iterable:
+            return None
+        items = list(iterable)
+        keys = [
+            self.key_extractor(item,
+                               index=i,
+                               significant_digits=self.significant_digits)
+            for i, item in enumerate(items)
+        ]
+        try:
+            unique_keys = {k for k in keys if k is not None}
+        except TypeError:
+            # An unhashable key can never be used as a dict key.
+            return None
+        if len(unique_keys) != len(keys):
+            return None
+        return dict(zip(keys, items))
+
     def __diff(self, level, parents_ids=frozenset({})):
         """The main diff method"""
         if level.t1 is level.t2:
@@ -1092,6 +1124,18 @@ def __diff(self, level, parents_ids=frozenset({})):
             self.__diff_set(level)
 
         elif isinstance(level.t1, Iterable):
+            # Diff as dicts only when both sides could be keyed.
+            if self.key_extractor:
+                d1 = self.__create_dict(level.t1)
+                d2 = self.__create_dict(level.t2)
+
+                if d1 is not None and d2 is not None:
+                    self.__diff_dict(
+                        level, parents_ids,
+                        override=True, override_t1=d1, override_t2=d2
+                    )
+                    return
+
             if self.ignore_order:
                 self.__diff_iterable_with_contenthash(level)
             else:
diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py
index 8b1a543b..4c14f08f 100644
--- a/tests/test_diff_text.py
+++ b/tests/test_diff_text.py
@@ -1430,6 +1430,60 @@ def gen2():
 
         self.assertEqual(ddiff, result)
 
+    def test_custom_key_extractor(self):
+        """Items are matched by their extracted 'name' key, not by position."""
+        def name_is_key(item, **kwargs):
+            if not hasattr(item, 'get'):
+                return None
+
+            return item.get('name')
+
+        t1 = [
+            {'name': 'bim', 'value': 3},
+            {'name': 'bam', 'value': 4},
+        ]
+        t2 = [
+            {'name': 'bam', 'value': 4},
+            {'name': 'bim', 'value': 4},
+        ]
+        ddiff = DeepDiff(t1, t2, key_extractor=name_is_key)
+        result = {
+            "values_changed": {
+                "root['bim']['value']": {
+                    'new_value': 4,
+                    'old_value': 3
+                }
+            }
+        }
+
+        self.assertEqual(ddiff, result)
+
+    def test_custom_key_extractor_key_missing(self):
+        """Falls back to the positional diff when an item yields no key."""
+        def name_is_key(item, **kwargs):
+            if not hasattr(item, 'get'):
+                return None
+
+            return item.get('name')
+
+        t1 = [
+            {'not_name': 'bim', 'value': 3},
+            {'name': 'bam', 'value': 4},
+        ]
+        t2 = [
+            {'not_name': 'bam', 'value': 4},
+            {'name': 'bim', 'value': 4},
+        ]
+        ddiff = DeepDiff(t1, t2, key_extractor=name_is_key)
+        result = {
+            'values_changed': {
+                "root[1]['name']": {'new_value': 'bim', 'old_value': 'bam'},
+                "root[0]['value']": {'new_value': 4, 'old_value': 3},
+                "root[0]['not_name']": {'new_value': 'bam', 'old_value': 'bim'}}
+        }
+
+        self.assertEqual(ddiff, result)
+
     @mock.patch('deepdiff.diff.logger')
     @mock.patch('deepdiff.diff.DeepHash')
     def test_diff_when_hash_fails(self, mock_DeepHash, mock_logger):