From d5246f5f2a43138cdb11b0c49175bc81134ad4ab Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Fri, 7 Aug 2020 14:12:54 +0200 Subject: [PATCH 1/3] Split DependencyTree from Node and DatasetDict In order to make future refactorings easier, DependencyTree is no longer a subclass of Node. Instead, the DependencyTree holds an instance of a Node as root. Also, we make the DependencyTree rely on a new _DataIDContainer, which copies some functionality of the DatasetDict. However, this will allow for easier refactorings of the Scene and DependencyTree classes as they are separate. Finally, this commit introduces tests for the DependencyTree class. --- satpy/node.py | 90 ++++++++++++++++++++++++----- satpy/tests/test_dependency_tree.py | 78 +++++++++++++++++++++++++ satpy/tests/test_scene.py | 23 -------- 3 files changed, 152 insertions(+), 39 deletions(-) create mode 100644 satpy/tests/test_dependency_tree.py diff --git a/satpy/node.py b/satpy/node.py index 46b34756a3..5828867cd2 100644 --- a/satpy/node.py +++ b/satpy/node.py @@ -17,9 +17,8 @@ # satpy. If not, see <http://www.gnu.org/licenses/>. """Nodes to build trees.""" -from satpy import DatasetDict from satpy.dataset import DataID, DataQuery, ModifierTuple -from satpy.readers import TooManyResults +from satpy.readers import TooManyResults, get_key from satpy.utils import get_logger from satpy.dataset import create_filtered_query @@ -28,7 +27,7 @@ EMPTY_LEAF_NAME = "__EMPTY_LEAF_SENTINEL__" -class Node(object): +class Node: """A node object.""" def __init__(self, name, data=None): @@ -133,7 +132,7 @@ def trunk(self, unique=True): return res -class DependencyTree(Node): +class DependencyTree: """Structure to discover and store `Dataset` dependencies. Used primarily by the `Scene` object to organize dependency finding. 
@@ -170,15 +169,14 @@ def __init__(self, readers, compositors, modifiers, available_only=False): self.compositors = compositors self.modifiers = modifiers self._available_only = available_only - # we act as the root node of the tree - super(DependencyTree, self).__init__(None) + self._root = Node(None) # keep a flat dictionary of nodes contained in the tree for better # __contains__ - self._all_nodes = DatasetDict() + self._all_nodes = _DataIDContainer() def leaves(self, nodes=None, unique=True): - """Get the leaves of the tree starting at this root. + """Get the leaves of the tree starting at the root. Args: nodes (iterable): limit leaves for these node names @@ -189,7 +187,7 @@ def leaves(self, nodes=None, unique=True): """ if nodes is None: - return super(DependencyTree, self).leaves(unique=unique) + return self._root.leaves(unique=unique) res = list() for child_id in nodes: @@ -211,7 +209,7 @@ def trunk(self, nodes=None, unique=True): """ if nodes is None: - return super(DependencyTree, self).trunk(unique=unique) + return self._root.trunk(unique=unique) res = list() for child_id in nodes: @@ -236,12 +234,13 @@ def add_child(self, parent, child): def add_leaf(self, ds_id, parent=None): """Add a leaf to the tree.""" if parent is None: - parent = self + parent = self._root try: node = self[ds_id] except KeyError: node = Node(ds_id) self.add_child(parent, node) + return node def copy(self): """Copy this node tree. @@ -253,9 +252,9 @@ def copy(self): any datasets not already existing in the dependency tree. 
""" new_tree = DependencyTree({}, self.compositors, self.modifiers) - for c in self.children: + for c in self._root.children: c = c.copy(node_cache=new_tree._all_nodes) - new_tree.add_child(new_tree, c) + new_tree.add_child(new_tree._root, c) return new_tree def __contains__(self, item): @@ -268,11 +267,15 @@ def __getitem__(self, item): def contains(self, item): """Check contains when we know the *exact* DataID or DataQuery.""" - return super(DatasetDict, self._all_nodes).__contains__(item) + return super(_DataIDContainer, self._all_nodes).__contains__(item) def getitem(self, item): """Get Node when we know the *exact* DataID or DataQuery.""" - return super(DatasetDict, self._all_nodes).__getitem__(item) + return super(_DataIDContainer, self._all_nodes).__getitem__(item) + + def __str__(self): + """Render the dependency tree as a string.""" + return self._root.display() def get_compositor(self, key): """Get a compositor.""" @@ -527,6 +530,61 @@ def find_dependencies(self, dataset_keys, query=None): unknown_datasets.update(unknowns) continue - self.add_child(self, n) + self.add_child(self._root, n) return unknown_datasets + + +class _DataIDContainer(dict): + """Special dictionary object that can handle dict operations based on dataset name, wavelength, or DataID. + + Note: Internal dictionary keys are `DataID` objects. + + """ + + def keys(self, names=False, wavelengths=False): + """Give currently contained keys.""" + # sort keys so things are a little more deterministic (.keys() is not) + keys = sorted(super(_DataIDContainer, self).keys()) + if names: + return (k.get('name') for k in keys) + elif wavelengths: + return (k.get('wavelength') for k in keys) + else: + return keys + + def get_key(self, match_key, num_results=1, best=True, **dfilter): + """Get multiple fully-specified keys that match the provided query. + + Args: + match_key (DataID): DataID of query parameters to use for + searching. 
Any parameter that is `None` + is considered a wild card and any match is + accepted. Can also be a string representing the + dataset name or a number representing the dataset + wavelength. + num_results (int): Number of results to return. If `0` return all, + if `1` return only that element, otherwise + return a list of matching keys. + **dfilter (dict): See `get_key` function for more information. + + """ + return get_key(match_key, self.keys(), num_results=num_results, + best=best, **dfilter) + + def __getitem__(self, item): + """Get item from container.""" + try: + # short circuit - try to get the object without more work + return super(_DataIDContainer, self).__getitem__(item) + except KeyError: + key = self.get_key(item) + return super(_DataIDContainer, self).__getitem__(key) + + def __contains__(self, item): + """Check if item exists in container.""" + try: + key = self.get_key(item) + except KeyError: + return False + return super(_DataIDContainer, self).__contains__(key) diff --git a/satpy/tests/test_dependency_tree.py b/satpy/tests/test_dependency_tree.py new file mode 100644 index 0000000000..cb6714130c --- /dev/null +++ b/satpy/tests/test_dependency_tree.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2020 Satpy developers +# +# This file is part of satpy. +# +# satpy is free software: you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation, either version 3 of the License, or (at your option) any later +# version. +# +# satpy is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# satpy. If not, see <http://www.gnu.org/licenses/>. 
+"""Unit tests for the dependency tree class and dependencies.""" + +import unittest +from satpy.tests.utils import make_cid, make_dataid +from satpy.node import DependencyTree + + +class TestDependencyTree(unittest.TestCase): + """Test the dependency tree. + + This is what we are working with:: + + None (No Data) + +DataID(name='comp19') + + +DataID(name='ds5', resolution=250, modifiers=('res_change',)) + + + +DataID(name='ds5', resolution=250, modifiers=()) + + + +__EMPTY_LEAF_SENTINEL__ (No Data) + + +DataID(name='comp13') + + + +DataID(name='ds5', resolution=250, modifiers=('res_change',)) + + + + +DataID(name='ds5', resolution=250, modifiers=()) + + + + +__EMPTY_LEAF_SENTINEL__ (No Data) + + +DataID(name='ds2', resolution=250, calibration=<calibration.reflectance>, modifiers=()) + + """ + + def setUp(self): + """Set up the test tree.""" + self.dependency_tree = DependencyTree(None, None, None) + + composite_1 = make_cid(name="comp19") + dependency_1 = make_dataid(name="ds5", resolution=250, modifiers=("res_change",)) + dependency_1_1 = make_dataid(name="ds5", resolution=250, modifiers=tuple()) + node_composite_1 = self.dependency_tree.add_leaf(composite_1) + node_dependency_1 = self.dependency_tree.add_leaf(dependency_1, node_composite_1) + self.dependency_tree.add_leaf(dependency_1_1, node_dependency_1) + # ToDo: do we really want the empty node to be at the same level as the unmodified data? + node_dependency_1.add_child(self.dependency_tree.empty_node) + + dependency_2 = make_cid(name="comp13") + dependency_2_1 = dependency_1 + node_dependency_2 = self.dependency_tree.add_leaf(dependency_2, node_composite_1) + self.dependency_tree.add_leaf(dependency_2_1, node_dependency_2) + # We don't need to add the unmodified dependency a second time. 
+ + dependency_3 = make_dataid(name='ds2', resolution=250, calibration="reflectance", modifiers=tuple()) + self.dependency_tree.add_leaf(dependency_3, node_composite_1) + + def test_copy_preserves_unique_empty_node(self): + """Test that dependency tree copy preserves the uniqueness of the empty node.""" + new_dependency_tree = self.dependency_tree.copy() + assert self.dependency_tree.empty_node is new_dependency_tree.empty_node + + self.assertIs(self.dependency_tree._root.children[0].children[0].children[1], + self.dependency_tree.empty_node) + self.assertIs(new_dependency_tree._root.children[0].children[0].children[1], + self.dependency_tree.empty_node) + + def test_new_dependency_tree_preserves_unique_empty_node(self): + """Test that dependency tree instantiation preserves the uniqueness of the empty node.""" + new_dependency_tree = DependencyTree(None, None, None) + assert self.dependency_tree.empty_node is new_dependency_tree.empty_node diff --git a/satpy/tests/test_scene.py b/satpy/tests/test_scene.py index c16f3e0a2d..4cf67dea66 100644 --- a/satpy/tests/test_scene.py +++ b/satpy/tests/test_scene.py @@ -1883,29 +1883,6 @@ def _test(self, sensor_names): available_comp_ids = scene.available_composite_ids() self.assertIn(make_cid(name='static_image'), available_comp_ids) - @mock.patch('satpy.composites.CompositorLoader.load_compositors') - @mock.patch('satpy.scene.Scene._create_reader_instances') - def test_empty_node_copy(self, cri, cl): - """Test copying a dependency tree while preserving the empty node identical.""" - import satpy.scene - from satpy.tests.utils import FakeReader, test_composites - cri.return_value = {'fake_reader': FakeReader( - 'fake_reader', 'fake_sensor')} - comps, mods = test_composites('fake_sensor') - cl.return_value = (comps, mods) - scene = satpy.scene.Scene(filenames=['bla'], - base_dir='bli', - reader='fake_reader') - - # Check dependency tree nodes - # initialize the dep tree without loading the data - 
scene._dependency_tree.find_dependencies({'comp19'}) - sc2 = scene.copy() - self.assertIs(scene._dependency_tree.children[0].children[0].children[1], scene._dependency_tree.empty_node) - self.assertIs(scene._dependency_tree.children[0].children[0].children[1], sc2._dependency_tree.empty_node) - self.assertIs(sc2._dependency_tree.children[0].children[0].children[1], scene._dependency_tree.empty_node) - self.assertIs(sc2._dependency_tree.children[0].children[0].children[1], sc2._dependency_tree.empty_node) - class TestSceneResampling(unittest.TestCase): """Test resampling a Scene to another Scene object.""" From f23777b08cde5494ae70d335db0987cdc05c4a16 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Fri, 7 Aug 2020 14:57:41 +0200 Subject: [PATCH 2/3] Clean up --- satpy/node.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/satpy/node.py b/satpy/node.py index 5828867cd2..0b533b0c94 100644 --- a/satpy/node.py +++ b/satpy/node.py @@ -550,10 +550,10 @@ def keys(self, names=False, wavelengths=False): return (k.get('name') for k in keys) elif wavelengths: return (k.get('wavelength') for k in keys) - else: - return keys - def get_key(self, match_key, num_results=1, best=True, **dfilter): + return keys + + def get_key(self, match_key): """Get multiple fully-specified keys that match the provided query. Args: @@ -563,14 +563,9 @@ def get_key(self, match_key, num_results=1, best=True, **dfilter): accepted. Can also be a string representing the dataset name or a number representing the dataset wavelength. - num_results (int): Number of results to return. If `0` return all, - if `1` return only that element, otherwise - return a list of matching keys. - **dfilter (dict): See `get_key` function for more information. 
""" - return get_key(match_key, self.keys(), num_results=num_results, - best=best, **dfilter) + return get_key(match_key, self.keys()) def __getitem__(self, item): """Get item from container.""" From 8c0cae4c9dbc7f005df6e9040de23b0a10fb3304 Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Fri, 7 Aug 2020 15:39:51 +0200 Subject: [PATCH 3/3] Clean up --- satpy/node.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/satpy/node.py b/satpy/node.py index 0b533b0c94..6ad2e082d2 100644 --- a/satpy/node.py +++ b/satpy/node.py @@ -542,25 +542,17 @@ class _DataIDContainer(dict): """ - def keys(self, names=False, wavelengths=False): + def keys(self): """Give currently contained keys.""" # sort keys so things are a little more deterministic (.keys() is not) - keys = sorted(super(_DataIDContainer, self).keys()) - if names: - return (k.get('name') for k in keys) - elif wavelengths: - return (k.get('wavelength') for k in keys) - - return keys + return sorted(super(_DataIDContainer, self).keys()) def get_key(self, match_key): """Get multiple fully-specified keys that match the provided query. Args: - match_key (DataID): DataID of query parameters to use for - searching. Any parameter that is `None` - is considered a wild card and any match is - accepted. Can also be a string representing the + match_key (DataID): DataID or DataQuery of query parameters to use for + searching. Can also be a string representing the dataset name or a number representing the dataset wavelength.