Skip to content

Commit

Permalink
metadata clean up and more testing
Browse files Browse the repository at this point in the history
  • Loading branch information
Eduardo Blancas Reyes committed Oct 4, 2020
1 parent d902da8 commit 92bbd02
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 25 deletions.
45 changes: 45 additions & 0 deletions src/ploomber/_testing_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Utlity functions to run Ploomber tests. This is not a public module
"""


def assert_no_extra_attributes_in_class(abstract_class,
concrete_class,
allowed=None):
"""
Ploomber makes heavy use of abstract classes to provide a uniform API for
tasks, products, metadata, etc. WHen defining abstract classes, the
interpreter refused to instantiate an object where the concrete class
misses implementation for abstract methods. However, it does not complain
if the concrete class implements *extra methods*
This has been problematic to remove old code. As we simplify the API,
sometimes concrete classes become outdated. For example, before creating
Metadata, all metadata logic was embedded in the Product objects, when
we made the change, we removed some abstract methods from the Product class
but it took a long time to realize that we sould've removed these mehods
from the MetaProduct class as well. We use this function to alert us
when there are things we can remove.
The other case also happens: we add functionality to concrete classes but
we do not do it in the abstract class, when this happens we have to decide
whether to add them to the abstract class (recommended) or make an
exception in such case, those new methods should be named with double
leading underscore to be ignored by this check and to prevent polluting the
public interface. Single leading uncerscore methods are flagged to allow
abstract classes define its own private API, which is also important
for consistency, even if the end user does not use these methods.
"""
allowed = allowed or set()

extra_attrs = {
attr
for attr in set(dir(concrete_class)) - set(dir(abstract_class))
if not attr.startswith('__')
} - allowed

if extra_attrs:
raise ValueError('The following methods/attributes in {} '
'are not part of the {} interface: {}'.format(
concrete_class.__name__, abstract_class.__name__,
extra_attrs))
43 changes: 30 additions & 13 deletions src/ploomber/products/Metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ def __init__(self, product):
__name__,
type(self).__name__))

@property
@abc.abstractmethod
def _data(self):
"""
Private API, returns the dictionary representation of the metadata
"""
pass

@property
@abc.abstractmethod
def timestamp(self):
Expand Down Expand Up @@ -60,7 +68,18 @@ def clear(self):
"""
pass

@abc.abstractmethod
def update_locally(self, data):
"""
Updates metadata locally. Called then tasks are successfully
executed in a subproces, to make the local copy synced again (because
the call to .update() happens in the subprocess as well)
"""
pass

def to_dict(self):
"""Returns a dict copy of ._data
"""
return deepcopy(self._data)

def __eq__(self, other):
Expand Down Expand Up @@ -198,11 +217,6 @@ def update(self, source_code):
self._data = new_data

def update_locally(self, data):
"""
Updates metadata locally. Should be called then tasks are successfully
executed in a subproces, to make the local copy synced again (because
the call to .update() happens in the subprocess as well)
"""
# NOTE: do we have to copy here? is it a problem if all products
# in a metadproduct have the same obj in metadata?
self._data = data
Expand Down Expand Up @@ -237,8 +251,6 @@ def __init__(self, products):

@property
def timestamp(self):
"""When the product was originally created
"""
# TODO: refactor, all products should have the same metadata
timestamps = [
p.metadata.timestamp for p in self._products
Expand All @@ -251,8 +263,6 @@ def timestamp(self):

@property
def stored_source_code(self):
"""Source code that generated the product
"""
stored_source_code = set([
p.metadata.stored_source_code for p in self._products
if p.metadata.stored_source_code is not None
Expand All @@ -268,14 +278,10 @@ def stored_source_code(self):
return list(stored_source_code)[0]

def update(self, source_code):
"""
"""
for p in self._products:
p.metadata.update(source_code)

def update_locally(self, data):
"""Updates metadata locally
"""
for p in self._products:
p.metadata.update_locally(data)

Expand All @@ -294,6 +300,10 @@ def clear(self):
def to_dict(self):
return list(self._products)[0].metadata.to_dict()

@property
def _data(self):
return list(self._products)[0].metadata._data


class MetadataAlwaysUpToDate(AbstractMetadata):
"""
Expand All @@ -316,6 +326,13 @@ def _get(self):
def update(self, source_code):
pass

def update_locally(self, data):
pass

@property
def _data(self):
return {'timestamp': 0, 'stored_source_code': None}

def delete(self):
pass

Expand Down
8 changes: 7 additions & 1 deletion tests/products/test_Metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

import pytest

from ploomber.products.Metadata import Metadata
from ploomber.products.Metadata import Metadata, AbstractMetadata
from ploomber.products import Product
from ploomber._testing_utils import assert_no_extra_attributes_in_class


class FakeProduct(Product):
Expand All @@ -23,6 +24,11 @@ def delete(self, force=False):
pass


@pytest.mark.parametrize('concrete_class', AbstractMetadata.__subclasses__())
def test_interfaces(concrete_class):
assert_no_extra_attributes_in_class(AbstractMetadata, concrete_class)


def test_clear():
prod = Mock(wraps=FakeProduct(identifier='fake-product'))
# we need this because if it doesn't exist, fetch_metata is skipped
Expand Down
16 changes: 5 additions & 11 deletions tests/test_metaproduct.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,20 @@
from ploomber import DAG
from ploomber.tasks import ShellScript
from ploomber.products import File, MetaProduct, Product
from ploomber._testing_utils import assert_no_extra_attributes_in_class


# FIXME: move this to a test_Product file and check other Product concrete
# classes
def test_interface():
"""
Look for unnecessary implemeneted methods/attributes in MetaProduct,
this helps us keep the API up-to-date if the Product interface changes
"""

# look for extra attrs, but allow the ones we get from
# collections.abc.Mapping
extra_attrs = {
attr
for attr in set(dir(MetaProduct)) - set(dir(Product))
if not attr.startswith('__')
} - {'get', 'keys', 'items', 'values'}

if extra_attrs:
raise ValueError(
'The following methods/attributes in MetaProduc '
'are not part of the Product interface: {}'.format(extra_attrs))
assert_no_extra_attributes_in_class(
Product, MetaProduct, allowed={'get', 'keys', 'items', 'values'})


def test_get():
Expand Down

0 comments on commit 92bbd02

Please sign in to comment.