Skip to content
This repository has been archived by the owner on Jul 3, 2023. It is now read-only.

Commit

Permalink
Adds ability to specify applies_to in data quality decorator.
Browse files Browse the repository at this point in the history
This is *very* rough.

The idea is that we should be able to choose one of the following modes:

1. Apply a validator to every final node in a subdag of a decorated function
2. Apply a validator to a specific node within the subdag of a decorated
   function

This basically allows (1) which is the default, but also (2) if using
the applies_to keyword. Note that this only works if it's in the final
subdag (I.E. a sync), and not in the middle. We should add that but
it'll be a little bit of a change. Nothing we can't make backwards
compatible, we just might need to crawl back a little further in our
layered API -- E.G. use subdag transformer rather than node transformer.

Either way, this shows that we can do what we want without too many
modifications.

Note that this is not tested, just a proof of concept.
  • Loading branch information
elijahbenizzy committed Jul 4, 2022
1 parent d144d95 commit 6ccc12a
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions hamilton/function_modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,8 +749,10 @@ def validate(self, fn: Callable):

class BaseDataValidationDecorator(function_modifiers_base.NodeTransformer):

@staticmethod
def should_validate(node_: node.Node, config: Dict[str, Any], validator_name: str) -> bool:
def __init__(self, applies_to: Optional[List[str]] = None):
    """Stores the optional list of node names this validator is restricted to.

    :param applies_to: Names of the nodes to validate. ``None`` (the default)
        places no name-based restriction on which nodes get validated.
    """
    self.applies_to = applies_to

def should_validate(self, node_: node.Node, config: Dict[str, Any], validator_name: str) -> bool:
"""Quick POC that we can wire stuff through as needed.
Say one has a node called `foo`. We might want the following:
Expand All @@ -764,6 +766,8 @@ def should_validate(node_: node.Node, config: Dict[str, Any], validator_name: st
1. "data_quality.foo.disable = True"
2. "data_quality.foo.disable = ['check_1, 'check_2']
"""
if self.applies_to is not None and node_.name not in self.applies_to:
return False # Not something we want to validate
global_disable_key = f"data_quality.disable"
if global_disable_key in config and config[global_disable_key] == True:
return False
Expand Down Expand Up @@ -880,12 +884,13 @@ def validate(self, fn: Callable):


class check_output_custom(BaseDataValidationDecorator):
def __init__(self, *validators: base.DataValidator, profiler: base.DataProfiler):
def __init__(self, *validators: base.DataValidator, profiler: base.DataProfiler, applies_to: Optional[List[str]] = None):
    """Creates a check_output_custom decorator. This allows
    passing of custom validators that implement the DataValidator interface.

    @param validators: Validators to use.
    @param profiler: Profiler to use; stored on the decorator as ``self.profiler``.
    @param applies_to: Optional list of node names to restrict validation to.
        Forwarded to the base class; ``None`` applies no name restriction.
    """
    # The two-argument form binds super() to this instance. The one-argument
    # form, super(check_output_custom), yields an *unbound* super object whose
    # __init__ re-initialises the super object itself (and rejects keyword
    # arguments with a TypeError), so the base constructor would never run
    # and self.applies_to would never be set.
    super(check_output_custom, self).__init__(applies_to=applies_to)
    self.validators = validators
    self.profiler = profiler

Expand All @@ -907,6 +912,7 @@ def get_validators(self, node_to_validate: node.Node) -> List[base.DataValidator
def __init__(self,
importance: str = base.DataValidationLevel.WARN.value,
default_decorator_candidates: Type[hamilton.data_quality.base.BaseDefaultValidator] = None,
*, applies_to: List[str] = None,
**default_validator_kwargs: Any):
"""Creates the check_output validator. This constructs the default validator class.
Note that this creates a whole set of default validators
Expand All @@ -915,6 +921,7 @@ def __init__(self,
:param importance: For the default validator, how important is it that this passes.
:param validator_kwargs: keyword arguments to be passed to the validator
"""
super(check_output, self).__init__(applies_to=applies_to)
self.importance = importance
self.default_validator_kwargs = default_validator_kwargs
self.default_decorator_candidates = default_decorator_candidates
Expand Down

0 comments on commit 6ccc12a

Please sign in to comment.