# 🏭🏭 Factory creators
This notebook experimentally implements a set of validator factory creators for common validator factory patterns, using some ✨ magic to produce the messages from the different components.

The goal with this proposed feature is to DRY and simplify how predefined validator factories are defined within this library:

```py
# cells.py

all_eq = quantitative_validator_factory(vf.CellsValidator, R.all, R.equals)
sum_eq = reductive_validator_factory(vf.CellsValidator, R.sum, R.equals)
```

Note that usage remains the same for downstream consumers:

```py
vf.cells.all_eq(0).validate(df)
vf.cells.sum_eq(42).validate(df)
``` 

In [1]:
from typing import Callable

import ramda as R
import validframe as vf
import pandas as pd
import numpy as np

In [2]:
# Maps each supported predicate function to the message fragment that describes it.
# The factories fill `{x}` with `repr()` of the value the predicate is applied to.
# NOTE(review): Ramda documents `lt(a, b)` as `a < b`, so `R.lt(x)` applied to a cell
# may actually test `x < cell` - confirm the wording matches the argument order.
PREDICATES = {
    R.equals: "equal to {x}",
    R.is_: "instance of type {x}",
    R.lt: "less than {x}",
    R.lte: "less than or equal to {x}",
}

In [3]:
# Quantifier function -> word used for it in validator messages.
QUANTIFIERS = {
    R.all: "all",
    R.any: "any",
}

# Reducer function -> word used for it in validator messages.
REDUCERS = {
    R.sum: "sum",
    R.product: "product",
}

In [4]:
def quantitative_validator_factory(quantifier: Callable, predicate: Callable) -> Callable:
    """Create a factory for "<quantifier> of the cells must be <predicate>" validators.

    Args:
        quantifier: A key of ``QUANTIFIERS`` (e.g. ``R.all``); it is called
            with ``predicate(x)`` to build the check.
        predicate: A key of ``PREDICATES`` (e.g. ``R.equals``); it is called
            with the comparison value ``x``.

    Returns:
        A callable ``(x, **slice_kwargs)`` that builds a ``vf.CellsValidator``
        from the check, the generated message and the slice keyword arguments.
    """

    def make_validator(x, **slice_kwargs):
        # The check is built first, then the message parts are looked up.
        check = quantifier(predicate(x))
        quantifier_word = QUANTIFIERS[quantifier]
        condition = PREDICATES[predicate].format(x=repr(x))
        message = '(slice={s}) {q} of the cells must be {c}'.format(
            s=slice_kwargs,
            q=quantifier_word,
            c=condition,
        )
        return vf.CellsValidator(check, message, **slice_kwargs)

    return make_validator

In [5]:
# Factory for "all of the cells must be equal to x" validators.
all_eq = quantitative_validator_factory(R.all, R.equals)
# Inspect the generated message; the slice kwargs appear in it verbatim.
all_eq(1, cols=['amount']).msg

"(slice={'cols': ['amount']}) all of the cells must be equal to 1"

In [6]:
# 3x3 sample frame: rows 0 and 1 hold only ones, row 2 holds only twos.
df = pd.DataFrame([[1,1,1],[1,1,1],[2,2,2]])

In [7]:
def call_validators(df, validators):
    """Run each validator against ``df`` and print why the run stopped, if it did.

    Validators are run in order; the first one that raises ends the run, so
    later validators are not called.

    Args:
        df: Object handed to every validator's ``validate`` method.
        validators: Iterable of objects exposing ``validate(df)``.

    Returns:
        None. Outcomes are only reported on stdout.
    """
    try:
        for validator in validators:
            validator.validate(df)
    except AssertionError as e:
        # AssertionError is reported as a failed validation, with its message.
        print('failed:', e)
    except Exception as e:
        # Anything else is unexpected; include the exception so it is not silently lost.
        print('error:', 'something went wrong:', repr(e))

In [8]:
# Rows 0-1 hold only ones and row 2 only twos, so (assuming `rows=` selects rows
# by position) neither validator is expected to report a failure.
call_validators(df, [
    all_eq(1, rows=[0,1]),
    all_eq(2, rows=[2])
])

In [9]:
# Factory for "any of the cells must be equal to x" validators.
any_eq = quantitative_validator_factory(R.any, R.equals)

In [10]:
# The frame contains a 2 but no NaN, so the second validator is the one reported as failed.
call_validators(df,[
    any_eq(2, cols=[0]),
    any_eq(np.nan, cols=[0])
])

failed: (slice={'cols': [0]}) any of the cells must be equal to nan


In [11]:
# Deliberate misuse: `R.sum` is a reducer, not a quantifier (it is not a key of
# `QUANTIFIERS`), so building the validator raises. Sums should go through
# `reduction_validator` instead.
sum_eq = quantitative_validator_factory(R.sum, R.equals)

try:
    sum_eq(12)
except Exception:
    print('error:', 'something went wrong')

error: something went wrong


In [12]:
# Compose so that the list is summed first and the sum is then handed to the
# curried `R.equals`; the result is a predicate awaiting the expected value.
sum_eq = R.compose(R.equals, R.sum)
sum_eq([1,2,3])(6)

True

In [13]:
# The same composition spelled out by hand as a lambda.
sum_eq = lambda xs: R.equals(R.sum(xs))
sum_eq([1,2,3])(6)

True

In [14]:
def reduction_validator(reducer: Callable, predicate: Callable) -> Callable:
    """Create a factory for "<reducer> of the cells must be <predicate>" validators.

    Args:
        reducer: A key of ``REDUCERS`` (e.g. ``R.sum``); composed so that it
            runs before the predicate.
        predicate: A key of ``PREDICATES`` (e.g. ``R.equals``); it is called
            with the comparison value ``x``.

    Returns:
        A callable ``(x, **slice_kwargs)`` that builds a ``vf.CellsValidator``
        from the composed check, the generated message and the slice keyword
        arguments.
    """

    def make_validator(x, **slice_kwargs):
        # The check is built first, then the message parts are looked up.
        check = R.compose(predicate(x), reducer)
        reducer_word = REDUCERS[reducer]
        condition = PREDICATES[predicate].format(x=repr(x))
        message = '(slice={s}) {q} of the cells must be {c}'.format(
            s=slice_kwargs,
            q=reducer_word,
            c=condition,
        )
        return vf.CellsValidator(check, message, **slice_kwargs)

    return make_validator

In [15]:
# Factory for "sum of the cells must be equal to x" validators.
sum_eq = reduction_validator(R.sum, R.equals)

# With no slice kwargs the message shows an empty slice.
sum_eq(12).msg

'(slice={}) sum of the cells must be equal to 12'

In [16]:
# Unsliced: the nine cells of `df` add up to 12, so this does not raise.
sum_eq(12).validate(df)

# Sliced with `rows=[0]` the check no longer holds and is reported as failed.
call_validators(df,[
    sum_eq(12, rows=[0])
])

failed: (slice={'rows': [0]}) sum of the cells must be equal to 12


In [17]:
# Predefined factories built from a quantifier and a predicate ...
all_eq = quantitative_validator_factory(R.all, R.equals)
all_is = quantitative_validator_factory(R.all, R.is_)
all_lte = quantitative_validator_factory(R.all, R.lte)

# ... and from a reducer and a predicate.
sum_eq = reduction_validator(R.sum, R.equals)
sum_eq(12).validate(df)

In [18]:
# Validator class -> noun used for it in validator messages.
VALIDATORS = {
    vf.CellsValidator: 'cells',
    vf.RowsValidator: 'rows',
}

def reductive_validator_factory_next(reducer: Callable, predicate: Callable, validator_type=vf.CellsValidator) -> Callable:
    """Create a factory for "<reducer> of the <cells|rows> must be <predicate>" validators.

    Args:
        reducer: A key of ``REDUCERS`` (e.g. ``R.sum``); composed so that it
            runs before the predicate.
        predicate: A key of ``PREDICATES``; it is called with the comparison
            value ``x``.
        validator_type: A key of ``VALIDATORS``; the class that gets
            instantiated. Defaults to ``vf.CellsValidator``. It is declared
            last because parameters with defaults must follow those without.

    Returns:
        A callable ``(x, **slice_kwargs)`` that builds a ``validator_type``
        instance from the composed check, the generated message and the slice
        keyword arguments.
    """
    return lambda x, **slice_kwargs: validator_type(
        R.compose(predicate(x), reducer),
        '(slice={s}) {q} of the {v} must be {c}'.format(s=slice_kwargs, q=REDUCERS[reducer], v=VALIDATORS[validator_type], c=PREDICATES[predicate].format(x=repr(x))),
        **slice_kwargs
    )

In [19]:
# now we can sum rows
sum_eq = reductive_validator_factory_next(R.sum, R.equals, vf.RowsValidator)

try:
    # rows are `pd.Series` and summing them also returns a `pd.Series`
    # (`expected_sum` is reused further down in the notebook)
    expected_sum = pd.Series([4,4,4])
    sum_eq(expected_sum).validate(df)
except Exception:
    # Still errors - presumably because `R.equals` is not a suitable predicate for `pd.Series`.
    print('error:', 'something went wrong, still...')

error: something went wrong, still...


In [20]:
# Remember: `==` between two `pd.Series` compares element-wise and returns a
# boolean mask (just like with `pd.DataFrame`), not a single bool.
pd.Series([4,4,4]) == pd.Series([4,4,4])

0    True
1    True
2    True
dtype: bool

In [21]:
# For an equality check that yields a single bool we want `.equals` instead:
pd.Series([4,4,4]).equals(pd.Series([4,4,4]))

True

In [22]:
# or, equivalently, call `equals` through the class and pass both Series explicitly
# (handy because it can then be wrapped as a two-argument function):
pd.Series.equals(pd.Series([4,4,4]), pd.Series([4,4,4]))

True

In [23]:
# so putting it all together:
# Curried predicate: `R_series_equals(y)(x)` is `pd.Series.equals(y, x)`.
R_series_equals = lambda y: lambda x: pd.Series.equals(y, x)
# Register the message fragment under the predicate object itself, because the
# factory looks the fragment up with `PREDICATES[predicate]`.
PREDICATES[R_series_equals] = 'equal {x}'

rows_sum_eq = reductive_validator_factory_next(R.sum, R_series_equals, vf.RowsValidator)

rows_sum_eq(expected_sum).validate(df)
print('passed:', rows_sum_eq(expected_sum).msg)

passed: (slice={}) sum of the rows must be equal 0    4
1    4
2    4
dtype: int64


In [26]:
# if we were jinja ninjas we could maybe make the message better
# NOTE: with `str.format` the doubled braces are an escape, so this fragment renders
# the literal text `{x.to_list()}`; it only sketches what a template engine could do.
PREDICATES[R_series_equals] = 'equal {{x.to_list()}}'

expected_sum.to_list()

[4, 4, 4]