# Test cases

Cases below are working as designed.
A better approach to test cases: rather than building an arbitrary expression tree, build each case from a sequence of intervals/rectangles/boxes that are added and subtracted.
These cases can then be evaluated on gridded data to validate both the simplifier and the data filtering.

In [1]:
from functools import lru_cache

from split_query.simplify import simplify_tree
from split_query.domain import simplify_domain
from split_query.expressions import And, Or, Not, Eq, Le, Lt, Ge, Gt, Float
from split_query.truth_table import expand_dnf

def pprint(result):
    """Print an expression in a readable, line-per-clause form.

    For an ``And`` or ``Or`` node, the class name is printed first, followed
    by each entry of ``result.clauses`` on its own line. Anything else is
    printed as-is on a single line.
    """
    # Non-compound results (e.g. a single relation or a boolean) print directly.
    if not isinstance(result, (And, Or)):
        print(result)
        return

    print(result.__class__.__name__)
    for clause in result.clauses:
        print(clause)

def interval(xl, xu):
    """Return the expression for the closed interval ``xl <= x <= xu``.

    The interval is expressed on the ``Float`` attribute named ``'x'`` as
    ``And([Ge(x, xl), Le(x, xu)])``.
    """
    # Only the x attribute is needed for a one-dimensional interval.
    x = Float('x')
    return And([Ge(x, xl), Le(x, xu)])

def rect(xl, xu, yl, yu):
    """Return the expression for the closed rectangle over ``x`` and ``y``.

    The rectangle is the conjunction ``xl <= x <= xu`` and ``yl <= y <= yu``,
    expressed on the ``Float`` attributes named ``'x'`` and ``'y'``.
    """
    x = Float('x')
    y = Float('y')
    bounds = [
        Ge(x, xl),
        Le(x, xu),
        Ge(y, yl),
        Le(y, yu),
    ]
    return And(bounds)

@lru_cache()    # Memoised, because some of the simplifications are slow...
def simplify(expression):
    """Run the full simplification pipeline on ``expression``.

    The steps are applied in this order: ``simplify_tree``,
    ``simplify_domain``, ``expand_dnf``, ``simplify_domain`` again, and a
    final ``simplify_tree``. Results are cached by ``lru_cache``, so
    ``expression`` must be hashable.
    """
    reduced = simplify_tree(expression)
    reduced = simplify_domain(reduced)
    reduced = expand_dnf(reduced)
    reduced = simplify_domain(reduced)
    return simplify_tree(reduced)

In [2]:
# The simplest case: subtract a query from itself. The result simplifies to False, meaning
# nothing is left over after the subtraction, so we know all data is covered.
expression = And([rect(0, 1, 0, 1), Not(rect(0, 1, 0, 1))])
pprint(simplify(expression))

False


In [3]:
# Slightly more complicated: subtract an interval from a larger interval that contains it,
# so the difference is only one dimensional. The result is an Or of the two leftover
# pieces, one on each side of the subtracted interval.
expression = And([interval(0, 3), Not(interval(1, 2))])
pprint(simplify(expression))

Or
And([Ge(x,0.0), Lt(x,1.0)])
And([Le(x,3.0), Gt(x,2.0)])


In [4]:
# Subtracting 2D rectangles: remove the rectangle [0, 1] x [0, 1] from the corner of
# [0, 2] x [0, 2]. The remainder is expressed as an Or of rectangular And clauses.
expression = And([rect(0, 2, 0, 2), Not(rect(0, 1, 0, 1))])
pprint(simplify(expression))

Or
And([Le(y,1.0), Gt(x,1.0), Ge(y,0.0), Le(x,2.0)])
And([Gt(y,1.0), Gt(x,1.0), Le(y,2.0), Le(x,2.0)])
And([Ge(x,0.0), Le(x,1.0), Le(y,2.0), Gt(y,1.0)])


In [5]:
# Subtracting a 2D rectangle from the middle of a larger one. The result gets a lot more
# complex (eight And clauses), so the original query is probably more sensible to use.
# There is some opportunistic simplification to be done by pairing up adjacent And clauses
# here (merging them into larger rectangles).
expression = And([rect(0, 3, 0, 3), Not(rect(1, 2, 1, 2))])
pprint(simplify(expression))

Or
And([Le(y,3.0), Ge(x,1.0), Gt(y,2.0), Le(x,2.0)])
And([Le(x,3.0), Le(y,3.0), Gt(y,2.0), Gt(x,2.0)])
And([Le(x,3.0), Ge(y,1.0), Le(y,2.0), Gt(x,2.0)])
And([Ge(x,0.0), Le(y,3.0), Lt(x,1.0), Gt(y,2.0)])
And([Ge(x,0.0), Ge(y,1.0), Lt(x,1.0), Le(y,2.0)])
And([Ge(x,0.0), Lt(y,1.0), Ge(y,0.0), Lt(x,1.0)])
And([Le(x,3.0), Lt(y,1.0), Ge(y,0.0), Gt(x,2.0)])
And([Lt(y,1.0), Ge(y,0.0), Ge(x,1.0), Le(x,2.0)])


In [6]:
# Subtract many 2D rectangles from one larger query rectangle. This is the key use case:
# when the subtracted rectangles together cover the whole query, the result simplifies to
# False, so the cache will be able to tell when data requirements are completely satisfied.
expression = And([
    rect(0, 3, 0, 3),
    Not(rect(0, 3, 0, 1)),
    Not(rect(1, 2, 1, 2)),
    Not(rect(0, 1, 1, 3)),
    Not(rect(2, 3, 1, 4)),
    Not(rect(1, 3, 2, 5)),
])
pprint(simplify(expression))

False


In [7]:
# Subtract a rectangle from True rather than from a bounding rectangle. The result is split
# into eight And clauses; there are definitely some further simplification strategies to
# apply here (e.g. merging adjacent clauses into larger regions).
expression = And([True, Not(rect(0, 1, 0, 1))])
pprint(simplify(expression))

Or
And([Ge(x,0.0), Le(x,1.0), Gt(y,1.0)])
And([Ge(x,0.0), Le(x,1.0), Lt(y,0.0)])
And([Gt(y,1.0), Gt(x,1.0)])
And([Gt(y,1.0), Lt(x,0.0)])
And([Gt(x,1.0), Lt(y,0.0)])
And([Lt(x,0.0), Lt(y,0.0)])
And([Le(y,1.0), Ge(y,0.0), Lt(x,0.0)])
And([Le(y,1.0), Gt(x,1.0), Ge(y,0.0)])
