In [None]:
import json
from datetime import datetime
from split_query.core import *
from split_query.remote import soql_hook, traverse_expression
from split_query.core.algorithms import simplify_flat_and, _normalise_input
from split_query.engine import query_df
from split_query.core.expand import expand_dnf_simplify, expand_dnf
import pandas as pd
from functools import lru_cache

In [None]:
# Load the recorded simplify calls. Later cells read the 'expression' and
# 'simplified' keys of each entry.
# The encoding is pinned because JSON text is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding.
# NOTE(review): object_hook is not imported by name in this notebook; it is
# presumably supplied by `from split_query.core import *` -- confirm.
with open('simplify_calls.json', encoding='utf-8') as infile:
    data = json.load(infile, object_hook=object_hook)

In [None]:
# Synthetic fixture: one frame per sensor over the same date range, each with
# a distinct constant count so rows from different sensors never look alike.
sample_dates = pd.date_range(datetime(2012, 1, 1), datetime(2018, 1, 1))
df1 = pd.DataFrame(dict(
    datetime=sample_dates, sensor='Town Hall (West)', hourly_count=1))
df2 = pd.DataFrame(dict(
    datetime=sample_dates, sensor='Southbank', hourly_count=2))
df3 = pd.DataFrame(dict(
    datetime=sample_dates, sensor='Bourke St', hourly_count=3))
# ignore_index=True gives every row its own label. Without it the combined
# frame carries each label three times (once per sensor), so two different
# row selections could end up with identical index labels.
df = pd.concat([df1, df2, df3], ignore_index=True)

In [None]:
def equivalent(e1, e2):
    """Report whether two expressions select the same data from ``df``.

    Both expressions are run through ``query_df`` against the module-level
    ``df`` and the resulting frames are compared element by element.

    Returns a truthy value only when every compared cell matches. Any
    ``ValueError`` raised while querying or comparing (for example, pandas
    refusing to compare frames that are not identically labelled) is
    reported as ``False``.
    """
    try:
        left = query_df(df, e1)
        right = query_df(df, e2)
        cellwise = left == right
        # First .all() reduces each column, the second reduces across columns.
        return cellwise.all().all()
    except ValueError:
        return False

In [None]:
# Keep only the recorded calls whose simplified form no longer selects the
# same rows as the original expression.
errors = list(filter(
    lambda entry: not equivalent(entry['expression'], entry['simplified']),
    data))

In [None]:
# For every failing entry, show whether each route still matches the original
# expression: the recorded simplification, then plain DNF expansion, then DNF
# expansion with simplification.
for error in errors:
    original_expr = error['expression']
    print('Initial               ', equivalent(original_expr, error['simplified']))
    via_dnf = expand_dnf(original_expr)
    print('expand_dnf            ', equivalent(original_expr, via_dnf))
    via_dnf_simplify = expand_dnf_simplify(original_expr)
    print('expand_dnf_simplify   ', equivalent(original_expr, via_dnf_simplify))
    print()

In [None]:
# Narrow each failure down to individual clauses: expand the expression,
# simplify every clause of the expansion on its own, and collect the
# (clause, simplified clause) pairs that no longer agree.
results = []
for error in errors:
    expression = error['expression']
    expanded = expand_dnf(expression)
    clauses = list(expanded.clauses)
    # Simplify every clause before checking any of them.
    new_clauses = list(map(simplify_flat_and, clauses))
    bad_clauses = [
        pair for pair in zip(clauses, new_clauses)
        if not equivalent(*pair)]
    results += bad_clauses

In [None]:
# Show the sub-clauses of the first failing clause, grouped by expression
# class name so related terms sit next to each other.
sorted(
    results[0][0].clauses,
    key=lambda clause: clause.__class__.__name__)

In [None]:
# Hand-built copy of a failing conjunction, transcribed from displayed repr
# output into code that can be evaluated.
# The pasted repr could not run as written: ATTR(...) is the repr form of an
# attribute rather than a constructor call, and `datetime` is bound to the
# class here (it is called directly when the fixture frame is built), so
# `datetime.datetime(...)` raises AttributeError.
# NOTE(review): assumes Eq/Ge/Gt/Le/Lt/In/Not come from the star import of
# split_query.core and take (attribute, value) / (clause) as their repr
# suggests -- confirm against the library.
# The attribute is named 'datetime' (not 'dt') to match both the repr and the
# column name in the fixture frame.
dt = Attribute('datetime')
sen = Attribute('sensor')

[Eq(sen, 'Southbank'),
 Ge(dt, datetime(2013, 2, 3, 0, 0)),
 Ge(dt, datetime(2014, 1, 1, 0, 0)),
 In(sen, frozenset({'Southbank'})),
 Le(dt, datetime(2015, 1, 1, 0, 0)),
 Lt(dt, datetime(2016, 1, 1, 0, 0)),
 Lt(dt, datetime(2015, 1, 1, 0, 0)),
 Not(Lt(dt, datetime(2014, 1, 1, 0, 0))),
 Not(Eq(dt, datetime(2015, 1, 1, 0, 0))),
 Not(Gt(dt, datetime(2015, 1, 1, 0, 0))),
 Not(Eq(sen, 'Town Hall (West)'))]