In [None]:
import functools
from datetime import datetime

import pandas as pd
import sodapy
from dateutil.parser import parse as parse_dt

from split_query.expressions import Attribute, Ge, Le, Gt, Lt, And, Or, Eq
from split_query.simplify import simplify_tree
from split_query.truth_table import expand_dnf
from split_query.domain import simplify_domain
from interface import DataSet
from engine import map_query_df
from converters import convert_expression, soql, in_to_or, map_to_ids

In [None]:
def _map_to_year_mdate(obj):
    ''' Conversion hook: rewrite a Ge/Le bound on the 'datetime' attribute as
    the same kind of bound on the 'year' attribute, using the year of the
    original bound value. The replacement is wrapped in a one-clause Or.
    Any other object is returned unchanged.

    Only the year is constrained. A finer clause (equal year combined with a
    bound on 'mdate') was sketched in an earlier revision but is disabled.

    TODO: generalise map_to_ids to cover this case, i.e. accept a function or
    map which converts values for a particular attribute. '''
    # Ge is tested before Le, matching the order of the original checks.
    for bound_type in (Ge, Le):
        if isinstance(obj, bound_type) and obj.attribute == Attribute('datetime'):
            year_bound = bound_type(Attribute('year'), obj.value.year)
            return Or([year_bound])
    return obj

def to_soql_where(expression):
    ''' Run the expression through the conversion hooks, in order, and return
    the result of the last pass (the soql hook).

    Passes, each applied with convert_expression:
      1. _map_to_year_mdate
      2. map_to_ids, bound to the sensor -> sensor_id attribute map
      3. in_to_or
      4. soql
    '''
    sensor_ids = {'bourke': 27, 'grattan': 48}
    attribute_map = {Attribute('sensor'): (Attribute('sensor_id'), sensor_ids)}
    hooks = (
        _map_to_year_mdate,
        functools.partial(map_to_ids, attribute_map=attribute_map),
        in_to_or,
        soql,
    )
    # Each pass consumes the output of the previous one, so order matters.
    for hook in hooks:
        expression = convert_expression(expression, hook=hook)
    return expression

def parse_remote(entry):
    ''' Convert one raw record (a mapping of field name to value) into a dict
    with keys 'datetime', 'hourly_count' and 'sensor_id'.

    Raises KeyError if any of the three source fields is missing. '''
    # NOTE(review): 'daet_time' looks like a typo but is presumably the actual
    # field name in the remote dataset - confirm before changing it.
    timestamp = parse_dt(entry['daet_time'])
    count = int(entry['qv_market_peel_st'])
    sensor = int(entry['sensor_id'])
    return {'datetime': timestamp, 'hourly_count': count, 'sensor_id': sensor}

class Remote(object):
    ''' Pages through Socrata dataset 'cb85-mn2u' on data.melbourne.vic.gov.au
    and returns the matching records as a DataFrame. '''

    def __init__(self):
        # No app token is supplied, so requests are made anonymously.
        self.api = sodapy.Socrata('data.melbourne.vic.gov.au', app_token=None)
        # Maximum number of records requested per call.
        self.page = 10000

    def paged_query(self, expression):
        ''' Yield every raw record matching the expression, fetching
        self.page records per request until a short page is returned. '''
        where = to_soql_where(expression)
        offset = 0
        while True:
            # Limit/offset paging is only reliable with an explicit ordering:
            # without one the service does not guarantee that rows keep the
            # same position between requests, so pages could repeat or skip
            # records. Ordering by the ':id' system field keeps pages stable.
            part = self.api.get(
                'cb85-mn2u', where=where, order=':id',
                limit=self.page, offset=offset)
            for entry in part:
                yield entry
            # A page shorter than the limit means there is nothing left.
            if len(part) < self.page:
                break
            offset += self.page

    def query(self, expression):
        ''' Return a DataFrame of parsed records matching the expression, with
        columns 'datetime', 'hourly_count' and 'sensor_id'. '''
        records = [parse_remote(entry) for entry in self.paged_query(expression)]
        # Name the columns explicitly so that an empty result still has the
        # expected columns rather than being a column-less frame.
        return pd.DataFrame(
            records, columns=['datetime', 'hourly_count', 'sensor_id'])

class Backend(object):
    ''' Query backend that keeps the frame returned by the remote for each
    expression and reuses it when the same expression is queried again. '''

    def __init__(self, remote):
        self.remote = remote
        # Maps an expression to the frame the remote returned for it.
        self.cache = {}

    def query(self, expression):
        ''' Return the rows selected by the expression, fetching from the
        remote only if this exact expression has not been seen before. '''
        if expression in self.cache:
            frame = self.cache[expression]
            message = 'Retrieved from cache: {}'
        else:
            frame = self.remote.query(expression)
            self.cache[expression] = frame
            message = 'Retrieved from remote: {}'
        print(message.format(frame.shape[0]))
        # The stored frame is filtered locally on every call.
        # NOTE(review): map_query_df presumably returns a row mask - confirm.
        selector = map_query_df(frame, expression)
        return frame[selector]


# Attributes exposed on the dataset; the names match the keys produced by
# parse_remote.
attributes = [
    Attribute('datetime'),
    Attribute('hourly_count'),
    Attribute('sensor_id'),
]
# Queryable dataset backed by the caching Backend over the Socrata Remote.
dataset = DataSet(
    'Pedestrians',
    attributes,
    Backend(Remote()))

In [None]:
# Readings between 2016-06-01 and 2016-08-01 for sensors 27 and 28.
date_window = dataset.datetime.between(datetime(2016, 6, 1), datetime(2016, 8, 1))
sensor_filter = dataset.sensor_id.isin([27, 28])
filtered = dataset[date_window][sensor_filter]
filtered.get().head()

In [None]:
# Re-run the query defined in the previous cell.
# NOTE(review): presumably this is here to exercise Backend's cache path
# (which prints 'Retrieved from cache: ...') - confirm that DataSet.get()
# forwards the same expression to the backend.
filtered.get().head()

In [None]:
# Same date range as before, restricted to sensor 27 only.
date_window = dataset.datetime.between(datetime(2016, 6, 1), datetime(2016, 8, 1))
sensor_filter = dataset.sensor_id.isin([27])
filtered = dataset[date_window][sensor_filter]
filtered.get().head()

In [None]:
# Same date range as before, widened to sensors 27, 28 and 29.
date_window = dataset.datetime.between(datetime(2016, 6, 1), datetime(2016, 8, 1))
sensor_filter = dataset.sensor_id.isin([27, 28, 29])
filtered = dataset[date_window][sensor_filter]
filtered.get().head()