Skip to content

Commit

Permalink
BUG/CLN: Fix predicates on Selections on Joins
Browse files Browse the repository at this point in the history
The motivation for this PR is twofold: 1) fix a bug where filters on
joined data would either fail or give the wrong result, depending on
which version of pandas you're on; 2) refactor the
`execute_selection_dataframe` function because it was getting rather
large and unreadable. The bug in ibis-project#1136 was occurring due to the
fact that the predicates in the `Selection` were not being evaluated
against the `data` argument, which is the result of the joined data.
This doesn't fail on non-`Join` selections because there's no possible
ambiguity regarding which column to select. The solution is to map
each root table in each predicate to `data`, therefore evaluating the
predicate against the joined data. Closes ibis-project#1136.

Author: Phillip Cloud <cpcloud@gmail.com>

Closes ibis-project#1149 from cpcloud/fix-selection-join-predicates and squashes the following commits:

d76b2a8 [Phillip Cloud] BUG/CLN: Fix predicates on Selections on Joins
  • Loading branch information
cpcloud committed Oct 16, 2017
1 parent b01c533 commit 34cc0c2
Show file tree
Hide file tree
Showing 21 changed files with 1,204 additions and 593 deletions.
2 changes: 2 additions & 0 deletions ibis/expr/api.py
Expand Up @@ -1998,6 +1998,7 @@ def between_time(arg, lower, upper, timezone=None):
'any_inner': _ops.AnyInnerJoin,
'any_left': _ops.AnyLeftJoin,
'outer': _ops.OuterJoin,
'right': _ops.RightJoin,
'left_semi': _ops.LeftSemiJoin,
'semi': _ops.LeftSemiJoin,
'anti': _ops.LeftAntiJoin,
Expand All @@ -2019,6 +2020,7 @@ def join(left, right, predicates=(), how='inner'):
- 'inner': inner join
- 'left': left join
- 'outer': full outer join
- 'right': right outer join
- 'semi' or 'left_semi': left semi join
- 'anti': anti join
Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions ibis/pandas/core.py
Expand Up @@ -13,7 +13,7 @@
import ibis.expr.types as ir
import ibis.expr.datatypes as dt

import ibis.pandas.context as ctx
import ibis.pandas.aggcontext as agg_ctx
from ibis.pandas.dispatch import (
execute, execute_node, execute_first, data_preload
)
Expand Down Expand Up @@ -92,7 +92,7 @@ def execute_with_scope(expr, scope, context=None, **kwargs):
return scope[op]

if context is None:
context = ctx.Summarize()
context = agg_ctx.Summarize()

try:
computed_args = [scope[t] for t in op.root_tables()]
Expand Down Expand Up @@ -175,6 +175,6 @@ def execute_without_scope(
return execute(
expr,
new_scope,
context=context if context is not None else ctx.Summarize(),
context=context if context is not None else agg_ctx.Summarize(),
**kwargs
)
4 changes: 4 additions & 0 deletions ibis/pandas/execution/__init__.py
@@ -0,0 +1,4 @@
from ibis.pandas.execution.generic import * # noqa: F401,F403
from ibis.pandas.execution.selection import * # noqa: F401,F403
from ibis.pandas.execution.join import * # noqa: F401,F403
from ibis.pandas.execution.window import * # noqa: F401,F403
74 changes: 74 additions & 0 deletions ibis/pandas/execution/constants.py
@@ -0,0 +1,74 @@
"""Constants for the pandas backend.
"""

import operator

import numpy as np
import pandas as pd

import ibis
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops


# Lookup from an ibis join operation class to a short join-type name.
# NOTE(review): the values look like the strings pandas accepts for the
# ``how=`` argument of ``merge`` -- confirm against the join executor.
JOIN_TYPES = {
    ops.LeftJoin: 'left',
    ops.RightJoin: 'right',
    ops.InnerJoin: 'inner',
    ops.OuterJoin: 'outer',
}


# Suffix strings for the left and right sides of a join.  Each one embeds
# the result of a separate ``ibis.util.guid()`` call made at import time.
# NOTE(review): presumably used to disambiguate overlapping column names
# in joined frames -- confirm against the join executor.
LEFT_JOIN_SUFFIX = '_ibis_left_{}'.format(ibis.util.guid())
RIGHT_JOIN_SUFFIX = '_ibis_right_{}'.format(ibis.util.guid())

# (left, right) pair, in that order.
JOIN_SUFFIXES = (LEFT_JOIN_SUFFIX, RIGHT_JOIN_SUFFIX)

# Maps each suffix to the suffix of the opposite side:
# left -> right and right -> left.
ALTERNATE_SUFFIXES = dict(zip(JOIN_SUFFIXES, JOIN_SUFFIXES[::-1]))


# Lookup from an ibis datatype to the dtype used on the pandas side:
# a numpy scalar type for numerics, the builtin ``str`` for strings and
# the dtype string 'datetime64[ns]' for timestamps.
IBIS_TYPE_TO_PANDAS_TYPE = {
    # floating point
    dt.float: np.float32,
    dt.double: np.float64,
    # signed integers
    dt.int8: np.int8,
    dt.int16: np.int16,
    dt.int32: np.int32,
    dt.int64: np.int64,
    # text and time
    dt.string: str,
    dt.timestamp: 'datetime64[ns]',
}


# Lookup from an ibis datatype to a callable that turns a value into the
# matching plain-Python object (``float``, ``int``, ``str`` or
# ``datetime.date``).
IBIS_TO_PYTHON_LITERAL_TYPES = {
    # both floating-point widths collapse to the builtin float
    dt.double: float,
    dt.float: float,
    # every integer width collapses to the builtin int
    dt.int64: int,
    dt.int32: int,
    dt.int16: int,
    dt.int8: int,
    dt.string: str,
    # dates go through pd.Timestamp first, then are narrowed to the
    # calendar date of the resulting datetime
    dt.date: lambda x: pd.Timestamp(x).to_pydatetime().date(),
}


# Lookup from an ibis binary operation class to the two-argument function
# from the standard ``operator`` module that implements it.
BINARY_OPERATIONS = {
    # comparisons
    ops.Greater: operator.gt,
    ops.Less: operator.lt,
    ops.LessEqual: operator.le,
    ops.GreaterEqual: operator.ge,
    ops.Equals: operator.eq,
    ops.NotEquals: operator.ne,

    # logical connectives, implemented with the bitwise operators
    # ``&``, ``|`` and ``^``
    ops.And: operator.and_,
    ops.Or: operator.or_,
    ops.Xor: operator.xor,

    # arithmetic; Divide is true division, FloorDivide is ``//``
    ops.Add: operator.add,
    ops.Subtract: operator.sub,
    ops.Multiply: operator.mul,
    ops.Divide: operator.truediv,
    ops.FloorDivide: operator.floordiv,
    ops.Modulus: operator.mod,
    ops.Power: operator.pow,
}

0 comments on commit 34cc0c2

Please sign in to comment.