## Joining on Non-Equi Operators

In [None]:
import pandas as pd
import janitor
import numpy as np

In [None]:
# Sample frames taken from https://stackoverflow.com/q/61948103/7175713
df1 = pd.DataFrame(
    dict(
        id=[1, 1, 1, 2, 2, 3],
        value_1=[2, 5, 7, 1, 3, 4],
    )
)

df2 = pd.DataFrame(
    dict(
        id=[1, 1, 1, 1, 2, 2, 2, 3],
        value_2A=[0, 3, 7, 12, 0, 2, 3, 1],
        value_2B=[1, 5, 9, 15, 1, 4, 6, 3],
    )
)

In [None]:
# Left-hand frame used in the joins below: columns `id` and `value_1`.
df1

In [None]:
# Right-hand frame used in the joins below: columns `id`, `value_2A`, `value_2B`.
df2

Joining on a combination of equi (`==`) and non-equi operators is possible:

In [None]:
# Each condition is a (left column, right column, operator) tuple.
# Here: same `id`, and `value_1` between `value_2A` and `value_2B` (inclusive).
df1.conditional_join(
    df2,
    ("id", "id", "=="),
    ("value_1", "value_2A", ">="),
    ("value_1", "value_2B", "<="),
    sort_by_appearance=True,
)

The default join type is `inner`; `left` and `right` joins are supported as well:

In [None]:
# Same conditions as the inner join, but requesting a left join via `how`.
df1.conditional_join(
    df2,
    ("id", "id", "=="),
    ("value_1", "value_2A", ">="),
    ("value_1", "value_2B", "<="),
    how="left",
    sort_by_appearance=True,
)

In [None]:
# Same conditions as the inner join, but requesting a right join via `how`.
df1.conditional_join(
    df2,
    ("id", "id", "=="),
    ("value_1", "value_2A", ">="),
    ("value_1", "value_2B", "<="),
    how="right",
    sort_by_appearance=True,
)

Joining on non-equi conditions alone is also possible:

In [None]:
# No equality condition: `value_1` must lie strictly between
# `value_2A` and `value_2B`, regardless of `id`.
df1.conditional_join(
    df2,
    ("value_1", "value_2A", ">"),
    ("value_1", "value_2B", "<"),
    how="inner",
    sort_by_appearance=True,
)

Joining on equality alone is also possible, but should be avoided: pandas' own `merge`/`join` is more efficient, and `conditional_join` uses pandas' internal merge functions for this case anyway:

In [None]:
# Single equality condition on `id`.
df1.conditional_join(df2, ("id", "id", "=="))

Joining on inequality (`!=`) is also supported:

In [None]:
# Pair every row of df1 with the rows of df2 whose `id` differs.
df1.conditional_join(df2, ("id", "id", "!="))

If the two dataframes have no column names in common, the result has a single-level (flat) column index:

In [None]:
# Drop `id` on both sides so the frames share no column names.
# "val*" is a glob-style pattern; in df2 it matches `value_2A` and `value_2B`.
df1.select_columns("value_1").conditional_join(
    df2.select_columns("val*"),
    ("value_1", "value_2A", ">"),
    ("value_1", "value_2B", "<"),
)

In [2]:
from io import StringIO
import pandas as pd
import janitor 
import numpy as np

# Single-row sample. The data row has one more field than the header,
# so pandas uses the leading field as the index.
data = """A    B   C      D          E
    0  0  0.0     r    False    1970-01-01"""

# The separator is a regex (two or more whitespace characters). It is written
# as a raw string because "\s" is not a valid escape in a regular string
# literal and triggers an invalid-escape warning there.
df = pd.read_csv(
    StringIO(data),
    sep=r"\s{2,}",
    engine="python",
    parse_dates=["E"],
)

df

Unnamed: 0,A,B,C,D,E
0,0,0.0,r,False,1970-01-01


In [3]:
# Single-row sample for the right-hand frame. The data row has one more field
# than the header, so pandas uses the leading field as the index.
data = """Integers  Numeric  Floats   Strings  Booleans      Dates   Dates_Right
    0         0      0.0     0.0      r       False   1970-01-01    1970-01-01"""

# Raw string for the regex separator: "\s" is not a valid escape in a regular
# string literal and triggers an invalid-escape warning there.
right = pd.read_csv(
    StringIO(data),
    sep=r"\s{2,}",
    engine="python",
    parse_dates=["Dates", "Dates_Right"],
)

right

Unnamed: 0,Integers,Numeric,Floats,Strings,Booleans,Dates,Dates_Right
0,0,0.0,0.0,r,False,1970-01-01,1970-01-01


In [4]:
# Column roles for a strict range join: Dates < E < Dates_Right.
middle = "E"
left_on = "Dates"
right_on = "Dates_Right"

df.conditional_join(
    right,
    (middle, left_on, ">"),
    (middle, right_on, "<"),
    how="inner",
    sort_by_appearance=True,
)

Unnamed: 0,A,B,C,D,E,Integers,Numeric,Floats,Strings,Booleans,Dates,Dates_Right


In [5]:
# Reference result with plain pandas: pair every row of `df` with every row
# of `right` through a constant key `t`, then keep only the rows that satisfy
# the strict range condition.
pd.merge(df.assign(t=1), right.assign(t=1), on="t").query(
    f"{left_on} < {middle} < {right_on}"
).reset_index(drop=True)


Unnamed: 0,A,B,C,D,E,t,Integers,Numeric,Floats,Strings,Booleans,Dates,Dates_Right
