In [1]:
import pandas as pd
import numpy as np 
import janitor 
from io import StringIO
from janitor.utils import _not_equal_indices
from pandasql import sqldf
def pysqldf(q):
    """Run the SQL string ``q`` through ``sqldf``, passing this module's globals."""
    return sqldf(q, globals())

In [2]:
# Left-hand frame for the join experiments. Fields are separated by runs of
# two or more whitespace characters; each data row carries one more field
# than the header, and the displayed frame shows that leading field as the index.
data = """ A    B   C      D          E
    0  0    0.0    r    False   1970-01-01
    1  0    NaN   r    False   1970-01-01
    """

# The separator is a regular expression, so it is written as a raw string:
# in a normal string literal ``\s`` is an invalid escape sequence that
# Python warns about (and is scheduled to reject).
df = pd.read_csv(
    StringIO(data),
    sep=r'\s{2,}',
    engine='python',
    parse_dates=['E'],
)

In [3]:
# Right-hand frame for the join experiments: a single row, using the same
# "two or more whitespace characters" field separator as the left frame.
data = """Integers  Numeric  Floats   Strings  Booleans      Dates    Dates_Right
    0         1      1.0     0.0       r      False    1970-01-01     1970-01-01
    """

# Raw string for the regex separator: ``\s`` inside a normal string literal
# is an invalid escape sequence that Python warns about.
right = pd.read_csv(
    StringIO(data),
    sep=r'\s{2,}',
    engine='python',
    parse_dates=['Dates', 'Dates_Right'],
)

In [4]:
# Show the left frame built by read_csv (two rows; B is missing in the second).
df

Unnamed: 0,A,B,C,D,E
0,0,0.0,r,False,1970-01-01
1,0,,r,False,1970-01-01


In [5]:
# Show the right frame built by read_csv (a single row).
right

Unnamed: 0,Integers,Numeric,Floats,Strings,Booleans,Dates,Dates_Right
0,1,1.0,0.0,r,False,1970-01-01,1970-01-01


In [6]:
# SQL reference for the join under test: pair rows of df and right where
# A differs from Integers AND B differs from Numeric.
query = """select A, Integers, B, Numeric
           from df
           join right
           on A != Integers and B != Numeric
        """

# pysqldf forwards the query to sqldf together with globals(); presumably
# that is how the table names `df` and `right` in the SQL are resolved.
# In the result shown, only the left row whose B is not missing appears.
pysqldf(query)

Unnamed: 0,A,Integers,B,Numeric
0,0,1,0.0,1.0


In [7]:
# Inspect the column dtypes that read_csv produced for the left frame.
df.dtypes

A             int64
B           float64
C            object
D              bool
E    datetime64[ns]
dtype: object

In [8]:
# Call janitor's private helper directly on the first column pair.
# NOTE(review): the result shown is a pair of integer arrays, presumably the
# matching left and right row positions; confirm against janitor's source.
_not_equal_indices(df.A, right.Integers)

(array([0, 1]), array([0, 0]))

In [9]:
# Same private helper on the second column pair. In the result shown, only
# position 0 is returned for the left side; the row where B is NaN is absent.
_not_equal_indices(df.B, right.Numeric)

(array([0]), array([0]))

In [10]:
# The columns taking part in the join conditions, as left/right pairs.
filters = ["A", "Integers", "B", "Numeric"]

# The join under test: both "not equal" conditions must hold for a row pair.
actual = df.conditional_join(
    right,
    ("A", "Integers", "!="),
    ("B", "Numeric", "!="),
    sort_by_appearance=True,
    how="inner",
)

# The displayed result has two-level columns (left / right). Dropping the top
# level and keeping only the `filters` columns was tried here and is left
# disabled, so the frame is shown exactly as conditional_join returned it.
actual


Unnamed: 0_level_0,left,left,left,left,left,right,right,right,right,right,right,right
Unnamed: 0_level_1,A,B,C,D,E,Integers,Numeric,Floats,Strings,Booleans,Dates,Dates_Right
0,0,0.0,r,False,1970-01-01,1,1.0,0.0,r,False,1970-01-01,1970-01-01


In [11]:
# Brute-force reference: cross join the two frames on a constant helper key
# `t`, discard row pairs with a missing value in any of the `filters`
# columns, then keep the pairs that satisfy both "not equal" conditions.
# The helper key `t` is not dropped and remains in the displayed result.
expected = (
    pd.merge(df.assign(t=1), right.assign(t=1), on="t")
    .dropna(subset=filters)
    .query("A != Integers and B != Numeric")
    .reset_index(drop=True)
)

expected

Unnamed: 0,A,B,C,D,E,t,Integers,Numeric,Floats,Strings,Booleans,Dates,Dates_Right
0,0,0.0,r,False,1970-01-01,1,1,1.0,0.0,r,False,1970-01-01,1970-01-01
