In [1]:
import pandas as pd
# Load both input files, splitting columns on runs of whitespace.
# The separator is a regular expression, so it is written as a raw string:
# in a plain string literal "\s" is an invalid escape sequence, which Python
# reports with a warning.
df1 = pd.read_csv('../csv/file1.csv', sep=r"\s+")
df2 = pd.read_csv('../csv/file2.csv', sep=r"\s+")

In [2]:
# Show the first frame as loaded (columns: name, type, value).
df1

Unnamed: 0,name,type,value
0,Mike,a+,98
1,Jery,a-,144
2,Tomy,b,108


In [3]:
# Show the second frame as loaded (columns: type, low, high).
df2

Unnamed: 0,type,low,high
0,a+,78,97
1,a-,108,143
2,b,108,150


## DataFrames of the same size

In [4]:
import numpy as np
# Row-by-row check that both frames carry the same `type` label.
# np.where emits the strings 'True' / 'False' (not booleans).
# NOTE: despite its name, `low_value` stores the type-match flag.
types_match = df1['type'] == df2['type']
df1['low_value'] = np.where(types_match, 'True', 'False')

In [5]:
# Inspect the type-match flags written by the previous cell.
df1['low_value']

0    True
1    True
2    True
Name: low_value, dtype: object

In [6]:
# Flag, row by row, whether df1's value is strictly below df2's upper bound.
# The result is stored as the strings 'True' / 'False'.
import numpy as np
is_below_high = df1['value'] < df2['high']
df1['low_high'] = np.where(is_below_high, 'True', 'False')

In [7]:
# Inspect the "value below high" flags written by the previous cell.
df1['low_high']

0    False
1    False
2     True
Name: low_high, dtype: object

In [8]:
# Test one column of the first frame against two columns of the second:
# is df1.value inside the closed interval [df2.low, df2.high] of the same row?
at_least_low = df1['value'] >= df2['low']
at_most_high = df1['value'] <= df2['high']
df1['low_high_value'] = np.where(at_least_low & at_most_high, 'True', 'False')

In [9]:
# Inspect the "value within [low, high]" flags written by the previous cell.
df1['low_high_value']

0    False
1    False
2     True
Name: low_high_value, dtype: object

In [10]:
# Test two columns of the first frame against two columns of the second:
# the row's type must match AND its value must not exceed df2's upper bound.
# The resulting array is only displayed, not stored.
same_type = df1['type'] == df2['type']
within_high = df1['value'] <= df2['high']
np.where(same_type & within_high, 'True', 'False')

array(['False', 'False', 'True'], dtype='<U5')

In [11]:
# Remove the three helper columns created above before the next examples.
helper_columns = ['low_value', 'low_high', 'low_high_value']
df1 = df1.drop(columns=helper_columns)

In [12]:
# Build a row-wise Boolean mask and join it to the first frame.
# `between` includes both end points by default, so no `inclusive` argument
# is passed: the boolean form `inclusive=True` is deprecated in newer pandas
# releases in favour of the string "both".
type_known = df2['type'].isin(df1['type'])
value_in_range = df1['value'].between(df2['low'], df2['high'])
# df3 is a one-element list holding the unnamed mask; the joined column
# shows up labelled 0.
df3 = [type_known & value_in_range]
df1.join(df3)

Unnamed: 0,name,type,value,0
0,Mike,a+,98,False
1,Jery,a-,144,False
2,Tomy,b,108,True


In [13]:
# Same check as a real Boolean column on the first frame:
# df2's type occurs among df1's types AND df1.value lies in [df2.low, df2.high].
type_present = df2['type'].isin(df1['type'])
not_below_low = df1['value'] >= df2['low']
not_above_high = df1['value'] <= df2['high']
df1['enh1'] = type_present & not_below_low & not_above_high

In [14]:
# Show the first frame with the new Boolean column `enh1`.
df1

Unnamed: 0,name,type,value,enh1
0,Mike,a+,98,False
1,Jery,a-,144,False
2,Tomy,b,108,True


In [15]:
# Three conditions combined with AND and OR; any valid element-wise
# expression can be used.
# `&` binds tighter than `|`, so the grouping is (A & B) | C; the
# parentheses below spell that out.
type_present = df2['type'].isin(df1['type'])
differs_from_low = df1['value'] != df2['low']
one_below_high = (df1['value'] + 1) == df2['high']
df1['enh2'] = (type_present & differs_from_low) | one_below_high

In [16]:
# Show the first frame with both Boolean columns, `enh1` and `enh2`.
df1

Unnamed: 0,name,type,value,enh1,enh2
0,Mike,a+,98,False,True
1,Jery,a-,144,False,True
2,Tomy,b,108,True,False


## DataFrames of different sizes

In [17]:
# Add one extra row to the second frame so the two frames differ in length.
# `DataFrame.append` is deprecated (and absent from pandas 2.x); `pd.concat`
# with a one-row frame produces the same result.
extra_row = pd.DataFrame([{'type': '0', 'low': 143, 'high': 108}])
df2 = pd.concat([df2, extra_row], ignore_index=True)

In [18]:
# Outer-join the two frames on their shared `type` column so that rows
# present in only one frame are kept as well.
merged = df1.merge(df2, on='type', how='outer')

In [19]:
# Show the outer-merged frame; the row that exists only in df2 has missing
# values in the columns that come from df1.
merged

Unnamed: 0,name,type,value,enh1,enh2,low,high
0,Mike,a+,98.0,False,True,78,97
1,Jery,a-,144.0,False,True,108,143
2,Tomy,b,108.0,True,False,108,150
3,,0,,,,143,108


In [20]:
# Keep only the merged rows whose value lies inside [low, high].
at_or_above_low = merged['value'] >= merged['low']
at_or_below_high = merged['value'] <= merged['high']
merged.loc[at_or_above_low & at_or_below_high]

Unnamed: 0,name,type,value,enh1,enh2,low,high
2,Tomy,b,108.0,True,False,108,150


# Error: `ValueError: Can only compare identically-labeled Series objects`

In [21]:
# Demo of the error: df2 now has one more row than df1, so the element-wise
# comparison raises
#   ValueError: Can only compare identically-labeled Series objects
# The exception is caught and printed so the demonstration stays visible
# without stopping a "Restart & Run All".
import numpy as np
try:
    df1['low_high'] = np.where(df1['value'] < df2['high'], 'True', 'False')
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Can only compare identically-labeled Series objects

In [22]:
# Remove the extra row (index label 3) so both frames have equal length again.
df2 = df2.drop(index=3)

In [23]:
# With the extra row removed, both frames have the same number of rows and
# the comparison that failed before now works.
import numpy as np
is_below_high = df1['value'] < df2['high']
df1['low_high'] = np.where(is_below_high, 'True', 'False')

In [24]:
# Provoke the same error on equally long frames: relabel df1's rows 1, 2, 3
# so its index no longer matches df2's.
shifted_labels = pd.Index([1, 2, 3])
df1 = df1.set_index(shifted_labels)

In [25]:
# Demo of the error caused by mismatching index labels: the frames have the
# same number of rows, but df1 is labelled 1, 2, 3, so the comparison raises
#   ValueError: Can only compare identically-labeled Series objects
# The exception is caught and printed so the demonstration stays visible
# without stopping a "Restart & Run All".
import numpy as np
try:
    df1['low_high'] = np.where(df1['value'] < df2['high'], 'True', 'False')
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Can only compare identically-labeled Series objects

In [26]:
# Possible fix, step 1: put the rows in index order.
# NOTE: the labels here are 1, 2, 3 and already ordered, so sorting alone
# does not change them; the index is rebuilt in the next cell.
df1 = df1.sort_index()

In [27]:
# Possible fix for "Can only compare identically-labeled Series objects":
# replace df1's index with a fresh default one.
# drop=True discards the old labels instead of inserting them as a stray
# `index` column, which also keeps this cell safe to run more than once.
df1.reset_index(drop=True, inplace=True)

In [28]:
# After the index reset the comparison that raised
# "ValueError: Can only compare identically-labeled Series objects"
# runs without error.
import numpy as np
is_below_high = df1['value'] < df2['high']
df1['low_high'] = np.where(is_below_high, 'True', 'False')

In [29]:
# Inspect the flags computed after the index fix.
df1['low_high']

0    False
1    False
2     True
Name: low_high, dtype: object