In [1]:
import numpy as np
import pandas as pd


In [2]:
# Seed NumPy's global RNG so the random columns below are reproducible.
np.random.seed(200)

# Draw the random columns in frame order (B first, then C) so the seeded
# stream is consumed in a fixed sequence.
n_rows = 4
col_a = np.arange(n_rows)
col_b = np.random.randint(1, 30, n_rows)
col_c = np.random.rand(n_rows)

df = pd.DataFrame({"A": col_a, "B": col_b, "C": col_c})
df

Unnamed: 0,A,B,C
0,0,27,0.59442
1,1,10,0.428309
2,2,17,0.764141
3,3,5,0.002861


#### Boolean Reduction

In [3]:
# Per-column reduction: is every value in the column greater than 5?
df.gt(5).all()

A    False
B    False
C    False
dtype: bool

In [4]:
# Chaining a second reduction collapses the per-column answers into one
# boolean: True when at least one column has all of its values above 4.
df.gt(4).all().any()

True

In [5]:
# You can test whether a pandas object is empty via its `empty` property.
df.empty

False

#### Comparing if objects are equivalent

In [6]:
# `==` compares element by element, so the result is a DataFrame of booleans
# (one per cell) rather than a single True/False.
df + df == df * 2

Unnamed: 0,A,B,C
0,True,True,True
1,True,True,True
2,True,True,True
3,True,True,True


In [7]:
# Element-wise `==` cannot confirm equality on its own when NaNs are present,
# because NaN does not compare equal to NaN. NDFrames (such as Series and
# DataFrames) therefore have an equals() method for testing equality, with
# NaNs in corresponding locations treated as equal.

(df + df).equals(df * 2)

True

In [8]:
# equals() only returns True when the Series or DataFrame index is in the
# same order on both sides.
mixed_values = ["foo", 0, np.nan]
df1 = pd.DataFrame({"col": mixed_values})

# Same labels and values as df1, but stored in reverse row order.
df2 = pd.DataFrame({"col": mixed_values[::-1]}, index=[2, 1, 0])

df1.equals(df2)
# False

# Sorting df2 by its index puts the rows in df1's order.
df1.equals(df2.sort_index())

True

#### Comparing array-like objects

In [9]:
# Comparing a pandas data structure with a scalar value is element-wise: each
# element is compared against the scalar and a boolean Series is returned.
pd.Series(["foo", "bar", "baz"]) == "foo"

0     True
1    False
2    False
dtype: bool

#### Combining overlapping data sets
- A problem occasionally arising is the combination of two similar data sets where values in one are preferred over the other. An example would be two data series representing a particular economic indicator where one is considered to be of “higher quality”. However, the lower quality series might extend further back in history or have more complete data coverage. As such, we would like to combine two DataFrame objects where missing values in one DataFrame are conditionally filled with like-labeled values from the other DataFrame. The function implementing this operation is combine_first(), which we illustrate:

In [10]:
# Two frames with overlapping row labels and gaps (NaN) in different places.
# df2 has one more row (label 5) than df1.
df1 = pd.DataFrame(
    dict(
        A=[1.0, np.nan, 3.0, 5.0, np.nan],
        B=[np.nan, 2.0, 3.0, np.nan, 6.0],
    )
)

df2 = pd.DataFrame(
    dict(
        A=[5.0, 2.0, 4.0, np.nan, 3.0, 7.0],
        B=[np.nan, np.nan, 3.0, 4.0, 6.0, 8.0],
    )
)


In [11]:
# Inspect both inputs; a tuple is shown using each frame's plain-text repr.
(df1, df2)

(     A    B
 0  1.0  NaN
 1  NaN  2.0
 2  3.0  3.0
 3  5.0  NaN
 4  NaN  6.0,
      A    B
 0  5.0  NaN
 1  2.0  NaN
 2  4.0  3.0
 3  NaN  4.0
 4  3.0  6.0
 5  7.0  8.0)

In [12]:
# Keep df1's values and fill its NaNs with the like-labeled values from df2.
# Row 5 exists only in df2, so it is carried over; cell (0, "B") is NaN in
# both frames and therefore stays NaN.
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0


In [13]:
def combiner(x, y):
    """Pick values from ``x``, falling back to ``y`` where ``x`` is missing.

    Parameters
    ----------
    x, y : array-like
        Inputs of matching length. When used with ``DataFrame.combine`` these
        are presumably the two aligned columns being merged.

    Returns
    -------
    numpy.ndarray
        Element-wise result: the value from ``y`` wherever ``pd.isna(x)`` is
        true, otherwise the value from ``x``.
    """
    missing_in_x = pd.isna(x)
    return np.where(missing_in_x, y, x)
    
# combine() applies `combiner` to df1 and df2; with this combiner the result
# matches the df1.combine_first(df2) output shown earlier.
df1.combine(df2, combiner)

Unnamed: 0,A,B
0,1.0,
1,2.0,2.0
2,3.0,3.0
3,5.0,4.0
4,3.0,6.0
5,7.0,8.0
