In [2]:
import pandas as pd
import numpy as np

In [3]:
# Demo data: a 5x5 DataFrame filled with samples from the standard normal distribution.
df = pd.DataFrame(np.random.standard_normal(size=(5, 5)))
df

Unnamed: 0,0,1,2,3,4
0,1.173105,-0.151731,-0.155532,0.237192,1.150662
1,0.518851,-0.813786,0.357358,1.132077,-1.137039
2,-0.779779,-0.555208,-0.418976,-2.4446,-0.158618
3,-0.813416,0.630976,0.053594,-0.270843,-0.321203
4,1.24456,-0.290644,-0.991455,-0.213903,0.41613


In [4]:
# Replace every entry greater than 0.9 with NaN so the frame has missing values to count.
# Use the numpy alias imported at the top of the notebook: the `pd.np` shortcut was
# deprecated in pandas 1.0 and removed in pandas 2.0, where it raises AttributeError.
df[df > 0.9] = np.nan
df

Unnamed: 0,0,1,2,3,4
0,,-0.151731,-0.155532,0.237192,
1,0.518851,-0.813786,0.357358,,-1.137039
2,-0.779779,-0.555208,-0.418976,-2.4446,-0.158618
3,-0.813416,0.630976,0.053594,-0.270843,-0.321203
4,,-0.290644,-0.991455,-0.213903,0.41613


In [5]:
# isnull() flags each missing cell as True. Chaining .sum() onto it does not give the total number of
# missing values in the DataFrame; it returns a Series holding the count of missing values in each column:
df.isnull().sum()

0    2
1    0
2    0
3    1
4    1
dtype: int64

In [6]:
# In the output shown above, column 0 contains two missing values, and columns 3 and 4 contain one each (the data is random, so the counts will differ on a re-run).

# To get the total number of missing values in the whole DataFrame, we chain two .sum() methods together: the first counts per column, the second adds those counts up.

df.isnull().sum().sum()

4

In [7]:
# Build a small pandas Series of mixed values, one of which (NaN) is missing.
s = pd.Series(data=[2, 3, np.nan, 7, "The Hobbit"])

In [8]:
# To test the isnull() method on this Series, call s.isnull() and view the output:
# it returns a boolean Series that is True at each position holding a missing value.
s.isnull()

0    False
1    False
2     True
3    False
4    False
dtype: bool

In [9]:
# As expected, the only value evaluated as missing is the one at index 2.
# While the isnull() method is useful, sometimes we only want to know whether any value at all is missing in a Series.
# There are a few ways to do this by chaining methods together.
# One option, described here as the fastest (NOTE(review): not benchmarked in this notebook; confirm with %timeit),
# is to chain .values.any() onto the isnull() result:
s.isnull().values.any()

True

In [10]:
# In some cases you may want to know how many missing values the collection contains;
# chain .sum() onto isnull() to count them (each True counts as 1).
s.isnull().sum()

1