In [1]:
import pandas as pd
import numpy as np

In [3]:
# A small Series of strings with one missing entry (np.nan) at position 2
data = pd.Series(
    ['one', 'two', np.nan, 'four']
)

data

0     one
1     two
2     NaN
3    four
dtype: object

In [4]:
# Boolean mask over `data`: True exactly where the entry is NaN, False elsewhere
data.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [5]:
# We can also drop the NaN (null) entries; the surviving entries keep their
# original index labels (note that label 2 is missing from the result below)
data.dropna()

0     one
1     two
3    four
dtype: object

In [7]:
# Let's check this out for a DataFrame: rows 1 and 2 each contain a single NaN,
# while row 3 consists entirely of NaN values
dframe = pd.DataFrame([
    [1, 2, 3],
    [np.nan, 5, 6],
    [7, np.nan, 9],
    [np.nan, np.nan, np.nan],
])

dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [9]:
# Spell out the defaults: work along the rows and drop a row if ANY value in it is NaN
clean_dframe = dframe.dropna(axis='index', how='any')
clean_dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


Every row that had a NaN (null) value got dropped.

In [11]:
# Drop a row only when ALL of its values are NaN
dframe.dropna(axis='index', how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0


how='all' deletes only the rows where all the values are NaN (null). By default it is set to how='any', that is, it drops every row that has any NaN (null) value.

In [12]:
# To drop columns containing NaN instead of rows, work along the column axis
dframe.dropna(axis='columns')

0
1
2
3


All the columns were dropped, as every column had at least one NaN value.

In [13]:
# Drop a column only when ALL of its values are NaN
dframe.dropna(axis='columns', how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


None of the columns were dropped as none of the columns had all NaN values.

In [14]:
# A second frame whose rows hold 3, 3, 2 and 1 non-NaN values respectively,
# which makes it convenient for experimenting with row-wise thresholds
dframe2 = pd.DataFrame([
    [1, 2, 3, np.nan],
    [2, np.nan, 5, 6],
    [np.nan, 7, np.nan, 9],
    [1, np.nan, np.nan, np.nan],
])

dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [15]:
# thresh=2: keep only the rows that contain at least 2 non-NaN values
dframe2.dropna(axis='index', thresh=2)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0


"thresh" keeps only those rows that have at least the given number (2 in the above case) of non-null values; rows with fewer are dropped. Thus, in the above case, row 3 got dropped as it had only 1 non-NaN value.

In [16]:
# thresh=3: keep only the rows that contain at least 3 non-NaN values
dframe2.dropna(axis='index', thresh=3)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0


In [17]:
# Rather than dropping the null values, replace every NaN with one default value
dframe2.fillna(value=0)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,0.0
1,2.0,0.0,5.0,6.0
2,0.0,7.0,0.0,9.0
3,1.0,0.0,0.0,0.0


In [18]:
# A dict maps each column label to the fill value used for the NaNs in that column
dframe2.fillna(value={
    0: 'a',
    1: 'b',
    2: 'c',
    3: 'd',
})

Unnamed: 0,0,1,2,3
0,1,2,3,d
1,2,b,5,6
2,a,7,c,9
3,1,b,c,d
