In [1]:
import numpy as np 
import pandas as pd

In [2]:
pd.isnull(np.nan)

True

In [3]:
pd.isnull(None)

True

In [4]:
pd.isna(np.nan)

True

In [5]:
pd.isna(None)

True

In [9]:
pd.notnull(3)

True

In [11]:
pd.notna(np.nan)

False

In [12]:
pd.isnull(pd.Series([1,2,3,np.nan,4]))

0    False
1    False
2    False
3     True
4    False
dtype: bool

In [13]:
pd.notnull(pd.Series([1,2,3,np.nan,4]))

0     True
1     True
2     True
3    False
4     True
dtype: bool

In [14]:
pd.isnull(pd.Series({
    'Column A':[1,np.nan,3],
    'Column B':[np.nan,5,6],
    'Column C':[7,8,np.nan]
}))

Column A    False
Column B    False
Column C    False
dtype: bool

# Pandas Operation with Missing Values

In [15]:
pd.Series([1,2,3,np.nan]).count()

3

In [16]:
pd.Series([1,2,3,np.nan]).sum()

6.0

In [17]:
pd.Series([1,2,3,np.nan]).mean()

2.0

# Filtering missing data

In [18]:
series1 = pd.Series([1,2,3,4,5,np.nan])

In [19]:
pd.notnull(series1)

0     True
1     True
2     True
3     True
4     True
5    False
dtype: bool

In [23]:
pd.notnull(series1).count()

6

In [22]:
series1[pd.notnull(series1)]

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
dtype: float64

In [25]:
series1[series1.notnull()]

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
dtype: float64

# Dropping null values

In [27]:
series1.dropna()

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
dtype: float64

In [28]:
series1

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    NaN
dtype: float64

# Dropping null values on DataFrames

In [60]:
df = pd.DataFrame({
    'Column A':[1,np.nan,3],
    'Column B':[np.nan,5,6],
    'Column C':[7,8,np.nan],
    'Column D':[10,np.nan,12],
    'Column E':[np.nan,14,15]
})

In [30]:
df

Unnamed: 0,Column A,Column B,Column C,Column D,Column E
0,1.0,,7.0,10.0,
1,,5.0,8.0,,14.0
2,3.0,6.0,,12.0,


In [31]:
df.isnull()

Unnamed: 0,Column A,Column B,Column C,Column D,Column E
0,False,True,False,False,True
1,True,False,False,True,False
2,False,False,True,False,True


In [32]:
df.isnull().sum()

Column A    1
Column B    1
Column C    1
Column D    1
Column E    2
dtype: int64

In [33]:
df.dropna()

Unnamed: 0,Column A,Column B,Column C,Column D,Column E


In [34]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Column A  2 non-null      float64
 1   Column B  2 non-null      float64
 2   Column C  2 non-null      float64
 3   Column D  2 non-null      float64
 4   Column E  1 non-null      float64
dtypes: float64(5)
memory usage: 248.0 bytes


In [37]:
df.shape

(3, 5)

In [38]:
df.dropna(how = 'all')

Unnamed: 0,Column A,Column B,Column C,Column D,Column E
0,1.0,,7.0,10.0,
1,,5.0,8.0,,14.0
2,3.0,6.0,,12.0,


In [39]:
df.dropna(how = 'any')

Unnamed: 0,Column A,Column B,Column C,Column D,Column E


In [42]:
df.dropna(thresh= 2)

Unnamed: 0,Column A,Column B,Column C,Column D,Column E
0,1.0,,7.0,10.0,
1,,5.0,8.0,,14.0
2,3.0,6.0,,12.0,


In [47]:
df.dropna(thresh= 3, axis=1)

0
1
2


# Filling null values

In [48]:
series1

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    NaN
dtype: float64

In [49]:
series1.fillna(6.0)

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [54]:
series1.fillna(series1.mean())

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    3.0
dtype: float64

In [51]:
series1.fillna(method='ffill')

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    5.0
dtype: float64

In [52]:
series1.fillna(method='bfill')

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    NaN
dtype: float64

# Filling null values on DataFrames

In [55]:
df

Unnamed: 0,Column A,Column B,Column C,Column D,Column E
0,1.0,,7.0,10.0,
1,,5.0,8.0,,14.0
2,3.0,6.0,,12.0,


In [61]:
df.fillna({
    'Column A':2,
    'Column B':4,
    'Column C':9,
    'Column D':11,
    'Column E': 13
          })

Unnamed: 0,Column A,Column B,Column C,Column D,Column E
0,1.0,4.0,7.0,10.0,13.0
1,2.0,5.0,8.0,11.0,14.0
2,3.0,6.0,9.0,12.0,15.0


In [64]:
df.fillna(method='bfill',axis=0)

Unnamed: 0,Column A,Column B,Column C,Column D,Column E
0,1.0,5.0,7.0,10.0,14.0
1,3.0,5.0,8.0,12.0,14.0
2,3.0,6.0,,12.0,15.0


# Checking if there are NAs

In [65]:
series1.dropna().count()

5

In [67]:
missing_values = len(series1.dropna()) != len(series1)
missing_values

True

In [68]:
len(series1)

6

In [70]:
series1.count()

5

In [72]:
missing_values = series1.count() != len(series1)
missing_values

True

In [73]:
pd.Series([True,False,True]).any()

True

In [74]:
pd.Series([True,False,True]).all()

False

In [75]:
pd.Series([True,True,True]).all()

True

In [77]:
series1.isnull().any()

True