In [1]:
import numpy as np
import pandas as pd

<h3>Pandas Utility Functions</h3>

In [2]:
pd.isnull(np.nan)

True

In [3]:
pd.isnull(None)

True

In [4]:
pd.isna(np.nan)

True

In [6]:
pd.isna(None)

True

In [7]:
pd.notnull(np.nan)

False

In [8]:
pd.notnull(None)

False

In [9]:
pd.notnull(3)

True

In [10]:
pd.notna(np.nan)

False

In [11]:
pd.notna(None)

False

In [12]:
pd.notna(3)

True

In [14]:
pd.isnull(pd.Series([1, np.nan, 7]))

0    False
1     True
2    False
dtype: bool

In [15]:
pd.notnull(pd.Series([1, np.nan, 7]))

0     True
1    False
2     True
dtype: bool

In [16]:
# pd.isnull also accepts a whole DataFrame: the result is a boolean frame of
# the same shape, True wherever the corresponding cell is NaN.
pd.isnull(pd.DataFrame({
    'Column 1':[1, np.nan, 7],
    'Column 2':[np.nan, 2, 3],
    'Column 3':[np.nan, 2, np.nan]
}))

Unnamed: 0,Column 1,Column 2,Column 3
0,False,True,True
1,True,False,False
2,False,False,True


<h3>Pandas Operations with Missing Values</h3>

In [18]:
pd.Series([1,2,np.nan]).sum()

3.0

In [19]:
pd.Series([1,2,np.nan]).mean()

1.5

In [20]:
pd.Series([1,2,np.nan]).count()

2

<h3>Filtering missing data</h3>

In [21]:
s = pd.Series([1,2,3,np.nan,np.nan,4])

In [22]:
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    4.0
dtype: float64

In [23]:
pd.notnull(s)

0     True
1     True
2     True
3    False
4    False
5     True
dtype: bool

In [24]:
pd.notnull(s).sum()

4

In [25]:
pd.isna(s)

0    False
1    False
2    False
3     True
4     True
5    False
dtype: bool

In [28]:
pd.isna(s).sum()

2

In [29]:
s[pd.notna(s)]

0    1.0
1    2.0
2    3.0
5    4.0
dtype: float64

In [33]:
s[pd.isna(s)]

3   NaN
4   NaN
dtype: float64

<p><code>isnull</code> and <code>notnull</code> are also available as Series methods, so they can be called directly on the Series:</p>

In [34]:
s.notnull()

0     True
1     True
2     True
3    False
4    False
5     True
dtype: bool

In [35]:
s.isnull()

0    False
1    False
2    False
3     True
4     True
5    False
dtype: bool

In [37]:
s[s.notnull()]

0    1.0
1    2.0
2    3.0
5    4.0
dtype: float64

<h3>Dropping null values</h3>

In [38]:
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    4.0
dtype: float64

In [39]:
s.dropna()

0    1.0
1    2.0
2    3.0
5    4.0
dtype: float64

<h3>Dropping null values on DataFrames</h3>

In [40]:
# Demo frame for the dropna/fillna examples below: Columns 1-3 each contain
# at least one NaN, while Column 4 is fully populated.
df = pd.DataFrame.from_dict(
    {
        'Column 1': [1, np.nan, 30, np.nan],
        'Column 2': [2, 8, 31, np.nan],
        'Column 3': [np.nan, 9, 32, 100],
        'Column 4': [5, 8, 34, 110],
    }
)

In [41]:
df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [42]:
df.shape

(4, 4)

In [43]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Column 1  2 non-null      float64
 1   Column 2  3 non-null      float64
 2   Column 3  3 non-null      float64
 3   Column 4  4 non-null      int64  
dtypes: float64(3), int64(1)
memory usage: 256.0 bytes


In [44]:
df.isnull()

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,False,False,True,False
1,True,False,False,False
2,False,False,False,False
3,True,True,False,False


In [47]:
df.isnull().sum()

Column 1    2
Column 2    1
Column 3    1
Column 4    0
dtype: int64

In [48]:
df.dropna()

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
2,30.0,31.0,32.0,34


In [49]:
df.dropna(axis=1)

Unnamed: 0,Column 4
0,5
1,8
2,34
3,110


In [50]:
# Second demo frame: row 1 is entirely NaN and row 0 is only partially NaN,
# which lets dropna(how='all') and dropna(how='any') give different results.
df2 = pd.DataFrame.from_dict(
    {
        'Column A': [1, np.nan, 30],
        'Column B': [2, np.nan, 31],
        'Column C': [np.nan, np.nan, 100],
    }
)

In [51]:
df2

Unnamed: 0,Column A,Column B,Column C
0,1.0,2.0,
1,,,
2,30.0,31.0,100.0


In [52]:
df2.dropna(how='all')

Unnamed: 0,Column A,Column B,Column C
0,1.0,2.0,
2,30.0,31.0,100.0


In [53]:
df2.dropna(how='any')

Unnamed: 0,Column A,Column B,Column C
2,30.0,31.0,100.0


In [54]:
df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [56]:
df.dropna(thresh=3)

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34


In [57]:
df.dropna(thresh=3,axis=1)

Unnamed: 0,Column 2,Column 3,Column 4
0,2.0,,5
1,8.0,9.0,8
2,31.0,32.0,34
3,,100.0,110


<h3>Filling null values</h3>

In [59]:
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    4.0
dtype: float64

In [60]:
s.fillna(0)

0    1.0
1    2.0
2    3.0
3    0.0
4    0.0
5    4.0
dtype: float64

In [61]:
s.fillna(s.mean())

0    1.0
1    2.0
2    3.0
3    2.5
4    2.5
5    4.0
dtype: float64

<h4>Filling nulls with neighboring values</h4>

In [62]:
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    4.0
dtype: float64

In [63]:
# Forward fill: each NaN takes the last valid value that precedes it.
# (fillna(method=...) is deprecated since pandas 2.1; ffill() is the
# supported spelling and returns the same result.)
s.ffill()

0    1.0
1    2.0
2    3.0
3    3.0
4    3.0
5    4.0
dtype: float64

In [64]:
# Backward fill: each NaN takes the next valid value that follows it.
# (fillna(method=...) is deprecated since pandas 2.1; bfill() is the
# supported spelling and returns the same result.)
s.bfill()

0    1.0
1    2.0
2    3.0
3    4.0
4    4.0
5    4.0
dtype: float64

In [65]:
# Forward fill cannot fill a leading NaN: there is no earlier value to copy.
# Uses ffill() instead of the deprecated fillna(method='ffill').
pd.Series([np.nan,0,1,2,3]).ffill()

0    NaN
1    0.0
2    1.0
3    2.0
4    3.0
dtype: float64

In [66]:
# Backward fill cannot fill trailing NaNs: there is no later value to copy.
# Uses bfill() instead of the deprecated fillna(method='bfill').
pd.Series([0,1,2,3,np.nan,np.nan]).bfill()

0    0.0
1    1.0
2    2.0
3    3.0
4    NaN
5    NaN
dtype: float64

<h3>Filling Null Values of Dataframes</h3>

In [67]:
df

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,,5
1,,8.0,9.0,8
2,30.0,31.0,32.0,34
3,,,100.0,110


In [68]:
df.fillna(0)

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,0.0,5
1,0.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,0.0,0.0,100.0,110


In [73]:
# Per-column fill values: fixed constants for Columns 1 and 2, and the
# column's own mean for Column 3. Columns not listed are left untouched.
df.fillna(
    value={
        'Column 1': 0,
        'Column 2': 99,
        'Column 3': df['Column 3'].mean(),
    }
)

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,47.0,5
1,0.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,0.0,99.0,100.0,110


In [74]:
# Forward fill down each column (axis=0): a NaN takes the value from the row
# above; a NaN in the first row stays NaN.
# Uses ffill() instead of the deprecated fillna(method='ffill').
df.ffill(axis=0)

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,,5
1,1.0,8.0,9.0,8
2,30.0,31.0,32.0,34
3,30.0,31.0,100.0,110


In [75]:
# Forward fill across each row (axis=1): a NaN takes the value from the
# column to its left; a NaN in the first column stays NaN.
# Uses ffill() instead of the deprecated fillna(method='ffill').
df.ffill(axis=1)

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
0,1.0,2.0,2.0,5.0
1,,8.0,9.0,8.0
2,30.0,31.0,32.0,34.0
3,,,100.0,110.0


<h3>Checking if there are NAs</h3>

In [81]:
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    4.0
dtype: float64

In [78]:
s.dropna().count()

4

In [82]:
s

0    1.0
1    2.0
2    3.0
3    NaN
4    NaN
5    4.0
dtype: float64

In [86]:
len(s.dropna())

4

In [87]:
len(s)

6

In [91]:
# If dropping NaNs shortens the Series, it contained at least one missing value.
missing_values = len(s) != len(s.dropna())

In [92]:
missing_values

True

In [93]:
s.count()

4

In [94]:
# count() only counts non-null entries, so a mismatch with the total length
# means the Series has missing values.
missing_values = len(s) != s.count()

In [95]:
missing_values

True

<h4>More pythonic solutions</h4>

In [99]:
pd.Series([True,True,False]).any()

True

In [100]:
pd.Series([True,True,False]).all()

False

In [102]:
pd.Series([True,True,True]).all()

True