In [1]:
import numpy as np
import pandas as pd

# Show every column when displaying wide DataFrames (None removes the column limit).
pd.set_option('display.max_columns', None)

# Arithmetic

In [2]:
# None supports no arithmetic: adding a number to it raises TypeError.
# The error is caught and its message shown as the cell result, so the
# demonstration actually runs and the notebook still survives "Run All".
try:
    None + 5
except TypeError as exc:
    none_plus_five_error = f"{type(exc).__name__}: {exc}"
none_plus_five_error

In [3]:
# NaN is a float, so arithmetic is allowed -- but NaN propagates into the result.
np.nan + 5

nan

# Automatic Conversion

In [4]:
# In a numeric Series, None is converted to NaN and the dtype becomes float64.
pd.Series([3, None, np.nan])

0    3.0
1    NaN
2    NaN
dtype: float64

In [5]:
# In a non-numeric (object-dtype) Series, None is not cast to NaN; it is stored as-is.
pd.Series(["3", None, np.nan])

0       3
1    None
2     NaN
dtype: object

# Equality

In [6]:
# None compares equal to itself.
None == None

True

In [7]:
# NaN is not equal to anything -- not even to itself.
np.nan == np.nan

False

In [8]:
# Comparing NaN with anything using == gives False, and that includes None.
np.nan == None

False

# Detect Missing Values

In [9]:
# pd.isnull is the reliable missing-value check (== cannot detect NaN):
# it returns True for NaN.
pd.isnull(np.nan)

True

In [10]:
# pd.isnull also treats None as missing.
pd.isnull(None)

True

## Series

In [11]:
# Mixed int/str values give an object-dtype Series, so NaN and None are both kept as-is.
data = pd.Series([1, np.nan, 'hello', None])

In [12]:
# Boolean mask: True wherever the value is missing (NaN or None).
data.isnull()

0    False
1     True
2    False
3     True
dtype: bool

In [13]:
# Summing the boolean mask counts the missing values (True counts as 1).
data.isnull().sum()

np.int64(2)

In [14]:
# Index with the notnull mask to keep only the non-missing values.
data[data.notnull()]

0        1
2    hello
dtype: object

## DataFrame

In [15]:
# Small example frame, built column by column: columns a and b each
# contain one missing value, column c is complete.
df = pd.DataFrame({
    'a': [1, 2, np.nan],
    'b': [np.nan, 3, 4],
    'c': [2, 5, 6],
})
df

Unnamed: 0,a,b,c
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [16]:
# Element-wise mask with the same shape as df: True marks a missing cell.
df.isnull()

Unnamed: 0,a,b,c
0,False,True,False
1,False,False,False
2,True,False,False


In [17]:
# Number of nulls in each column: summing the boolean mask adds up the True values per column.
df.isnull().sum()

a    1
b    1
c    0
dtype: int64

# Dropping Null Values

## Series

In [18]:
# Show the current contents of data before dropping anything.
data

0        1
1      NaN
2    hello
3     None
dtype: object

In [19]:
# Drop the NaN and None entries; the surviving values keep their original index labels.
data.dropna()

0        1
2    hello
dtype: object

## DataFrame

In [20]:
# Re-create the example frame so this section starts from a known state:
# columns a and b each contain one missing value, column c is complete.
df = pd.DataFrame({
    'a': [1, 2, np.nan],
    'b': [np.nan, 3, 4],
    'c': [2, 5, 6],
})

df

Unnamed: 0,a,b,c
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [21]:
# By default, dropna removes every row that contains at least one null.
df.dropna()

Unnamed: 0,a,b,c
1,2.0,3.0,5


In [22]:
# axis='columns' drops every column that contains at least one null instead of rows.
df.dropna(axis='columns')

Unnamed: 0,c
0,2
1,5
2,6


## DataFrame Finer Control

In [23]:
# Add a column labelled 3 (an integer label) that is entirely NaN.
# NOTE: this modifies df in place, so the cells that follow see the extra column.
df[3] = np.nan
df

Unnamed: 0,a,b,c,3
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,6,


In [83]:
# how='all' drops only the columns in which every value is null;
# how='any' drops the columns that contain at least one null.
df.dropna(axis='columns', how='all')

Unnamed: 0,a,b,c
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [24]:
# No row is entirely null, so how='all' keeps every row.
df.dropna(axis='rows', how='all')

Unnamed: 0,a,b,c,3
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,6,


In [25]:
# thresh sets the minimum number of non-null values a row/column needs in order to be kept;
# here only rows with at least 3 non-null values survive.
df.dropna(axis='rows', thresh=3)

Unnamed: 0,a,b,c,3
1,2.0,3.0,5,


# Filling Null Values

## Series

In [85]:
# Nullable-integer Series labelled a..e; with the Int32 extension dtype
# both np.nan and None are stored as <NA>.
data = pd.Series(
    data=[1, np.nan, 2, None, 3],
    index=['a', 'b', 'c', 'd', 'e'],
    dtype=pd.Int32Dtype(),
)
data

a       1
b    <NA>
c       2
d    <NA>
e       3
dtype: Int32

In [86]:
# Replace every missing value with 0.
data.fillna(0)

a    1
b    0
c    2
d    0
e    3
dtype: Int32

In [87]:
# Forward fill: each missing value takes the last valid value before it
# (bfill is the backward-filling counterpart).
data.ffill()

a    1
b    1
c    2
d    2
e    3
dtype: Int32

## DataFrame

In [90]:
# Example frame for filling, built column by column: column a is missing
# its middle value, column b its first value, column c is complete.
df = pd.DataFrame({
    'a': [1, np.nan, 2],
    'b': [np.nan, 3, 4],
    'c': [2, 5, 6],
})

df

Unnamed: 0,a,b,c
0,1.0,,2
1,,3.0,5
2,2.0,4.0,6


In [92]:
# For a DataFrame, axis chooses the fill direction: axis=0 fills down each column.
# The NaN in the first row of column b has no value above it, so it stays NaN.
df.ffill(axis=0)

Unnamed: 0,a,b,c
0,1.0,,2
1,1.0,3.0,5
2,2.0,4.0,6
