# Handle Missing Data

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Detecting null values

# Object-dtype Series mixing an int, a NaN, a string and None.
data = pd.Series([1, np.nan, 'hello', None])

# Boolean mask that is True wherever the entry is missing (NaN or None).
missing_mask = data.isna()
print(missing_mask)

# Inverting the mask selects only the entries that are present.
print(data[~missing_mask])

0    False
1     True
2    False
3     True
dtype: bool
0        1
2    hello
dtype: object


In [11]:
# Dropping null values

# Series: dropna() simply removes the missing entries.
print(data.dropna())

# 3x3 frame with one NaN in each of the first two rows; the last row is complete.
df = pd.DataFrame(
    [
        [1, np.nan, 2],
        [np.nan, 2, 3],
        [1, 2, 3],
    ]
)

# Default behaviour: drop every row that contains at least one NaN.
print(df.dropna())
# Same rule applied column-wise: drop every column containing a NaN.
print(df.dropna(axis='columns'))

# Add a column (label 3) made up entirely of NaN.
# NOTE: this mutates df in place; the column stays on df after this cell runs.
df[3] = np.nan
print(df)

# how='all' only drops columns in which every value is NaN.
print(df.dropna(axis='columns', how='all'))

# thresh=3 keeps only rows that have at least three non-null values.
print(df.dropna(axis='index', thresh=3))

0        1
2    hello
dtype: object
     0    1  2
2  1.0  2.0  3
   2
0  2
1  3
2  3
     0    1  2   3
0  1.0  NaN  2 NaN
1  NaN  2.0  3 NaN
2  1.0  2.0  3 NaN
     0    1  2
0  1.0  NaN  2
1  NaN  2.0  3
2  1.0  2.0  3
     0    1  2   3
2  1.0  2.0  3 NaN


In [21]:
# Filling null values
# Float Series labelled a-e; both np.nan and None end up stored as NaN.
data = pd.Series([1, np.nan, 2, None, 3], index=list('abcde'))
print(data)

# Fill with a single value
print(data.fillna(0)) # 1 0 2 0 3
# Forward-fill: propagate the last valid value forward.
# .ffill()/.bfill() are used instead of fillna(method=...), which is
# deprecated since pandas 2.1; the dedicated methods give the same result.
print(data.ffill()) # 1 1 2 2 3
# Forward-fill down each column of df (built in an earlier cell);
# a leading NaN with no previous value to copy stays NaN.
print(df.ffill(axis='index'))
# Backward-fill: propagate the next valid value backward.
print(data.bfill()) # 1 2 2 3 3

a    1.0
b    NaN
c    2.0
d    NaN
e    3.0
dtype: float64
a    1.0
b    0.0
c    2.0
d    0.0
e    3.0
dtype: float64
a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64
     0    1  2   3
0  1.0  NaN  2 NaN
1  1.0  2.0  3 NaN
2  1.0  2.0  3 NaN
a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64
