In [5]:
"""
None: Pythonic missing data
"""

import pandas as pd
import numpy as np

# None objects as missing values: a single None makes NumPy store the array
# with the generic object dtype, whereas the all-integer array keeps an
# integer dtype (int32 in the recorded output).
arr1 = np.array([1, None, 3, 4])
arr2 = np.array([1, 2, 3, 4])
print(arr1.dtype, arr2.dtype, sep='\n')

object
int32


In [7]:
# Python objects are incompatible with numpy and pandas operations:
# summing the object array arr1 ends up adding an int to None, which raises
# TypeError. The error is the point of this cell, so it is caught and printed;
# left uncaught it would abort a "Restart Kernel -> Run All".
try:
    arr1.sum()
except TypeError as exc:
    print("TypeError:", exc)

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [10]:
"""
NaN : Missing numerical data
"""

# Using np.nan as the missing marker keeps the array numeric: it is stored
# with a floating-point dtype (float64 in the recorded output).
arr3 = np.array([1, np.nan, 3, 4])
print(arr3.dtype.name)

float64


In [13]:
# NaN propagates through arithmetic: because arr3 contains one np.nan entry,
# the plain sum of the whole array is itself nan.
arr3.sum()

nan

In [16]:
# NumPy's NaN-aware aggregation functions ignore missing values: np.nanmax
# returns the largest of the remaining entries of arr3 (4.0 in the recorded
# output).
np.nanmax(arr3)

4.0

In [23]:
# pandas converts None to NaN when the values are numeric (first Series,
# dtype float64). Once a string is present the Series is stored with object
# dtype and, in the recorded output, None is kept as None (second Series).
print(pd.Series([1, np.nan, 2, None]))

ser = pd.Series([1, np.nan, '2', None])
print(ser)

0    1.0
1    NaN
2    2.0
3    NaN
dtype: float64
0       1
1     NaN
2       2
3    None
dtype: object


In [29]:
"""
Detecting null values
"""

# Mixed-type Series holding both kinds of missing marker (np.nan and None).
ser = pd.Series([1, np.nan, 'hello', None])
print(ser, '\n')
# isna() is the method that isnull() aliases; the resulting boolean mask is
# True exactly at the NaN and None positions.
ser.isna()

0        1
1      NaN
2    hello
3     None
dtype: object 



0    False
1     True
2    False
3     True
dtype: bool

In [67]:
"""
Dropping Null Values
"""
from pandas import DataFrame

# 3x3 float frame with one NaN in row 0 (column 1) and one in row 2 (column 0).
# The values go through np.array first, so every column ends up as float.
df = pd.DataFrame(
    np.array([
        [1.0, np.nan, 2],
        [2.0, 3.0, 5],
        [np.nan, 4.0, 6],
    ])
)

# df.dropna() : list-wise deletion -- along the default axis (0) every row
# that holds a NaN is removed.
print(df.dropna(axis=0))
# With axis='columns' every column that holds a NaN is removed instead.
print(df.dropna(axis='columns'))

     0    1    2
1  2.0  3.0  5.0
     2
0  2.0
1  5.0
2  6.0


In [72]:
# how='any' (the default): drop every column that contains at least one NaN.
print(df.dropna(how='any', axis='columns'))

# how='all': drop a column only when all of its values are NaN; no column of
# df is entirely NaN, so the frame is printed unchanged.
print(df.dropna(how='all', axis='columns'))

     2
0  2.0
1  5.0
2  6.0
     0    1    2
0  1.0  NaN  2.0
1  2.0  3.0  5.0
2  NaN  4.0  6.0


In [73]:
# thresh = minimum number of non-null values a row needs in order to be kept;
# with thresh=3 only the fully populated row (index 1) survives.
df.dropna(axis='rows', thresh=3)

Unnamed: 0,0,1,2
1,2.0,3.0,5.0


In [41]:
"""
Filling Null Values
"""

# Replace every missing entry of ser (both the NaN and the None) with the
# constant 0; non-missing entries are left as they are.
ser.fillna(0)

0        1
1        0
2    hello
3        0
dtype: object

In [45]:
# Forward-fill = LOCF (last observation carried forward): each missing entry
# takes the most recent non-missing value above it.
# Series.ffill() is used instead of fillna(method='ffill') because the
# `method` keyword of fillna is deprecated in recent pandas releases.
ser.ffill()

0        1
1        1
2    hello
3    hello
dtype: object

In [46]:
# Backward-fill = NOCB (next observation carried backward): each missing entry
# takes the next non-missing value below it; the trailing missing entry has
# nothing after it and therefore stays missing.
# Series.bfill() is used instead of fillna(method='bfill') because the
# `method` keyword of fillna is deprecated in recent pandas releases.
ser.bfill()

0        1
1    hello
2    hello
3     None
dtype: object