In [19]:
"""
None: Pythonic misssing data
"""
import numpy as np
import pandas as pd

# None objects as missing values
arr1 = np.array([1, None, 3, 4])
print(arr1.dtype)

arr2 = np.array([1, 2, 3, 4])
print(arr2.dtype)

object
int32


In [33]:
# Python objects are incompatible with numpy and pandas operations
arr1.sum()

nan

In [21]:
"""
NaN: Missing Numerical Data
"""

arr1 = np.array([1, np.nan, 3, 4])
arr1.dtype

dtype('float64')

In [25]:
# Arithmetic with NaN will be another Nan
print(1 + np.nan)
print(1 * np.nan)
print(np.nan / np.nan)

nan
nan
nan


In [35]:
# Special NumPy aggregation funcs that ignore these missing values
np.nansum(arr1)

8.0

In [38]:
# Pandas automatically converts the None to a NaN value.

ser = pd.Series([1, np.nan, 2, None])
print(ser, '\n')

ser1 = pd.Series([1, np.nan, '2', None])
print(ser1)

0    1.0
1    NaN
2    2.0
3    NaN
dtype: float64 

0       1
1     NaN
2       2
3    None
dtype: object


In [42]:
"""
Detecting null values
"""

print(ser,'\n')
# isnull()
ser.isnull()

0    1.0
1    NaN
2    2.0
3    NaN
dtype: float64 



0    False
1     True
2    False
3     True
dtype: bool

In [44]:
# notnull()
print(ser, '\n')
ser.notnull()

0    1.0
1    NaN
2    2.0
3    NaN
dtype: float64 



0     True
1    False
2     True
3    False
dtype: bool

In [55]:
"""
Dropping null values
"""

# dropna()
print(ser.dropna(), '\n')
print(ser.dropna(how='all'))

0    1.0
2    2.0
dtype: float64 

0    1.0
2    2.0
dtype: float64


In [63]:
# For a DataFrame, there are mor options

df = pd.DataFrame([[1, np.nan, 2],
                   [2, 3, 5],
                   [np.nan, 4, 6]])
df

Unnamed: 0,0,1,2
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [67]:
# df.dropna(): list-wise deletion
print(df.dropna(axis='columns', how='any'), '\n')
print(df.dropna(axis='rows', how='all'))

   2
0  2
1  5
2  6 

     0    1  2
0  1.0  NaN  2
1  2.0  3.0  5
2  NaN  4.0  6


In [68]:
"""
Filling Null Values
"""

'\nFilling Null Values\n'

In [74]:
# Fill null values with a certain value

ser = pd.Series([1, np.nan, 2, None, 3], index=list('abcde'))
print(ser.fillna(0), '\n')
print(ser.fillna(-1))

a    1.0
b    0.0
c    2.0
d    0.0
e    3.0
dtype: float64 

a    1.0
b   -1.0
c    2.0
d   -1.0
e    3.0
dtype: float64


In [77]:
# Forward-fill = LOCF

print(ser.fillna(method='ffill'), '\n')

print(ser.ffill())

a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64 

a    1.0
b    1.0
c    2.0
d    2.0
e    3.0
dtype: float64


In [79]:
# Backward-fill = NOCB

print(ser.fillna(method='bfill'), '\n')

print(ser.bfill())

a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64 

a    1.0
b    2.0
c    2.0
d    3.0
e    3.0
dtype: float64
