In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

## Figuring Out What Data Is Missing

In [4]:
# NaN is the marker used throughout this notebook for a missing entry.
missing = np.nan

# Eight slots; positions 2, 5 and 7 are deliberately left empty.
series_obj = Series([
    'raw 1', 'raw 2', missing,
    'raw 4', 'raw 5', missing,
    'raw 7', missing,
])

In [5]:
# Display the Series; the missing slots render as NaN.
series_obj

0    raw 1
1    raw 2
2      NaN
3    raw 4
4    raw 5
5      NaN
6    raw 7
7      NaN
dtype: object

In [8]:
# Boolean mask: True wherever an entry is missing (isna is an alias of isnull).
series_obj.isna()

0    False
1    False
2     True
3    False
4    False
5     True
6    False
7     True
dtype: bool

## Filling Missing Values

In [20]:
# Fix the seed so every machine draws the same "random" numbers.
np.random.seed(25)

# 36 uniform draws laid out row by row as a 6x6 grid.
random_grid = np.random.rand(6, 6)
DF_obj = DataFrame(random_grid)
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
1,0.684969,0.437611,0.556229,0.36708,0.402366,0.113041
2,0.447031,0.585445,0.161985,0.520719,0.326051,0.699186
3,0.366395,0.836375,0.481343,0.516502,0.383048,0.997541
4,0.514244,0.559053,0.03445,0.71993,0.421004,0.436935
5,0.281701,0.900274,0.669612,0.456069,0.289804,0.525819


In [21]:
# .loc slices are label-based and include both end labels.
rows_col_0 = slice(3, 5)  # rows 3, 4 and 5
rows_col_5 = slice(1, 3)  # rows 1, 2 and 3

# Punch holes into the first and last columns (this modifies DF_obj in place).
DF_obj.loc[rows_col_0, 0] = missing
DF_obj.loc[rows_col_5, 5] = missing
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
1,0.684969,0.437611,0.556229,0.36708,0.402366,
2,0.447031,0.585445,0.161985,0.520719,0.326051,
3,,0.836375,0.481343,0.516502,0.383048,
4,,0.559053,0.03445,0.71993,0.421004,0.436935
5,,0.900274,0.669612,0.456069,0.289804,0.525819


In [23]:
# Swap every NaN for 0; fillna hands back a new frame, so DF_obj keeps its gaps.
filled_DF = DF_obj.fillna(value=0)

In [24]:
# Show the zero-filled copy.
filled_DF

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
1,0.684969,0.437611,0.556229,0.36708,0.402366,0.0
2,0.447031,0.585445,0.161985,0.520719,0.326051,0.0
3,0.0,0.836375,0.481343,0.516502,0.383048,0.0
4,0.0,0.559053,0.03445,0.71993,0.421004,0.436935
5,0.0,0.900274,0.669612,0.456069,0.289804,0.525819


In [25]:
# Per-column replacements: column label -> value used for that column's NaNs.
fill_values = {0: 0.12, 5: 1.25}
filled_DF = DF_obj.fillna(value=fill_values)

In [28]:
# Show the frame filled with the per-column values.
filled_DF

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
1,0.684969,0.437611,0.556229,0.36708,0.402366,1.25
2,0.447031,0.585445,0.161985,0.520719,0.326051,1.25
3,0.12,0.836375,0.481343,0.516502,0.383048,1.25
4,0.12,0.559053,0.03445,0.71993,0.421004,0.436935
5,0.12,0.900274,0.669612,0.456069,0.289804,0.525819


In [30]:
# Forward fill: each NaN takes the last valid value above it in the same column.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases; the result is the same.
fill_DF = DF_obj.ffill()
fill_DF

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
1,0.684969,0.437611,0.556229,0.36708,0.402366,0.117376
2,0.447031,0.585445,0.161985,0.520719,0.326051,0.117376
3,0.447031,0.836375,0.481343,0.516502,0.383048,0.117376
4,0.447031,0.559053,0.03445,0.71993,0.421004,0.436935
5,0.447031,0.900274,0.669612,0.456069,0.289804,0.525819


## Counting Missing Values

In [33]:
# Rebuild the frame with its gaps so this section starts from a known state.
np.random.seed(25)  # same seed -> same random numbers on any machine
DF_obj = DataFrame(np.random.rand(36).reshape(6, 6))
# .loc slices include both end labels: rows 3-5 of column 0, rows 1-3 of column 5.
DF_obj.loc[3:5, 0] = missing
DF_obj.loc[1:3, 5] = missing
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376
1,0.684969,0.437611,0.556229,0.36708,0.402366,
2,0.447031,0.585445,0.161985,0.520719,0.326051,
3,,0.836375,0.481343,0.516502,0.383048,
4,,0.559053,0.03445,0.71993,0.421004,0.436935
5,,0.900274,0.669612,0.456069,0.289804,0.525819


In [34]:
# Count NaNs per column: each True in the boolean mask counts as 1 when summed.
DF_obj.isna().sum()

0    3
1    0
2    0
3    0
4    0
5    3
dtype: int64

In [43]:
# Dropping rows: discard every row that holds at least one NaN.
DF_no_NaN = DF_obj.dropna(axis=0)
DF_no_NaN

Unnamed: 0,0,1,2,3,4,5
0,0.870124,0.582277,0.278839,0.185911,0.4111,0.117376


In [46]:
# Dropping columns: discard every column that holds at least one NaN.
DF_no_NaN = DF_obj.dropna(axis='columns')
DF_no_NaN

Unnamed: 0,1,2,3,4
0,0.582277,0.278839,0.185911,0.4111
1,0.437611,0.556229,0.36708,0.402366
2,0.585445,0.161985,0.520719,0.326051
3,0.836375,0.481343,0.516502,0.383048
4,0.559053,0.03445,0.71993,0.421004
5,0.900274,0.669612,0.456069,0.289804
