# 4\. Dealing with NaN

In [4]:
import pandas as pd

### Create a DataFrame

In [6]:
# Build one dictionary per store, mapping item name -> value.
# The stores do not all list the same items; any item missing from a
# store's dictionary shows up as NaN in that store's row of the DataFrame.
store_labels = ['store 1', 'store 2', 'store 3']
items2 = [
    dict(bikes=20, pants=30, watches=35, shirts=15, shoes=8, suits=45),
    dict(watches=10, glasses=50, bikes=15, pants=5, shirts=2, shoes=5, suits=7),
    dict(bikes=20, pants=30, watches=35, glasses=4, shoes=10),
]

# One row per dictionary, labelled with the store names.
store_items = pd.DataFrame(items2, index=store_labels)
store_items

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,,10,,4.0


### Count the total NaN values

In [12]:
# isnull() flags every NaN cell; the first sum() adds the flags per column,
# and the second sum() adds those per-column counts into a single total.
nan_per_column = store_items.isnull().sum()
x = nan_per_column.sum()
print("Number of NaN values in our DataFrame:", x)

Number of NaN values in our DataFrame: 3


### Return a boolean (True/False) for each element indicating whether it is NaN

In [14]:
# element-wise mask with the same shape as store_items:
# True where the value is NaN, False everywhere else
nan_mask = store_items.isnull()
nan_mask

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,False,False,False,False,False,False,True
store 2,False,False,False,False,False,False,False
store 3,False,False,False,True,False,True,False


### Count NaN down the column

In [16]:
# summing the boolean mask down the rows (axis=0) counts the NaN values in each column
nan_count_by_column = store_items.isnull().sum(axis=0)
nan_count_by_column

bikes      0
pants      0
watches    0
shirts     1
shoes      0
suits      1
glasses    1
dtype: int64

### Count the non-NaN values in each column

In [18]:
# count() returns, for each column, how many entries are not NaN
non_nan_by_column = store_items.count()
# note: print() puts its default single-space separator after the "\n",
# which is why the first row of the printed Series is indented by one space
print("Number of non-NaN values in the columns of our DataFrame:\n", non_nan_by_column)

Number of non-NaN values in the columns of our DataFrame:
 bikes      3
pants      3
watches    3
shirts     2
shoes      3
suits      2
glasses    2
dtype: int64


### Drop rows having NaN values

In [23]:
# keep only the rows (axis=0) in which no value is NaN;
# the result is a new DataFrame, store_items itself is not modified
store_items.dropna(axis=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 2,15,5,10,2.0,5,7.0,50.0


### Drop columns having NaN values

In [25]:
# keep only the columns in which no value is NaN;
# the result is a new DataFrame, store_items itself is not modified
store_items.dropna(axis='columns')

Unnamed: 0,bikes,pants,watches,shoes
store 1,20,30,35,8
store 2,15,5,10,5
store 3,20,30,35,10


### Replace NaN with 0

In [28]:
# substitute 0 for every NaN value (returns a new DataFrame)
store_items.fillna(value=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,0.0
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,0.0,10,0.0,4.0


### Forward fill NaN values *down* (axis = 0) the dataframe

In [33]:
# replace each NaN with the previous value (the row above) in the same column;
# a NaN in the first row has no previous value and therefore stays NaN.
# DataFrame.ffill is used instead of fillna(method='ffill') because the
# `method` keyword of fillna is deprecated in recent pandas releases.
store_items.ffill(axis=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,2.0,10,7.0,4.0


### Forward fill NaN values *across* (axis = 1) the dataframe

In [34]:
# replace each NaN with the previous value (the column to the left) in the same row.
# DataFrame.ffill is used instead of fillna(method='ffill') because the
# `method` keyword of fillna is deprecated in recent pandas releases.
store_items.ffill(axis=1)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20.0,30.0,35.0,15.0,8.0,45.0,45.0
store 2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store 3,20.0,30.0,35.0,35.0,10.0,10.0,4.0


### Backward fill NaN values *down* (axis = 0) the dataframe

In [37]:
# replace each NaN with the next value (the row below) in the same column;
# a NaN in the last row has no next value and therefore stays NaN.
# DataFrame.bfill is used instead of fillna(method='backfill') because the
# `method` keyword of fillna is deprecated in recent pandas releases.
store_items.bfill(axis=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,50.0
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,,10,,4.0


### Backward fill NaN values *across* (axis = 1) the dataframe

In [40]:
# replace each NaN with the next value (the column to the right) in the same row;
# a NaN in the last column has no next value and therefore stays NaN.
# DataFrame.bfill is used instead of fillna(method='backfill') because the
# `method` keyword of fillna is deprecated in recent pandas releases.
store_items.bfill(axis=1)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20.0,30.0,35.0,15.0,8.0,45.0,
store 2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store 3,20.0,30.0,35.0,10.0,10.0,4.0,4.0


### Interpolate (estimate) NaN values *down* (axis = 0) the dataframe

In [43]:
# estimate each NaN by linear interpolation along its column.
# As the result shows, a NaN at the top of a column is left as NaN, and a
# NaN at the bottom of a column takes the last valid value above it.
store_items.interpolate(axis='index', method='linear')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,2.0,10,7.0,4.0


### Interpolate (estimate) NaN values *across* (axis = 1) the dataframe

In [44]:
# estimate each NaN by linear interpolation along its row.
# A NaN between two valid neighbours becomes their midpoint (e.g. 22.5
# between 35 and 10); a NaN at the end of a row takes the last valid value.
store_items.interpolate(axis='columns', method='linear')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20.0,30.0,35.0,15.0,8.0,45.0,45.0
store 2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store 3,20.0,30.0,35.0,22.5,10.0,7.0,4.0
