# Handling Null Values

In [4]:
import pandas as pd
import numpy as np

In [2]:
# Load the coffee sales data (columns: Day, Coffee Type, Units Sold) from a path relative to the working directory.
coffee = pd.read_csv('../warmup-data/coffee.csv')

In [None]:
# Simulate missing data: blank out 'Units Sold' for the rows labelled 0 and 1.
coffee.loc[[0,1], 'Units Sold'] = np.nan

# Real-world datasets very often contain missing (NaN) values, so it is worth knowing how to find and handle them.

In [11]:
# Count the missing values in each column: isna() flags every NaN cell and sum() totals those flags per column.
coffee.isna().sum()

Day            0
Coffee Type    0
Units Sold     2
dtype: int64

In [13]:
# Fill the missing 'Units Sold' values with a constant: either an arbitrary number or,
# as here, the mean of the column (mean() skips NaN by default).
# The dict restricts the fill to 'Units Sold'; a bare scalar would be applied to every column,
# so a missing value in a text column such as 'Day' would be replaced by a number.
# fillna() returns a new DataFrame and the result is not assigned, so `coffee` itself still contains the NaNs.
coffee.fillna({'Units Sold': coffee['Units Sold'].mean()})

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,35.0
1,Monday,Latte,35.0
2,Tuesday,Espresso,30.0
3,Tuesday,Latte,20.0
4,Wednesday,Espresso,35.0
5,Wednesday,Latte,25.0
6,Thursday,Espresso,40.0
7,Thursday,Latte,30.0
8,Friday,Espresso,45.0
9,Friday,Latte,35.0


In [None]:
# Alternatively, fill the gaps with estimates instead of a single constant.
# interpolate() estimates each missing value from the neighbouring rows, which helps when the data follows a pattern.
# Here it has no effect (rows 0 and 1 stay NaN), for two reasons (per the pandas docs; confirm for your pandas version):
#   1. Rows 0 and 1 sit at the very start of the column, and the default linear, forward-direction
#      interpolation leaves leading NaNs unfilled because there is no earlier value to start from.
#   2. DataFrame.fillna() given a Series matches the Series index against column labels, not row labels,
#      so row-indexed values are not applied row by row.
# Assigning the interpolated Series straight back to the column avoids the second problem.
coffee.fillna(coffee['Units Sold'].interpolate())

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,
1,Monday,Latte,
2,Tuesday,Espresso,30.0
3,Tuesday,Latte,20.0
4,Wednesday,Espresso,35.0
5,Wednesday,Latte,25.0
6,Thursday,Espresso,40.0
7,Thursday,Latte,30.0
8,Friday,Espresso,45.0
9,Friday,Latte,35.0


In [17]:
# Let's try something else: first restore rows 0 and 1 with a fixed value (15), then display the frame.
coffee.loc[[0,1], 'Units Sold'] = 15
coffee

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,15.0
1,Monday,Latte,15.0
2,Tuesday,Espresso,30.0
3,Tuesday,Latte,20.0
4,Wednesday,Espresso,35.0
5,Wednesday,Latte,25.0
6,Thursday,Espresso,40.0
7,Thursday,Latte,30.0
8,Friday,Espresso,45.0
9,Friday,Latte,35.0


In [20]:
# Interpolation test: this time blank out two interior rows (2 and 3), which have known values on both sides.
coffee.loc[[2,3], 'Units Sold'] = np.nan

In [25]:
# Interpolate the column and assign the result back so the filled values are stored in `coffee`.
coffee['Units Sold'] = coffee['Units Sold'].interpolate()

In [27]:
# After interpolation, rows 2 and 3 hold estimates derived from their nearest known neighbours:
# the values step evenly from 15.0 (row 1) to 35.0 (row 4), giving roughly 21.67 and 28.33.
coffee

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,15.0
1,Monday,Latte,15.0
2,Tuesday,Espresso,21.666667
3,Tuesday,Latte,28.333333
4,Wednesday,Espresso,35.0
5,Wednesday,Latte,25.0
6,Thursday,Espresso,40.0
7,Thursday,Latte,30.0
8,Friday,Espresso,45.0
9,Friday,Latte,35.0
