# How to use fillna to handle missing values

In [3]:
import pandas as pd;

## Reading the csv file 

In [11]:
# Load the Toronto weather sample; parse_dates converts the 'Date' column to datetimes.
df = pd.read_csv(
    "../data/TorontoWeather/TorontoWeatherData.csv",
    parse_dates=["Date"],
)

In [5]:
# Show the freshly loaded frame so the gaps in the temperature and snow columns are visible.
df

Unnamed: 0,Station Name,Date,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type
0,TORONTO CITY,2024-01-01,-0.7,-3.1,2.0,Sunny
1,TORONTO CITY,2024-01-04,,-6.0,,Snowy
2,TORONTO CITY,2024-01-05,0.4,-2.1,4.0,Snowy
3,TORONTO CITY,2024-01-06,2.1,,5.0,Snowy
4,TORONTO CITY,2024-01-07,0.7,-4.2,5.0,Sunny
5,TORONTO CITY,2024-01-12,3.9,,8.0,Snowy
6,TORONTO CITY,2024-01-13,6.8,-2.9,10.0,Snowy
7,TORONTO CITY,2024-01-14,,-11.2,8.0,Sunny
8,TORONTO CITY,2024-01-15,-7.1,,,Snowy


In [7]:
# Summarise column dtypes and non-null counts to see which columns contain missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Station Name       9 non-null      object        
 1   Date               9 non-null      datetime64[ns]
 2   Max Temp (°C)      7 non-null      float64       
 3   Min Temp (°C)      6 non-null      float64       
 4   Snow on Grnd (cm)  7 non-null      float64       
 5   Type               9 non-null      object        
dtypes: datetime64[ns](1), float64(3), object(2)
memory usage: 560.0+ bytes


## Set Date as the index column

In [12]:
# Promote 'Date' to the index.
# Rebinding (instead of inplace=True) keeps the call chainable, and the guard lets
# this cell be re-executed without a KeyError once 'Date' is already the index.
# If 'Date' is neither the index nor a column, set_index still raises as before.
if df.index.name != 'Date':
    df = df.set_index('Date')
df

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny
2024-01-04,TORONTO CITY,,-6.0,,Snowy
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy
2024-01-06,TORONTO CITY,2.1,,5.0,Snowy
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny
2024-01-12,TORONTO CITY,3.9,,8.0,Snowy
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy
2024-01-14,TORONTO CITY,,-11.2,8.0,Sunny
2024-01-15,TORONTO CITY,-7.1,,,Snowy


## Use fillna() to fill missing values with zero

In [13]:
# Replace every NaN in the frame with 0; fillna returns a new frame, so `df` keeps its gaps.
df1 = df.fillna(value=0)
df1

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny
2024-01-04,TORONTO CITY,0.0,-6.0,0.0,Snowy
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy
2024-01-06,TORONTO CITY,2.1,0.0,5.0,Snowy
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny
2024-01-12,TORONTO CITY,3.9,0.0,8.0,Snowy
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy
2024-01-14,TORONTO CITY,0.0,-11.2,8.0,Sunny
2024-01-15,TORONTO CITY,-7.1,0.0,0.0,Snowy


In [14]:
# Per-column fill values: only the columns named in the mapping are filled.
zero_by_column = {
    "Max Temp (°C)": 0,
    "Min Temp (°C)": 0,
    "Snow on Grnd (cm)": 0,
}
df2 = df.fillna(value=zero_by_column)
df2

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny
2024-01-04,TORONTO CITY,0.0,-6.0,0.0,Snowy
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy
2024-01-06,TORONTO CITY,2.1,0.0,5.0,Snowy
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny
2024-01-12,TORONTO CITY,3.9,0.0,8.0,Snowy
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy
2024-01-14,TORONTO CITY,0.0,-11.2,8.0,Sunny
2024-01-15,TORONTO CITY,-7.1,0.0,0.0,Snowy


## Use fillna() with method='ffill' to forward-fill missing values

In [15]:
# Forward-fill: each missing value takes the last valid value above it in the same column.
# NOTE: passing `method=` to fillna() is deprecated and triggers the warning echoed in
# this cell's output; the following section switches to DataFrame.ffill().
df3 = df.fillna(method='ffill')
df3

  df3 = df.fillna(method='ffill')


Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny
2024-01-04,TORONTO CITY,-0.7,-6.0,2.0,Snowy
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy
2024-01-06,TORONTO CITY,2.1,-2.1,5.0,Snowy
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny
2024-01-12,TORONTO CITY,3.9,-4.2,8.0,Snowy
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy
2024-01-14,TORONTO CITY,6.8,-11.2,8.0,Sunny
2024-01-15,TORONTO CITY,-7.1,-11.2,8.0,Snowy


### Since the 'method' argument of DataFrame.fillna() is deprecated, let's use the ffill() method instead

In [16]:
# DataFrame.ffill() carries the last valid value downward in each column; the output
# matches the deprecated fillna(method='ffill') call.
df3 = df.ffill()
df3

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny
2024-01-04,TORONTO CITY,-0.7,-6.0,2.0,Snowy
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy
2024-01-06,TORONTO CITY,2.1,-2.1,5.0,Snowy
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny
2024-01-12,TORONTO CITY,3.9,-4.2,8.0,Snowy
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy
2024-01-14,TORONTO CITY,6.8,-11.2,8.0,Sunny
2024-01-15,TORONTO CITY,-7.1,-11.2,8.0,Snowy


In [17]:
# axis="rows" fills down each column, from one row to the next; the output is the same
# as the call without an explicit axis.
df3 = df.ffill(axis="rows")
df3

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny
2024-01-04,TORONTO CITY,-0.7,-6.0,2.0,Snowy
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy
2024-01-06,TORONTO CITY,2.1,-2.1,5.0,Snowy
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny
2024-01-12,TORONTO CITY,3.9,-4.2,8.0,Snowy
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy
2024-01-14,TORONTO CITY,6.8,-11.2,8.0,Sunny
2024-01-15,TORONTO CITY,-7.1,-11.2,8.0,Snowy


## Use groupby() and ffill() to handle missing values on a single column

In [18]:
# Forward-fill 'Min Temp (°C)' separately within each weather 'Type', so a gap is filled
# from the previous row of the same type rather than from the previous row overall.
min_temp_by_type = df.groupby('Type')['Min Temp (°C)']
df['Min Temp (°C) FFILL'] = min_temp_by_type.ffill()
df

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type,Min Temp (°C) FFILL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny,-3.1
2024-01-04,TORONTO CITY,,-6.0,,Snowy,-6.0
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy,-2.1
2024-01-06,TORONTO CITY,2.1,,5.0,Snowy,-2.1
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny,-4.2
2024-01-12,TORONTO CITY,3.9,,8.0,Snowy,-2.1
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy,-2.9
2024-01-14,TORONTO CITY,,-11.2,8.0,Sunny,-11.2
2024-01-15,TORONTO CITY,-7.1,,,Snowy,-2.9


In [19]:
# Same per-'Type' forward fill for 'Max Temp (°C)'. The first row of a group stays NaN
# when it is missing, because there is no earlier value in that group to carry forward.
max_temp_by_type = df.groupby('Type')['Max Temp (°C)']
df['Max Temp (°C) FFILL'] = max_temp_by_type.ffill()
df

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type,Min Temp (°C) FFILL,Max Temp (°C) FFILL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny,-3.1,-0.7
2024-01-04,TORONTO CITY,,-6.0,,Snowy,-6.0,
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy,-2.1,0.4
2024-01-06,TORONTO CITY,2.1,,5.0,Snowy,-2.1,2.1
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny,-4.2,0.7
2024-01-12,TORONTO CITY,3.9,,8.0,Snowy,-2.1,3.9
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy,-2.9,6.8
2024-01-14,TORONTO CITY,,-11.2,8.0,Sunny,-11.2,0.7
2024-01-15,TORONTO CITY,-7.1,,,Snowy,-2.9,-7.1


## Use groupby() and bfill() to handle missing values on a single column

In [20]:
# Backward-fill 'Max Temp (°C)' within each 'Type': a gap takes the next valid value of
# the same type. The last row of a group stays NaN when nothing follows it.
max_temp_by_type = df.groupby('Type')['Max Temp (°C)']
df['Max Temp (°C) BFILL'] = max_temp_by_type.bfill()
df

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type,Min Temp (°C) FFILL,Max Temp (°C) FFILL,Max Temp (°C) BFILL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny,-3.1,-0.7,-0.7
2024-01-04,TORONTO CITY,,-6.0,,Snowy,-6.0,,0.4
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy,-2.1,0.4,0.4
2024-01-06,TORONTO CITY,2.1,,5.0,Snowy,-2.1,2.1,2.1
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny,-4.2,0.7,0.7
2024-01-12,TORONTO CITY,3.9,,8.0,Snowy,-2.1,3.9,3.9
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy,-2.9,6.8,6.8
2024-01-14,TORONTO CITY,,-11.2,8.0,Sunny,-11.2,0.7,
2024-01-15,TORONTO CITY,-7.1,,,Snowy,-2.9,-7.1,-7.1


Since the **"Max Temp (°C) BFILL"** column still contains missing values, let's apply one more round of ffill() to fill them

In [21]:
# Second pass: forward-fill the back-filled column within each 'Type' to close the gap
# that bfill() left at the end of a group.
bfilled_by_type = df.groupby('Type')['Max Temp (°C) BFILL']
df['Max Temp (°C) NoZero'] = bfilled_by_type.ffill()
df

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type,Min Temp (°C) FFILL,Max Temp (°C) FFILL,Max Temp (°C) BFILL,Max Temp (°C) NoZero
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny,-3.1,-0.7,-0.7,-0.7
2024-01-04,TORONTO CITY,,-6.0,,Snowy,-6.0,,0.4,0.4
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy,-2.1,0.4,0.4,0.4
2024-01-06,TORONTO CITY,2.1,,5.0,Snowy,-2.1,2.1,2.1,2.1
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny,-4.2,0.7,0.7,0.7
2024-01-12,TORONTO CITY,3.9,,8.0,Snowy,-2.1,3.9,3.9,3.9
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy,-2.9,6.8,6.8,6.8
2024-01-14,TORONTO CITY,,-11.2,8.0,Sunny,-11.2,0.7,,0.7
2024-01-15,TORONTO CITY,-7.1,,,Snowy,-2.9,-7.1,-7.1,-7.1


## Use groupby() and ffill() method to handle missing values on multiple columns

In [24]:
# Forward-fill several columns at once within each 'Type'; the result holds only the
# selected columns and shares df's index.
columns_to_fill = ['Min Temp (°C)', 'Snow on Grnd (cm)']
ffilled_result = df.groupby('Type')[columns_to_fill].ffill()
ffilled_result

Unnamed: 0_level_0,Min Temp (°C),Snow on Grnd (cm)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01,-3.1,2.0
2024-01-04,-6.0,
2024-01-05,-2.1,4.0
2024-01-06,-2.1,5.0
2024-01-07,-4.2,5.0
2024-01-12,-2.1,8.0
2024-01-13,-2.9,10.0
2024-01-14,-11.2,8.0
2024-01-15,-2.9,10.0


In [25]:
# Give the filled columns distinct names so they can sit next to the originals in df.
filled_column_names = {
    'Min Temp (°C)': 'Min Temp (°C) FFILL',
    'Snow on Grnd (cm)': 'Snow on Grnd (cm) FFILL',
}
ffilled_result = ffilled_result.rename(columns=filled_column_names)

In [26]:
# Copy each filled column into df under its own name (values align on the shared index).
# A column that already exists in df is overwritten; a new one is appended.
for column_name in ffilled_result.columns:
    df[column_name] = ffilled_result[column_name]
df

Unnamed: 0_level_0,Station Name,Max Temp (°C),Min Temp (°C),Snow on Grnd (cm),Type,Min Temp (°C) FFILL,Max Temp (°C) FFILL,Max Temp (°C) BFILL,Max Temp (°C) NoZero,Snow on Grnd (cm) FFILL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-01-01,TORONTO CITY,-0.7,-3.1,2.0,Sunny,-3.1,-0.7,-0.7,-0.7,2.0
2024-01-04,TORONTO CITY,,-6.0,,Snowy,-6.0,,0.4,0.4,
2024-01-05,TORONTO CITY,0.4,-2.1,4.0,Snowy,-2.1,0.4,0.4,0.4,4.0
2024-01-06,TORONTO CITY,2.1,,5.0,Snowy,-2.1,2.1,2.1,2.1,5.0
2024-01-07,TORONTO CITY,0.7,-4.2,5.0,Sunny,-4.2,0.7,0.7,0.7,5.0
2024-01-12,TORONTO CITY,3.9,,8.0,Snowy,-2.1,3.9,3.9,3.9,8.0
2024-01-13,TORONTO CITY,6.8,-2.9,10.0,Snowy,-2.9,6.8,6.8,6.8,10.0
2024-01-14,TORONTO CITY,,-11.2,8.0,Sunny,-11.2,0.7,,0.7,8.0
2024-01-15,TORONTO CITY,-7.1,,,Snowy,-2.9,-7.1,-7.1,-7.1,10.0
