In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv('Windspeed.csv')
df.head()

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,,sunny
2,08-05-2020,,6.88,
3,,30.4,,cloudy
4,10-05-2020,,19.055,rainy


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Date         18 non-null     object 
 1   Temperature  16 non-null     float64
 2   Windspeed    16 non-null     float64
 3   Status       16 non-null     object 
dtypes: float64(2), object(2)
memory usage: 832.0+ bytes


In [4]:
df.isnull().sum()

Date           4
Temperature    6
Windspeed      6
Status         6
dtype: int64

## filling a common value to all missing data
#### lets try filling 0 to all the missing data

In [5]:
df.fillna(0)

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,0.0,sunny
2,08-05-2020,0.0,6.88,0
3,0,30.4,0.0,cloudy
4,10-05-2020,0.0,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,0.0,0
7,13-05-2020,35.4,10.788,sunny
8,0,26.3,8.658,0
9,15-05-2020,22.4,14.884,sunny


## Adding missing data to individual colums
#### the same method can be used to add missing data for various columns differently.We just need to pass a dictionary as below

In [6]:
df.fillna({
    "Temperature":0,
    "Windspeed":5,
    "Status":"sunny"
})

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,5.0,sunny
2,08-05-2020,0.0,6.88,sunny
3,,30.4,5.0,cloudy
4,10-05-2020,0.0,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,5.0,sunny
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,sunny
9,15-05-2020,22.4,14.884,sunny


## Forward fill(row)

In [7]:
df.fillna(method="ffill")

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,10.788,sunny
2,08-05-2020,36.7,6.88,sunny
3,08-05-2020,30.4,6.88,cloudy
4,10-05-2020,30.4,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,13.9,sunny
7,13-05-2020,35.4,10.788,sunny
8,13-05-2020,26.3,8.658,sunny
9,15-05-2020,22.4,14.884,sunny


## Backward fill(row)

In [8]:
df.fillna(method="bfill")

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,6.88,sunny
2,08-05-2020,30.4,6.88,cloudy
3,10-05-2020,30.4,19.055,cloudy
4,10-05-2020,24.2,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,10.788,sunny
7,13-05-2020,35.4,10.788,sunny
8,15-05-2020,26.3,8.658,sunny
9,15-05-2020,22.4,14.884,sunny


## limit the forward or backward fill

In [9]:
df.fillna(method="ffill",limit=1)

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,10.788,sunny
2,08-05-2020,36.7,6.88,sunny
3,08-05-2020,30.4,6.88,cloudy
4,10-05-2020,30.4,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,13.9,sunny
7,13-05-2020,35.4,10.788,sunny
8,13-05-2020,26.3,8.658,sunny
9,15-05-2020,22.4,14.884,sunny


## filling with pandas object
#### there are many pandas objects like df.sum(),df.max() etc.we can fill the missing values with these too

In [10]:
df.fillna(df.mean())

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,13.809563,sunny
2,08-05-2020,29.48125,6.88,
3,,30.4,13.809563,cloudy
4,10-05-2020,29.48125,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,13.809563,
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,
9,15-05-2020,22.4,14.884,sunny


## Filling for specific range of columns

In [11]:
df.fillna(df.mean()['Temperature':'Windspeed'])

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,13.809563,sunny
2,08-05-2020,29.48125,6.88,
3,,30.4,13.809563,cloudy
4,10-05-2020,29.48125,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,13.809563,
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,
9,15-05-2020,22.4,14.884,sunny


## Interpolate missing value

In [12]:
df.interpolate()

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,8.834,sunny
2,08-05-2020,33.55,6.88,
3,,30.4,12.9675,cloudy
4,10-05-2020,27.3,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,12.344,
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,
9,15-05-2020,22.4,14.884,sunny


### Time interpolate

In [13]:
df.interpolate(methode="time")

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,8.834,sunny
2,08-05-2020,33.55,6.88,
3,,30.4,12.9675,cloudy
4,10-05-2020,27.3,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,12.344,
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,
9,15-05-2020,22.4,14.884,sunny


### Interpolation direction

In [14]:
df.interpolate(limit=1,limit_direction='backward')

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,8.834,sunny
2,08-05-2020,33.55,6.88,
3,,30.4,12.9675,cloudy
4,10-05-2020,27.3,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,12.344,
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,
9,15-05-2020,22.4,14.884,sunny


### Limit area of interpolation
##### we can also restrict our missing value to be filled with inside or outside values

In [15]:
df.interpolate(limit_direction='both',limit_area='inside',limit=1)

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,8.834,sunny
2,08-05-2020,33.55,6.88,
3,,30.4,12.9675,cloudy
4,10-05-2020,27.3,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,12.344,
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,
9,15-05-2020,22.4,14.884,sunny


In [16]:
df.interpolate(limit_direction='both',limit_area='outside',limit=1)

Unnamed: 0,Date,Temperature,Windspeed,Status
0,06-05-2020,35.4,10.788,sunny
1,07-05-2020,36.7,,sunny
2,08-05-2020,,6.88,
3,,30.4,,cloudy
4,10-05-2020,,19.055,rainy
5,11-05-2020,24.2,13.9,sunny
6,12-05-2020,22.7,,
7,13-05-2020,35.4,10.788,sunny
8,,26.3,8.658,
9,15-05-2020,22.4,14.884,sunny
