In [38]:
import pandas as pd

#### Import data

In [39]:
# Load the raw weather observations; no date parsing is requested here.
df = pd.read_csv(filepath_or_buffer='resources/WeatherData.csv')

#### Check the first 5 rows 

In [40]:
# Peek at the first five rows (5 is also head()'s default).
df.head(5)

Unnamed: 0,day,temperature,windspeed,event
0,01/01/2018,32.0,6.0,Rain
1,03/01/2018,,7.0,Sunny
2,04/01/2018,28.0,5.0,
3,05/01/2018,30.0,6.0,Snow
4,06/01/2018,31.0,6.0,Rain


#### Check the last 5 rows

In [41]:
# Peek at the last five rows (5 is also tail()'s default).
df.tail(5)

Unnamed: 0,day,temperature,windspeed,event
9,12/01/2018,27.0,8.0,
10,13/01/2018,,,Sunny
11,14/01/2018,23.0,5.0,
12,15/01/2018,25.0,,Snow
13,16/01/2018,,,


#### Parse the `day` column as dates (day-first format, dd/mm/yyyy)

In [43]:
# Reload the data and convert 'day' with an explicit day-first format
# (e.g. 01/01/2018 is 1 January 2018, 03/01/2018 is 3 January 2018).
# pd.datetime was removed in pandas 2.0 and read_csv's date_parser argument is
# deprecated, so the conversion is done with pd.to_datetime after loading.
df = pd.read_csv('resources/WeatherData.csv')
df['day'] = pd.to_datetime(df['day'], format='%d/%m/%Y')
# Positional access: confirm the first value is now a pandas Timestamp.
type(df['day'].iloc[0])

pandas._libs.tslib.Timestamp

#### Set the day column as the dataframe index

In [44]:
# Promote 'day' from a regular column to the row index of the frame.
df = df.set_index(keys='day')
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,,7.0,Sunny
2018-01-04,28.0,5.0,
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,,5.0,
2018-01-09,25.0,,Rain
2018-01-10,29.0,7.0,
2018-01-11,30.0,,Sunny
2018-01-12,27.0,8.0,


### Handling missing data

### fillna

In [45]:
# Replace every NaN with 0 in every column, including the text 'event' column.
new_df = df.fillna(value=0)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,0.0,7.0,Sunny
2018-01-04,28.0,5.0,0
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,0.0,5.0,0
2018-01-09,25.0,0.0,Rain
2018-01-10,29.0,7.0,0
2018-01-11,30.0,0.0,Sunny
2018-01-12,27.0,8.0,0


#### fillna with a different replacement value per column

In [46]:
# Give each column its own placeholder for missing values:
# 0 for the numeric columns and a readable label for 'event'.
new_df = df.fillna(value=dict(temperature=0, windspeed=0, event='No Event'))
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,0.0,7.0,Sunny
2018-01-04,28.0,5.0,No Event
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,0.0,5.0,No Event
2018-01-09,25.0,0.0,Rain
2018-01-10,29.0,7.0,No Event
2018-01-11,30.0,0.0,Sunny
2018-01-12,27.0,8.0,No Event


#### Forward fill (carry the last valid value forward)

In [47]:
# Forward fill: each NaN takes the most recent non-missing value above it.
# df.ffill() replaces fillna(method='ffill'), which is deprecated in pandas >= 2.1.
new_df = df.ffill()
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,32.0,7.0,Sunny
2018-01-04,28.0,5.0,Sunny
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,31.0,5.0,Rain
2018-01-09,25.0,5.0,Rain
2018-01-10,29.0,7.0,Rain
2018-01-11,30.0,7.0,Sunny
2018-01-12,27.0,8.0,Sunny


#### Backward fill (use the next valid value)

In [48]:
# Backward fill: each NaN takes the next non-missing value below it.
# df.bfill() replaces fillna(method='bfill'), which is deprecated in pandas >= 2.1.
new_df = df.bfill()
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,28.0,7.0,Sunny
2018-01-04,28.0,5.0,Snow
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,25.0,5.0,Rain
2018-01-09,25.0,7.0,Rain
2018-01-10,29.0,7.0,Sunny
2018-01-11,30.0,8.0,Sunny
2018-01-12,27.0,8.0,Sunny


#### Forward fill with a limit (fill at most one consecutive missing value)

In [49]:
# Forward fill, but propagate a value across at most one consecutive NaN;
# longer runs of missing values stay NaN after the first filled row.
# df.ffill(limit=...) replaces fillna(method='ffill', limit=...), deprecated in pandas >= 2.1.
new_df = df.ffill(limit=1)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,32.0,7.0,Sunny
2018-01-04,28.0,5.0,Sunny
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,31.0,5.0,Rain
2018-01-09,25.0,5.0,Rain
2018-01-10,29.0,7.0,Rain
2018-01-11,30.0,7.0,Sunny
2018-01-12,27.0,8.0,Sunny


### Interpolate (linear, treating rows as equally spaced)

In [50]:
# Linear interpolation is only defined for numeric data. Calling interpolate()
# on a frame that still contains the object-dtype 'event' column is deprecated
# in pandas >= 2.1, so interpolate the numeric columns only and leave the
# remaining columns (and their NaNs) exactly as they are.
numeric_cols = df.select_dtypes(include='number').columns
new_df = df.copy()
new_df[numeric_cols] = df[numeric_cols].interpolate()
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,30.0,7.0,Sunny
2018-01-04,28.0,5.0,
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,28.0,5.0,
2018-01-09,25.0,6.0,Rain
2018-01-10,29.0,7.0,
2018-01-11,30.0,7.5,Sunny
2018-01-12,27.0,8.0,


#### Interpolate with time (weight by the actual gap between dates in the index)

In [51]:
# method='time' weights the interpolation by the real distance between the
# dates in the index, so a value missing after a two-day gap is not simply the
# midpoint of its neighbours. It requires the DatetimeIndex set earlier.
# As with plain interpolate(), restrict the call to numeric columns: passing
# the object-dtype 'event' column is deprecated in pandas >= 2.1.
numeric_cols = df.select_dtypes(include='number').columns
new_df = df.copy()
new_df[numeric_cols] = df[numeric_cols].interpolate(method='time')
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-01-03,29.333333,7.0,Sunny
2018-01-04,28.0,5.0,
2018-01-05,30.0,6.0,Snow
2018-01-06,31.0,6.0,Rain
2018-01-08,27.0,5.0,
2018-01-09,25.0,6.0,Rain
2018-01-10,29.0,7.0,
2018-01-11,30.0,7.5,Sunny
2018-01-12,27.0,8.0,


In [18]:
# Display df again: the fill/interpolate cells above assigned their results to new_df, not df.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01,32.0,6.0,Rain
2018-03-01,,7.0,Sunny
2018-04-01,28.0,5.0,
2018-05-01,,,Snow
2018-06-01,31.0,,
2018-08-01,,5.0,
2018-09-01,25.0,,Rain
2018-10-01,29.0,7.0,
2018-11-01,30.0,,Sunny
2018-12-01,27.0,8.0,
