In [16]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer


In [17]:
def weather_data_first_step(path='07156.csv', start='2020-09-01', end='2021-10-31'):
    """Load the hourly weather CSV and apply the initial clean-up.

    The clean-up renames the 13 columns, keeps only the rows whose date
    lies in ``[start, end]`` (both inclusive), drops every column that is
    entirely NaN within that range, and folds the ``hour`` column into
    ``date`` so that ``date`` becomes a full timestamp.

    Parameters
    ----------
    path : str or path-like, default '07156.csv'
        Location of the weather CSV. It must contain exactly 13 columns.
    start, end : str, default '2020-09-01' and '2021-10-31'
        Inclusive bounds of the date range to keep. They are compared
        against the ``date`` column as read from the file, so they should
        use the same format as that column (the defaults use YYYY-MM-DD).

    Returns
    -------
    pandas.DataFrame
        The filtered frame, with ``date`` as datetime (date + hour) and
        the all-NaN columns removed. The ``hour`` column is kept.
    """
    column_names = ['date', 'hour', 'temp', 'dwpt', 'rhum', 'prcp', 'snow',
                    'wdir', 'wspd', 'wpgt', 'pres', 'tsun', 'coco']

    raw = pd.read_csv(path)
    # Whatever header read_csv inferred from the file is replaced by our names.
    raw.columns = column_names

    in_range = (raw.date >= start) & (raw.date <= end)
    # Copy so the assignment to 'date' below never writes into a view of `raw`.
    weather_data = raw.loc[in_range].copy()

    # A column whose NaN share is exactly 1 carries no information in this range.
    na_share = weather_data.isna().mean()
    empty_columns = list(na_share[na_share == 1].index)
    weather_data = weather_data.drop(columns=empty_columns)

    # Combine calendar date and hour-of-day into a single timestamp.
    weather_data['date'] = (
        pd.to_datetime(weather_data.date)
        + pd.to_timedelta(weather_data.hour, unit='h')
    )
    return weather_data

In [18]:
# Load the weather CSV and run the first clean-up pass defined above.
weather_data = weather_data_first_step()

In [19]:
# Count the remaining missing values in each column.
weather_data.isna().sum()

date       0
hour       0
temp       0
dwpt       0
rhum       0
prcp    1099
wdir       0
wspd       0
pres       0
dtype: int64

As we can see above, we made some initial changes to our dataset: renaming the columns, selecting the date range we are interested in, dropping the columns where all values were *NaN*, and combining the date and hour columns so that the date is in the same format as in *bike_count*.
After this initial step in preparing our data, we can see that we still have some missing values in the precipitation column (around 10%). To tackle this problem, we are going to use multivariate feature imputation to predict the missing values. 

In [63]:
# Impute the missing values from the other weather features. The timestamp
# is set aside first so that only the remaining columns go into the imputer.
# NOTE(review): IterativeImputer's estimates are unbounded by default, so an
# imputed 'prcp' could come out negative; consider `min_value` if that matters.
data_for_imp = weather_data.drop(columns=['date'])
imp = IterativeImputer(random_state=0)
transformed_data = imp.fit_transform(data_for_imp)

# Label the imputed array with the columns that were actually fed to the
# imputer (rather than a hard-coded list), then drop 'hour' by name.
transformed_weather_data = pd.DataFrame(
    transformed_data, columns=data_for_imp.columns
).drop(columns=['hour'])

# Re-attach the timestamp as the first column. `.values` discards the original
# index, so rows are matched by position against the new frame's fresh index.
date_column = pd.Series(weather_data['date'].values, name='date')
transformed_weather_data.insert(0, 'date', date_column)

In [65]:
# Inspect the imputed weather table (long frames are shown truncated).
transformed_weather_data

Unnamed: 0,date,temp,dwpt,rhum,prcp,wdir,wspd,pres
0,2020-09-01 00:00:00,13.9,9.7,76.0,0.057646,350.0,7.6,1020.4
1,2020-09-01 01:00:00,13.6,9.6,77.0,0.000000,330.0,3.6,1020.1
2,2020-09-01 02:00:00,13.0,9.4,79.0,0.000000,290.0,3.6,1019.9
3,2020-09-01 03:00:00,12.9,9.7,81.0,0.096737,310.0,5.4,1019.8
4,2020-09-01 04:00:00,12.4,9.6,83.0,0.000000,300.0,5.4,1019.5
...,...,...,...,...,...,...,...,...
10135,2021-10-31 19:00:00,11.3,9.6,89.0,0.000000,220.0,11.2,1001.0
10136,2021-10-31 20:00:00,10.7,8.8,88.0,0.000000,230.0,11.2,1001.4
10137,2021-10-31 21:00:00,10.3,8.7,90.0,0.000000,220.0,13.0,1001.9
10138,2021-10-31 22:00:00,10.1,7.7,85.0,0.000000,220.0,13.0,1002.3
