In [19]:
import pandas as pd
import numpy as np
from datetime import datetime
import urllib.request, json # we will need urllib to communicate with the api and then json to read the data we get

In [20]:
# Load the pre-built pickles, grouped by kind (per-city frames, holiday
# tables, weather tables). The load order is unchanged.
# NOTE: unpickling can execute arbitrary code, so these files must come from
# a trusted source.
marburg, duisburg = (
    pd.read_pickle(path)
    for path in ("../0_data/marburg_clean.pkl", "../0_data/duisburg_clean.pkl")
)

nrw_holidays, he_holidays = (
    pd.read_pickle(path)
    for path in ("../0_data/nrw_holidays.pkl", "../0_data/he_holidays.pkl")
)

marburg_weather, duisburg_weather = (
    pd.read_pickle(path)
    for path in (
        "../0_data/weather/marburg_weather.pkl",
        "../0_data/weather/duisburg_weather.pkl",
    )
)

In [21]:
# Hourly demand for Marburg: the number of non-null `day` entries that fall
# into each hourly bin of the frame's datetime index.
ma = marburg.resample('H')['day'].count().to_frame('demand')

# Calendar features are read from the index's vectorised accessors instead of
# mapping a lambda over it. Applied per element, `Timestamp.weekday` is a
# method rather than a value, so the `weekday` column would hold bound methods
# and the `< 5` comparison below would raise a TypeError.
ma['dayofyear'] = ma.index.dayofyear
# ISO week number; cast from the nullable UInt32 result to a plain integer so
# that `.values` on the feature columns stays numeric.
ma['week'] = ma.index.isocalendar().week.astype('int64')
ma['weekday'] = ma.index.weekday  # Monday=0 ... Sunday=6
ma['is_weekday'] = ma['weekday'] < 5
ma['hour'] = ma.index.hour

# Join the weather observations on the timestamp index. `merge` defaults to an
# inner join, so hours without a matching weather row are dropped.
ma = ma.merge(marburg_weather, left_index=True, right_index=True)

In [22]:
# Holiday flag taken from `he_holidays`, aligned on the timestamp index.
# Timestamps that are absent from `he_holidays` (or NaN there) count as
# non-holidays, and the column is stored as a real boolean.
# Only this column is filled: a frame-wide `fillna(False)` would also replace
# missing values in every other column (e.g. weather readings) with False,
# silently hiding gaps in the data.
# NOTE(review): alignment matches exact index labels only — confirm that
# `he_holidays` is indexed at the same resolution as `ma`.
ma['is_holiday'] = (
    he_holidays['is_holiday'].reindex(ma.index).fillna(False).astype(bool)
)

In [23]:
# Hourly demand for Duisburg: the number of non-null `day` entries that fall
# into each hourly bin of the frame's datetime index.
du = duisburg.resample('H')['day'].count().to_frame('demand')

# Calendar features are read from the index's vectorised accessors instead of
# mapping a lambda over it. Applied per element, `Timestamp.weekday` is a
# method rather than a value, so the `weekday` column would hold bound methods
# and the `< 5` comparison below would raise a TypeError.
du['dayofyear'] = du.index.dayofyear
# ISO week number; cast from the nullable UInt32 result to a plain integer so
# that `.values` on the feature columns stays numeric.
du['week'] = du.index.isocalendar().week.astype('int64')
du['weekday'] = du.index.weekday  # Monday=0 ... Sunday=6
du['is_weekday'] = du['weekday'] < 5
du['hour'] = du.index.hour

# Join the weather observations on the timestamp index. `merge` defaults to an
# inner join, so hours without a matching weather row are dropped.
du = du.merge(duisburg_weather, left_index=True, right_index=True)

In [24]:
# Holiday flag taken from `nrw_holidays`, aligned on the timestamp index.
# Timestamps that are absent from `nrw_holidays` (or NaN there) count as
# non-holidays, and the column is stored as a real boolean.
# Only this column is filled: a frame-wide `fillna(False)` would also replace
# missing values in every other column (e.g. weather readings) with False,
# silently hiding gaps in the data.
# NOTE(review): alignment matches exact index labels only — confirm that
# `nrw_holidays` is indexed at the same resolution as `du`.
du['is_holiday'] = (
    nrw_holidays['is_holiday'].reindex(du.index).fillna(False).astype(bool)
)

In [25]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold

In [26]:
# A single StandardScaler shared by both cities. Each later `fit_transform`
# call refits it, so afterwards it holds the statistics of whichever city was
# scaled last.
scaler = StandardScaler()

In [27]:
# Standardise the numeric predictors for Marburg (calendar + weather columns);
# the target is the hourly demand count.
X_ma = scaler.fit_transform(
    ma.loc[:, [
        'dayofyear', 'week', 'weekday', 'hour',
        'temperature', 'precipitation', 'cloud_coverage', 'wind_velocity',
    ]].to_numpy()
)
y_ma = ma.loc[:, 'demand'].to_numpy()

In [28]:
# Append the two boolean flags, unscaled, as the last feature columns.
# This cell rebuilds X_ma from itself, so running it twice appends the flags
# twice.
X_ma = np.hstack([X_ma, ma.loc[:, ['is_weekday', 'is_holiday']].to_numpy()])

In [29]:
# Standardise the numeric predictors for Duisburg (calendar + weather columns);
# the target is the hourly demand count. This refits the shared scaler.
X_du = scaler.fit_transform(
    du.loc[:, [
        'dayofyear', 'week', 'weekday', 'hour',
        'temperature', 'precipitation', 'cloud_coverage', 'wind_velocity',
    ]].to_numpy()
)
y_du = du.loc[:, 'demand'].to_numpy()

In [30]:
# Append the two boolean flags, unscaled, as the last feature columns.
# This cell rebuilds X_du from itself, so running it twice appends the flags
# twice.
X_du = np.hstack([X_du, du.loc[:, ['is_weekday', 'is_holiday']].to_numpy()])

In [31]:
# Fit an L1-regularised linear model on the Marburg features and show its
# coefficients (column order: the eight scaled predictors, then is_weekday,
# is_holiday). Presumably a feature screen, going by the variable name:
# coefficients shrunk to exactly zero mark predictors the model does not use.
selection_model = Lasso(alpha=0.5).fit(X_ma, y_ma)
selection_model.coef_

array([ 0.        ,  0.54045036, -1.69769162,  3.46357418,  7.42037199,
       -0.8621322 , -0.        , -0.        ,  0.46587421, -0.        ])

In [32]:
# Refit the same Lasso estimator on the Duisburg features and display the
# resulting coefficients (`fit` returns the estimator itself).
selection_model.fit(X_du, y_du).coef_

array([ 0.        ,  2.40514701, -1.87296885,  1.59838971,  4.9648298 ,
       -0.47696063, -0.        ,  0.        ,  1.35430112, -0.        ])