In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler

from lampy import data
from lampy import metrics

In [2]:
%matplotlib inline
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

In [3]:
# --- Inputs -----------------------------------------------------------------
# NOTE(review): the file name says "lampy_2019" while the reader used below is
# `read_2018_data` -- confirm the reader is the right one for this file.
data_file = 'data/636f6a2e-3a48-4801-afce-05c864adc60e-lampy_2019.csv'
# Index key selected with `.loc` below (presumably a sensor/station id -- verify).
sid = 'y7e4onsytkb3ydonflz5kcbcigkh5ulo'
# Column to predict; it is also kept as the first selected column.
target = 'pm10'
attributes = [
    target,
    'o3',
    'pm2p5',
    'no2',
    'so2',
]

# --- Load -> resample -> dataset -> split -------------------------------------
# Read the file and keep only the rows for `sid` and the columns listed above.
df = (
    data.read_2018_data(data_file)
    .loc[sid, attributes]
)
resampled_df = data.resample_data(df)
X, y = data.create_dataset(resampled_df)
X_tr, X_val, y_tr, y_val = data.train_validation_split(X, y)

In [4]:
def transform_dataset(X, y, window):
    """Augment every row of ``X`` with its ``window - 1`` preceding rows.

    Output row ``i`` is the horizontal concatenation
    ``[X[i], X[i-1], ..., X[i-window+1]]``: the current row comes first,
    followed by progressively older rows. The first ``window - 1`` rows have
    no complete history, so they are dropped from both outputs, which keeps
    features and targets aligned.

    Parameters
    ----------
    X : array-like of shape (n_rows, n_features)
        Feature matrix; must expose a two-element ``.shape`` and support
        row slicing.
    y : array-like
        Targets, expected to hold one entry per row of ``X`` (not checked).
        Must support ``.copy()`` and slicing.
    window : int
        Number of consecutive rows stacked per sample. Must be >= 1;
        ``window == 1`` returns the features unchanged (as float64).

    Returns
    -------
    new_X : numpy.ndarray
        float64 array with ``window * n_features`` columns and
        ``max(n_rows - window + 1, 0)`` rows.
    new_y : same type as ``y``
        Copy of ``y`` without its first ``window - 1`` entries.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    # Without this guard a non-positive window surfaces as an unrelated numpy
    # error (broadcast failure / negative dimensions) that never mentions the
    # offending argument.
    if window < 1:
        raise ValueError(
            'window must be a positive integer, got {!r}'.format(window)
        )

    n_rows, n_features = X.shape
    new_X = np.zeros((n_rows, window * n_features))
    # Lag 0: the current row occupies the first `n_features` columns.
    new_X[:, :n_features] = X
    for idx in range(1, window):
        start = n_features * idx
        end = n_features * (idx + 1)
        # Lag `idx`: row i receives X[i - idx]; rows with i < idx stay zero and
        # are discarded by the final slice.
        new_X[idx:, start:end] = X[:-idx]
    # Copy y before slicing so the returned targets do not alias the input.
    return new_X[(window - 1):], y.copy()[(window - 1):]

In [5]:
# Sweep the look-back window over 1..48 rows and report the validation RMSE of
# an ordinary least-squares fit for each setting.
for window in range(1, 49):
    # Build the lagged features for both splits with the same window
    # (training split first, then validation).
    (X_tr_trans, y_tr_trans), (X_val_trans, y_val_trans) = (
        transform_dataset(X_tr, y_tr, window),
        transform_dataset(X_val, y_val, window),
    )

    # `fit` returns the estimator itself, so `linear` is the fitted model.
    linear = linear_model.LinearRegression().fit(X_tr_trans, y_tr_trans)
    y_pred_trans = linear.predict(X_val_trans)

    # Score against the validation targets that survived the windowing.
    linear_rmse = metrics.rmse(y_val_trans, y_pred_trans)
    print('window:', window, 'linear RMSE:', linear_rmse)

window: 1 linear RMSE: 8.145235410655742
window: 2 linear RMSE: 8.122515312541152
window: 3 linear RMSE: 8.78027342676847
window: 4 linear RMSE: 8.835495638731482
window: 5 linear RMSE: 9.541200365849472
window: 6 linear RMSE: 9.431053529180069
window: 7 linear RMSE: 9.108941947952427
window: 8 linear RMSE: 9.117424223978361
window: 9 linear RMSE: 9.06936523252595
window: 10 linear RMSE: 9.054595877625369
window: 11 linear RMSE: 8.837288005024213
window: 12 linear RMSE: 8.842808284677217
window: 13 linear RMSE: 8.922473773539323
window: 14 linear RMSE: 8.907601144683643
window: 15 linear RMSE: 8.885296066315561
window: 16 linear RMSE: 8.894585222253413
window: 17 linear RMSE: 8.865232668985977
window: 18 linear RMSE: 8.861365513795032
window: 19 linear RMSE: 8.89708645489496
window: 20 linear RMSE: 8.892439288222372
window: 21 linear RMSE: 8.877838117691873
window: 22 linear RMSE: 8.879739018467792
window: 23 linear RMSE: 8.878134765887278
window: 24 linear RMSE: 8.874138504373498
wind