In [1]:
from sklearn.model_selection import ParameterSampler
from sklearn.preprocessing import StandardScaler

from lampy import data
from lampy import metrics
from lampy.lstm import LSTM

In [2]:
# --- Experiment configuration -------------------------------------------
data_file = 'data/636f6a2e-3a48-4801-afce-05c864adc60e-lampy_2019.csv'
sid = 'y7e4onsytkb3ydonflz5kcbcigkh5ulo'
target = 'pm10'
# The target column is listed first, followed by the other selected columns.
attributes = [target, 'o3', 'pm2p5', 'no2', 'so2']

# --- Load, resample and split -------------------------------------------
# Select the rows for `sid` and only the columns named in `attributes`.
df = (
    data.read_2018_data(data_file)
    .loc[sid, attributes]
)
resampled_df = data.resample_data(df, period='15T')
X, y = data.create_dataset(resampled_df)
X_tr, X_val, y_tr, y_val = data.train_validation_split(X, y)

# --- Standardisation ----------------------------------------------------
# Each scaler is fitted on the training split only; the validation split is
# transformed with the statistics learned from the training split.
X_scaler = StandardScaler().fit(X_tr)
X_tr_scaled = X_scaler.transform(X_tr)
X_val_scaled = X_scaler.transform(X_val)

y_scaler = StandardScaler().fit(y_tr)
y_tr_scaled = y_scaler.transform(y_tr)
y_val_scaled = y_scaler.transform(y_val)

In [3]:
# Search space for the random hyper-parameter search: every candidate is a
# combination of one value per key.
params_grid = {'hidden_size': [8, 16, 32, 64, 128, 256],
               'seq_len': list(range(2, 200)),
               'num_layers': [1, 2, 3]}

# Draw 30 candidate configurations. The sampler is seeded so that a fresh
# kernel (Restart & Run All) reproduces exactly the same candidates; without
# `random_state` every run would sample a different set.
params_list = list(ParameterSampler(params_grid, n_iter=30, random_state=42))
params_list

[{'seq_len': 180, 'num_layers': 1, 'hidden_size': 16},
 {'seq_len': 101, 'num_layers': 1, 'hidden_size': 32},
 {'seq_len': 143, 'num_layers': 3, 'hidden_size': 256},
 {'seq_len': 164, 'num_layers': 1, 'hidden_size': 128},
 {'seq_len': 74, 'num_layers': 3, 'hidden_size': 16},
 {'seq_len': 173, 'num_layers': 3, 'hidden_size': 128},
 {'seq_len': 95, 'num_layers': 2, 'hidden_size': 32},
 {'seq_len': 108, 'num_layers': 1, 'hidden_size': 256},
 {'seq_len': 85, 'num_layers': 1, 'hidden_size': 256},
 {'seq_len': 169, 'num_layers': 1, 'hidden_size': 64},
 {'seq_len': 28, 'num_layers': 2, 'hidden_size': 32},
 {'seq_len': 140, 'num_layers': 3, 'hidden_size': 128},
 {'seq_len': 129, 'num_layers': 1, 'hidden_size': 32},
 {'seq_len': 185, 'num_layers': 3, 'hidden_size': 256},
 {'seq_len': 161, 'num_layers': 3, 'hidden_size': 128},
 {'seq_len': 65, 'num_layers': 2, 'hidden_size': 32},
 {'seq_len': 84, 'num_layers': 2, 'hidden_size': 32},
 {'seq_len': 76, 'num_layers': 1, 'hidden_size': 128},
 {'seq_l

In [4]:
# Disabled random-search loop: every line below is commented out, so nothing
# in this cell executes.
# For each configuration in params_list it would build an LSTM, train it on
# the scaled training data (passing the scaled validation data as well),
# predict on both the validation and training inputs, map the predictions
# back to the original scale with y_scaler, and print the train and
# validation RMSE next to the params that produced them.
# NOTE(review): presumably disabled because the full search is expensive to
# run — confirm, then either re-enable it or remove the cell.
#for params in params_list:
#    lstm = LSTM(input_size=5,
#                hidden_size=params['hidden_size'],
#                output_size=1,
#                num_layers=params['num_layers'])
#    lstm_stats = lstm.train(X_tr_scaled, y_tr_scaled, X_val_scaled, y_val_scaled,
#                            n_epochs=50, seq_len=params['seq_len'],
#                            verbose=False)
#    y_pred_scaled = lstm.predict(X_val_scaled)
#    y_pred = y_scaler.inverse_transform(y_pred_scaled)
#    val_rmse = metrics.rmse(y_val, y_pred)
#    y_pred_scaled = lstm.predict(X_tr_scaled)
#    y_pred = y_scaler.inverse_transform(y_pred_scaled)
#    tr_rmse = metrics.rmse(y_tr, y_pred)
#    print('train RMSE:\t', tr_rmse,
#          '\tvalidation RMSE:\t',
#          val_rmse, '\tparams:', params)