In [4]:
# https://machinelearningmastery.com/lstm-for-time-series-prediction-in-pytorch/

import torch

# set variables
lookback = 4
timeseries = None
X_train = None
y_train = None
X_test = None
y_test = None

def create_dataset(dataset, lookback=1):
    """Transform a time series into a prediction dataset
    
    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        lookback: Size of window for prediction
    """
    X, y = [], []
    for i in range(len(dataset)-lookback):
        feature = dataset[i:i+lookback]
        target = dataset[i+1:i+lookback+1]
        X.append(feature)
        y.append(target)
        
    return torch.tensor(X), torch.tensor(y)

In [39]:
import torch.nn as nn

class AirModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=8, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50, 1)
    def forward(self, x):
        x, _ = self.lstm(x)
        # optionally extract only the last time step
        # x = x[:, -1, :]
        x = self.linear(x)
        
        return x

In [30]:
import pandas as pd
import numpy as np

df_feature=pd.read_csv('../data/df_feature.csv')
X=pd.read_csv('../data/X_data_tr.csv', index_col='date', parse_dates=True)
y=pd.read_csv('../data/y_data_tr.csv', index_col='date', parse_dates=True)

selected_features=list(df_feature[df_feature.select==1]['variable'])
# X_train=np.array(X[selected_features][:-96])
# y_train=np.array(y['y_oecd'][:-96])
# X_test=np.array(X[selected_features][-96:])
# y_test=np.array(y['y_oecd'][-96:])

X_train, y_train = create_dataset(X[selected_features][:-96].to_numpy())
print(X_train.shape)
print(y_train.shape)
X_train = X_train.to(torch.float32)
y_train = y_train.to(torch.float32)

torch.Size([491, 1, 16])
torch.Size([491, 1, 16])


In [45]:
from model_team14 import select_features, plot_pca, DTW

metadata=pd.read_csv('../data/full_info.csv')

## select imputed & transformed data
X=pd.read_csv('../data/X_data_tr.csv', index_col='date', parse_dates=True)
threshold=0.3
criteria=None  
## filtering criteria is not cumulative explained variance ratio but just explained variance ratio


df_feature=select_features(metadata, X, threshold, criteria=criteria)
df_feature[df_feature.select==1]

y_type = 'y_agg'
test_year = 8
selected_features=list(df_feature[df_feature.select==1]['variable'])

X_train=X[selected_features][:-(test_year*12)]
y_train=y[y_type][:-(test_year*12)]           
X_test=X[selected_features][-(test_year*12):]   
y_test=y[y_type][-(test_year*12):]

X_train = torch.tensor(X_train.to_numpy())
y_train = torch.tensor(y_train.to_numpy())
X_test = torch.tensor(X_test.to_numpy())
y_test = torch.tensor(y_test.to_numpy())

# X_train, y_train = create_dataset(X_train.to_numpy())
# X_test, y_test = create_dataset(X_test.to_numpy())

X_train = X_train.to(torch.float32)
y_train = y_train.to(torch.float32)
X_test = X_test.to(torch.float32)
y_test = y_test.to(torch.float32)

In [46]:
# training 

import numpy as np
import torch.optim as optim
import torch.utils.data as data

model = AirModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=8)

n_epochs = 2000
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # Validation
    if epoch % 100 != 0:
        continue
    model.eval()

    with torch.no_grad():
        y_pred = model(X_train)
        train_rmse = np.sqrt(loss_fn(y_pred, y_train))
        y_pred = model(X_test)
        test_rmse = np.sqrt(loss_fn(y_pred, y_test))
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0: train RMSE 0.7419, test RMSE 0.5778
Epoch 100: train RMSE 0.7691, test RMSE 0.5253
Epoch 200: train RMSE 0.7664, test RMSE 0.5224
Epoch 300: train RMSE 0.7986, test RMSE 0.5298
Epoch 400: train RMSE 0.7639, test RMSE 0.5356
Epoch 500: train RMSE 0.7711, test RMSE 0.5250
Epoch 600: train RMSE 0.7840, test RMSE 0.5960
Epoch 700: train RMSE 0.7924, test RMSE 0.5809
Epoch 800: train RMSE 0.8012, test RMSE 0.5947
Epoch 900: train RMSE 0.7920, test RMSE 0.5877
Epoch 1000: train RMSE 0.7997, test RMSE 0.5644
Epoch 1100: train RMSE 0.8074, test RMSE 0.5900
Epoch 1200: train RMSE 0.8117, test RMSE 0.5659
Epoch 1300: train RMSE 0.8232, test RMSE 0.5919
Epoch 1400: train RMSE 0.8131, test RMSE 0.5883
Epoch 1500: train RMSE 0.8482, test RMSE 0.6052
Epoch 1600: train RMSE 0.8550, test RMSE 0.6098
Epoch 1700: train RMSE 0.8365, test RMSE 0.5890
Epoch 1800: train RMSE 0.8205, test RMSE 0.5673
Epoch 1900: train RMSE 0.8576, test RMSE 0.6189


In [None]:
# plot

import matplotlib.pyplot as plt

train_size = len(y_train)

with torch.no_grad():
    # shift train predictions for plotting
    train_plot = np.ones_like(timeseries) * np.nan
    y_pred = model(X_train)
    y_pred = y_pred[:, -1, :]
    train_plot[lookback:train_size] = model(X_train)[:, -1, :]
    # shift test predictions for plotting
    test_plot = np.ones_like(timeseries) * np.nan
    test_plot[train_size+lookback:len(timeseries)] = model(X_test)[:, -1, :]
# plot
plt.plot(timeseries, c='b')
plt.plot(train_plot, c='r')
plt.plot(test_plot, c='g')
plt.show()