In [4]:
# https://machinelearningmastery.com/lstm-for-time-series-prediction-in-pytorch/

import torch

# set variables
lookback = 4
timeseries = None
X_train = None
y_train = None
X_test = None
y_test = None

def create_dataset(dataset, lookback=1):
    """Transform a time series into a prediction dataset
    
    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        lookback: Size of window for prediction
    """
    X, y = [], []
    for i in range(len(dataset)-lookback):
        feature = dataset[i:i+lookback]
        target = dataset[i+1:i+lookback+1]
        X.append(feature)
        y.append(target)
        
    return torch.tensor(X), torch.tensor(y)

In [25]:
import torch.nn as nn

class AirModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=16, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50, 1)
    def forward(self, x):
        x, _ = self.lstm(x)
        # optionally extract only the last time step
        # x = x[:, -1, :]
        x = self.linear(x)
        
        return x

In [30]:
import pandas as pd
import numpy as np

df_feature=pd.read_csv('../data/df_feature.csv')
X=pd.read_csv('../data/X_data_tr.csv', index_col='date', parse_dates=True)
y=pd.read_csv('../data/y_data_tr.csv', index_col='date', parse_dates=True)

selected_features=list(df_feature[df_feature.select==1]['variable'])
# X_train=np.array(X[selected_features][:-96])
# y_train=np.array(y['y_oecd'][:-96])
# X_test=np.array(X[selected_features][-96:])
# y_test=np.array(y['y_oecd'][-96:])

X_train, y_train = create_dataset(X[selected_features][:-96].to_numpy())
print(X_train.shape)
print(y_train.shape)
X_train = X_train.to(torch.float32)
y_train = y_train.to(torch.float32)

torch.Size([491, 1, 16])
torch.Size([491, 1, 16])


In [31]:
# training 

import numpy as np
import torch.optim as optim
import torch.utils.data as data

model = AirModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=8)

n_epochs = 2000
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # Validation
    if epoch % 100 != 0:
        continue
    model.eval()

    with torch.no_grad():
        y_pred = model(X_train)
        train_rmse = np.sqrt(loss_fn(y_pred, y_train))
        y_pred = model(X_test)
        test_rmse = np.sqrt(loss_fn(y_pred, y_test))
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


AttributeError: 'numpy.ndarray' object has no attribute 'dim'

In [None]:
# plot

import matplotlib.pyplot as plt

train_size = len(y_train)

with torch.no_grad():
    # shift train predictions for plotting
    train_plot = np.ones_like(timeseries) * np.nan
    y_pred = model(X_train)
    y_pred = y_pred[:, -1, :]
    train_plot[lookback:train_size] = model(X_train)[:, -1, :]
    # shift test predictions for plotting
    test_plot = np.ones_like(timeseries) * np.nan
    test_plot[train_size+lookback:len(timeseries)] = model(X_test)[:, -1, :]
# plot
plt.plot(timeseries, c='b')
plt.plot(train_plot, c='r')
plt.plot(test_plot, c='g')
plt.show()