In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.autograd import Variable
from torchsummary import summary

from torchvision.utils import save_image
from IPython.core.display import Image, display

import numpy as np
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

In [2]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [3]:
# Truncated backpropagation
def detach(states):
    """Return a list holding a detached copy of every tensor in ``states``.

    Each returned tensor comes from ``Tensor.detach()``, so it is cut off
    from the autograd graph that produced the corresponding input. The
    result is always a list, in the same order as ``states``.
    """
    detached = []
    for tensor in states:
        detached.append(tensor.detach())
    return detached

In [6]:
# Batching / schedule: sequences per batch, passes over the data, steps per window
bsz, epochs, seqlen = 20, 50, 30

# Model sizes: width of one latent vector and of the LSTM hidden state
zsize, nhidden = 32, 256

In [7]:
# Load the saved array from 'z.npy' as a tensor, then fold it into `bsz`
# rows while keeping the size of axis 1 as the last axis; the middle axis
# is inferred. The result is placed on `device`.
z = torch.from_numpy(np.load('z.npy'))
z = z.view(bsz, -1, z.shape[1])
z = z.to(device)
z.shape

torch.Size([20, 1500, 32])

In [8]:
class V(nn.Module):
    """LSTM followed by a linear read-out back to the input width.

    The LSTM is built with ``batch_first=True``, so inputs are laid out as
    (batch, step, nembed); the linear layer maps every step's hidden vector
    back to ``nembed`` features.

    Args:
        nembed: Feature size of each input step; also the output feature size.
        nhidden: Hidden size of the LSTM.
        nlayers: Number of stacked LSTM layers.
    """

    # NOTE(review): the default nhidden=265 looks like a typo for 256 (the
    # notebook always passes 256 explicitly). Left untouched so that callers
    # relying on the default keep the same parameter shapes — confirm and fix.
    def __init__(self, nembed, nhidden=265, nlayers=1):
        super(V, self).__init__()

        self.nhidden = nhidden
        self.nlayers = nlayers
        # Kept for backward compatibility; nothing in this class reads it.
        # No layers exist yet at this point, so init_hidden() falls back to
        # the notebook-level `bsz` and `device`, exactly as before.
        self.hidden = self.init_hidden()

        self.lstm = nn.LSTM(nembed, nhidden, nlayers, batch_first=True)
        self.linear = nn.Linear(nhidden, nembed)

    def forward(self, x, h=None):
        """Run the LSTM over ``x`` and project every step back to ``nembed``.

        Args:
            x: Input batch, laid out batch-first.
            h: Optional ``(h_0, c_0)`` pair of initial states. When omitted,
                ``nn.LSTM`` starts from an all-zero state.

        Returns:
            ``(out, (h, c))``: the per-step linear projection of the LSTM
            output, and the final hidden and cell states.
        """
        # Forward propagate LSTM
        out, (h, c) = self.lstm(x, h)
        out = self.linear(out)
        return out, (h, c)

    def init_hidden(self, batch_size=None, dev=None):
        """Create an all-zero ``(h_0, c_0)`` pair for this model.

        Args:
            batch_size: Number of sequences in the batch. Defaults to the
                notebook-level ``bsz`` so existing ``init_hidden()`` calls
                behave as before.
            dev: Device for the state tensors. Defaults to the device of the
                model's own parameters, so the state always matches the
                weights; before any layer exists it falls back to the
                notebook-level ``device``.

        Returns:
            Tuple of two zero tensors, each of shape
            ``(nlayers, batch_size, nhidden)``.
        """
        if batch_size is None:
            batch_size = bsz
        if dev is None:
            first_param = next(self.parameters(), None)
            dev = device if first_param is None else first_param.device

        shape = (self.nlayers, batch_size, self.nhidden)
        return (torch.zeros(shape, device=dev),
                torch.zeros(shape, device=dev))


# Build the network with the notebook's latent and hidden sizes,
# then move its parameters to the selected device
model = V(nembed=zsize, nhidden=nhidden)
model = model.to(device)
model

V(
  (lstm): LSTM(32, 256, batch_first=True)
  (linear): Linear(in_features=256, out_features=32, bias=True)
)

In [9]:
# Objective and optimizer: mean squared error as the loss, and Adam with
# its library-default settings over every parameter of `model`
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters())

In [10]:
# Train with truncated backpropagation: walk along axis 1 of `z` in
# windows of `seqlen` steps, carrying the LSTM state between windows
for epoch in range(epochs):
    # Every epoch starts again from an all-zero hidden/cell state
    hidden = model.init_hidden()

    for i in range(0, z.shape[1] - seqlen, seqlen):
        # The target window is the input window shifted one step ahead
        inputs = z[:, i:i + seqlen, :]
        targets = z[:, i + 1:i + 1 + seqlen, :]

        # Detach the carried state so gradients stop at the window boundary
        hidden = detach(hidden)
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets)

        # Clear old gradients, backpropagate, clip the gradient norm to 0.5,
        # then take one optimizer step
        model.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

    # Only report on epochs 0, 10, 20, ...; the value shown is the loss of
    # the last window processed in that epoch
    if epoch % 10 != 0:
        continue
    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, epochs, loss.item()))

Epoch [1/50], Loss: 0.2224
Epoch [11/50], Loss: 0.0331
Epoch [21/50], Loss: 0.0253
Epoch [31/50], Loss: 0.0218
Epoch [41/50], Loss: 0.0201


In [11]:
# Run the trained model over the complete sequences in `z`, starting from a
# fresh all-zero state, and keep only the per-step outputs.
# This is inference only, so autograd is switched off: otherwise a graph
# spanning the whole sequence would be built and kept alive through `z1`.
hidden = model.init_hidden()
with torch.no_grad():
    z1 = model(z, hidden)[0]

In [47]:
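# NOTE(review): both diagnostics below are disabled. The array displayed under
# this cell has 10 entries, which does not match either expression as written
# (presumably left over from an earlier edit) - re-run or remove this cell.
# np.linalg.norm(z[0, 0, :].cpu().data - z1[0, :, :].cpu().data, axis=1)
# np.linalg.norm(inputs.cpu().data - targets.cpu().data)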

array([0.78542835, 0.42561463, 0.67908615, 0.9246513 , 0.73405993,
       0.9516081 , 0.8526774 , 0.72184086, 0.88601476, 0.8110216 ],
      dtype=float32)