In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.autograd import Variable
from torchsummary import summary

from torchvision.utils import save_image
from IPython.core.display import Image, display

import numpy as np
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

In [2]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Truncated backpropagation
def detach(states):
    return [state.detach() for state in states] 

In [4]:
bsz = 20
epochs = 20
seqlen = 30

zsize = 32
nhidden = 256

In [5]:
z = torch.from_numpy(np.load('z.npy'))
z = z.view(bsz, -1, z.size(1))
z.shape

torch.Size([20, 1500, 32])

In [6]:
class V(nn.Module):
    def __init__(self, nembed, nhidden=265, nlayers=1):
        super(V, self).__init__()

        self.nhidden = nhidden
        self.nlayers = nlayers
        self.hidden = self.init_hidden()
        
        self.lstm = nn.LSTM(nembed, nhidden, nlayers, batch_first=True)
        self.linear = nn.Linear(nhidden, nembed)
        
    def forward(self, x, h):
        # Forward propagate LSTM
        out, (h, c) = self.lstm(x, h)
        out = self.linear(out)
        return out, (h, c)
    
    def init_hidden(self):
        return (torch.zeros(self.nlayers, bsz, self.nhidden).to(device),
                torch.zeros(self.nlayers, bsz, self.nhidden).to(device))


model = V(zsize, nhidden)
model

V(
  (lstm): LSTM(32, 256, batch_first=True)
  (linear): Linear(in_features=256, out_features=32, bias=True)
)

In [7]:
# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [8]:
# Train the model
for epoch in range(epochs):
    # Set initial hidden and cell states
    hidden = model.init_hidden()
    
    for i in range(0, z.size(1) - seqlen, seqlen):
        # Get mini-batch inputs and targets
        inputs = z[:, i:i+seqlen, :].to(device)
        targets = z[:, (i+1):(i+1)+seqlen, :].to(device)
        
        # Forward pass
        hidden = detach(hidden)
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs, targets)
        
        # Backward and optimize
        model.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        
        if i % 500 == 0:
            print ('Epoch [{}/{}], Step[{}], Loss: {:.4f}'
                   .format(epoch+1, epochs, i, loss.item()))

Epoch [1/20], Step[0], Loss: 0.6621
Epoch [2/20], Step[0], Loss: 0.0864
Epoch [3/20], Step[0], Loss: 0.0442
Epoch [4/20], Step[0], Loss: 0.0319
Epoch [5/20], Step[0], Loss: 0.0280
Epoch [6/20], Step[0], Loss: 0.0228
Epoch [7/20], Step[0], Loss: 0.0213
Epoch [8/20], Step[0], Loss: 0.0192
Epoch [9/20], Step[0], Loss: 0.0193
Epoch [10/20], Step[0], Loss: 0.0172
Epoch [11/20], Step[0], Loss: 0.0168
Epoch [12/20], Step[0], Loss: 0.0167
Epoch [13/20], Step[0], Loss: 0.0152
Epoch [14/20], Step[0], Loss: 0.0143
Epoch [15/20], Step[0], Loss: 0.0138
Epoch [16/20], Step[0], Loss: 0.0134
Epoch [17/20], Step[0], Loss: 0.0128
Epoch [18/20], Step[0], Loss: 0.0122
Epoch [19/20], Step[0], Loss: 0.0122
Epoch [20/20], Step[0], Loss: 0.0114


In [9]:
hidden = model.init_hidden()
z1 = model(z, hidden)[0]

In [31]:
np.linalg.norm(z[0, 0, :].data - z1[0, 1, :].data)

0.6826545