In [1]:
from __future__ import print_function
import argparse
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
import torchvision.datasets as datasets
#from vae import VAE
from vaecnn import VAECNN
#from util import train,test

In [2]:
# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
    #BCE = F.binary_cross_entropy(recon_x, x, reduction='sum') #.view(-1, 784)
    BCE = F.mse_loss(recon_x, x, size_average=True)
    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = (-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()))/64
    
    print('BCE',BCE)
    print('KLD',KLD)
    
    return  BCE + 0.01 * (KLD )


def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        #print(data)
        optimizer.zero_grad()
        recon_batch, mu, logvar,_ = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))

def test(epoch):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            recon_batch, mu, logvar, _ = model(data)
            test_loss += loss_function(recon_batch, data, mu, logvar).item()
#             if i == 0:
#                 n = min(data.size(0), 8)
#                 comparison = torch.cat([data[:n],
#                                       recon_batch.view(args.batch_size, 1, 28, 28)[:n]])
#                 save_image(comparison.cpu(),
#                          'results/reconstruction_' + str(epoch) + '.png', nrow=n)

    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))


In [3]:
class Namespace:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

In [4]:
args = Namespace(
    batch_size = 150,
    epochs = 10,
    cuda = True,
    seed = 2019,
    log_interval = 1)

In [5]:
torch.manual_seed(args.seed)
device = torch.device("cuda" if args.cuda else "cpu")

In [6]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

trainDataDir = '/scratch/um367/DL/data/sample_4/train'
valDataDir = '/scratch/um367/DL/data/sample_4/val'
# trainDataDir = '/scratch/um367/DL/data/sampledata/supervised/train'
# valDataDir = '/scratch/um367/DL/data/sampledata/supervised/val'


train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(trainDataDir, transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(valDataDir, transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True, **kwargs)

In [7]:
print(len(train_loader.dataset))

128000


In [8]:
model = VAECNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [9]:
## load parameters
# to load
checkpoint = torch.load('vaetest2.pth.tar')
model.load_state_dict(checkpoint['model_state_dict'])

In [10]:
for epoch in range(1, args.epochs + 1):
    train(epoch)
    #test(epoch)
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
            }, 'vaetest2.pth.tar')
#     with torch.no_grad():
#         sample = torch.randn(64, 20).to(device)
#         sample = model.decode(sample).cpu()
#         save_image(sample.view(64, 1, 28, 28),
#         'results/sample_' + str(epoch) + '.png')



BCE tensor(0.0860, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0786, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.5113, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0864, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.1335, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0766, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.3317, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0800, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0707, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0845, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0750, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0797, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.1803, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0871, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.1491, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.082

BCE tensor(0.0822, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0737, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0865, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0784, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0776, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0829, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0789, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0836, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.077

BCE tensor(0.0769, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0895, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0812, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0789, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0787, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0790, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0806, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0791, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.084

BCE tensor(0.0860, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0816, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0838, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0841, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0766, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0791, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0779, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0761, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.085

BCE tensor(0.0802, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0840, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0823, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0783, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0771, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0806, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0825, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0824, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.076

BCE tensor(0.0799, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0805, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0854, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0804, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0822, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0711, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0790, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0826, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.078

BCE tensor(0.0753, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0754, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0775, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0799, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0797, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0737, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0817, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0849, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.080

BCE tensor(0.0811, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0794, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0816, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0829, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0794, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0789, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0812, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0823, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.071

BCE tensor(0.0813, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0806, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0797, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0801, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0717, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0788, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0775, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0776, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.086

BCE tensor(0.0817, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0827, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0769, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0862, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0811, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0843, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0798, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0819, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.083

BCE tensor(0.0741, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0894, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0738, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0817, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0823, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0797, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0021, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0794, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0784, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.079

BCE tensor(0.0759, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0849, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0827, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0849, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0746, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0837, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0883, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0832, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.083

BCE tensor(0.0816, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0820, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0043, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0802, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0864, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0040, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0796, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0795, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0845, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0819, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.082

BCE tensor(0.0818, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0097, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0794, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0086, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0736, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0086, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0780, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0110, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0841, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0109, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0837, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0109, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0791, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0088, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0842, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0108, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.074

BCE tensor(0.0772, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0081, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0813, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0085, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0796, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0073, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0822, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0077, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0831, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0113, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0841, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0101, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0770, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0097, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0756, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0105, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.079

BCE tensor(0.0876, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0094, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0823, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0104, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0828, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0112, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0794, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0085, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0806, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0086, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0802, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0094, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0830, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0090, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0859, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0116, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.081

BCE tensor(0.0781, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0239, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0814, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0362, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0828, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0331, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0865, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0456, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0844, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0342, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0783, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0675, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0772, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0590, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0762, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.0350, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.079

BCE tensor(0.0859, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.1343, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0773, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.1729, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0818, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.2627, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0718, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.1685, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0795, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.3001, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0769, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.2419, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0766, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.2083, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.0768, device='cuda:0', grad_fn=<MseLossBackward>)
KLD tensor(0.2847, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(0.072

====> Epoch: 1 Average loss: 0.0005
BCE tensor(0.0719, device='cuda:0')
KLD tensor(0.5303, device='cuda:0')
BCE tensor(0.0732, device='cuda:0')
KLD tensor(0.5526, device='cuda:0')
BCE tensor(0.0750, device='cuda:0')
KLD tensor(0.6311, device='cuda:0')
BCE tensor(0.0745, device='cuda:0')
KLD tensor(0.7787, device='cuda:0')
BCE tensor(0.0701, device='cuda:0')
KLD tensor(0.5725, device='cuda:0')
BCE tensor(0.0753, device='cuda:0')
KLD tensor(0.6085, device='cuda:0')
BCE tensor(0.0771, device='cuda:0')
KLD tensor(0.7773, device='cuda:0')
BCE tensor(0.0714, device='cuda:0')
KLD tensor(0.5668, device='cuda:0')
BCE tensor(0.0784, device='cuda:0')
KLD tensor(0.6356, device='cuda:0')
BCE tensor(0.0712, device='cuda:0')
KLD tensor(0.6100, device='cuda:0')
BCE tensor(0.0737, device='cuda:0')
KLD tensor(0.5283, device='cuda:0')
BCE tensor(0.0774, device='cuda:0')
KLD tensor(0.7003, device='cuda:0')
BCE tensor(0.0756, device='cuda:0')
KLD tensor(0.5365, device='cuda:0')
BCE tensor(0.0706, device='c

BCE tensor(0.0692, device='cuda:0')
KLD tensor(0.5555, device='cuda:0')
BCE tensor(0.0715, device='cuda:0')
KLD tensor(0.5380, device='cuda:0')
BCE tensor(0.0782, device='cuda:0')
KLD tensor(0.5655, device='cuda:0')
BCE tensor(0.0703, device='cuda:0')
KLD tensor(0.6174, device='cuda:0')
BCE tensor(0.0759, device='cuda:0')
KLD tensor(0.7151, device='cuda:0')
BCE tensor(0.0746, device='cuda:0')
KLD tensor(0.5407, device='cuda:0')
BCE tensor(0.0746, device='cuda:0')
KLD tensor(0.7002, device='cuda:0')
BCE tensor(0.0745, device='cuda:0')
KLD tensor(0.6648, device='cuda:0')
BCE tensor(0.0751, device='cuda:0')
KLD tensor(0.6067, device='cuda:0')
BCE tensor(0.0743, device='cuda:0')
KLD tensor(0.6301, device='cuda:0')
BCE tensor(0.0807, device='cuda:0')
KLD tensor(0.6699, device='cuda:0')
BCE tensor(0.0682, device='cuda:0')
KLD tensor(0.5644, device='cuda:0')
BCE tensor(0.0774, device='cuda:0')
KLD tensor(0.5952, device='cuda:0')
BCE tensor(0.0739, device='cuda:0')
KLD tensor(0.6677, device='c

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/IPython/core/ultratb.py", line 1132, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/IPython/core/ultratb.py", line 313, in wrapped
    return f(*args, **kwargs)
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/IPython/core/ultratb.py", line 358, in _fixed_getinnerframes
    records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
  File "/share/apps/python/2.7.12/intel/lib/python2.7/inspect.py", line 1049, in getinnerframes
    framelist.append((tb.tb_frame,) + getframeinfo(tb, context))
  File "/share/apps/python/2.7.12/intel/lib/python2.7/inspect.py", line 1009, in getframeinfo
    filename = getsourcefile(frame) or getfile(frame)
  File "/share/apps/python/2.7.12/intel/lib/python2.7/inspect.py", line 454, in getsourcefile
    if h


KeyboardInterrupt

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/IPython/core/ultratb.py", line 1132, in get_records
    return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/IPython/core/ultratb.py", line 313, in wrapped
    return f(*args, **kwargs)
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/IPython/core/ultratb.py", line 358, in _fixed_getinnerframes
    records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
  File "/share/apps/python/2.7.12/intel/lib/python2.7/inspect.py", line 1049, in getinnerframes
    framelist.append((tb.tb_frame,) + getframeinfo(tb, context))
  File "/share/apps/python/2.7.12/intel/lib/python2.7/inspect.py", line 1009, in getframeinfo
    filename = getsourcefile(frame) or getfile(frame)
  File "/share/apps/python/2.7.12/intel/lib/python2.7/inspect.py", line 454, in getsourcefile
    if h

IndexError: string index out of range