In [1]:
from __future__ import print_function
import argparse
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image
import torchvision.datasets as datasets
#from vae import VAE
from vaecnn import VAECNN
#from util import train,test

In [2]:
# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(recon_x, x, mu, logvar):
    #BCE = F.binary_cross_entropy(recon_x, x, reduction='sum') #.view(-1, 784)
    BCE = F.mse_loss(recon_x, x, size_average=False) / 150
    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = (-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()))/ (150)
    
    print('BCE',BCE)
    print('KLD',KLD)
    
    return  BCE + 0.01 * (KLD )


def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        #print(data)
        optimizer.zero_grad()
        recon_batch, mu, logvar,_ = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))

def test(epoch):
    model.eval()
    test_loss = 0
    with torch.no_grad():
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            recon_batch, mu, logvar, _ = model(data)
            test_loss += loss_function(recon_batch, data, mu, logvar).item()
#             if i == 0:
#                 n = min(data.size(0), 8)
#                 comparison = torch.cat([data[:n],
#                                       recon_batch.view(args.batch_size, 1, 28, 28)[:n]])
#                 save_image(comparison.cpu(),
#                          'results/reconstruction_' + str(epoch) + '.png', nrow=n)

    test_loss /= len(test_loader.dataset)
    print('====> Test set loss: {:.4f}'.format(test_loss))


In [3]:
class Namespace:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

In [4]:
args = Namespace(
    batch_size = 150,
    epochs = 10,
    cuda = True,
    seed = 2019,
    log_interval = 1)

In [5]:
torch.manual_seed(args.seed)
device = torch.device("cuda" if args.cuda else "cpu")

In [6]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

trainDataDir = '/scratch/um367/DL/data/sample_4/train'
valDataDir = '/scratch/um367/DL/data/sample_4/val'
# trainDataDir = '/scratch/um367/DL/data/sampledata/supervised/train'
# valDataDir = '/scratch/um367/DL/data/sampledata/supervised/val'


train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(trainDataDir, transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(valDataDir, transform=transforms.ToTensor()),
    batch_size=args.batch_size, shuffle=True, **kwargs)

In [7]:
print(len(train_loader.dataset))

128000


In [8]:
model = VAECNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [9]:
## load parameters
# to load
checkpoint = torch.load('vaetest3.pth.tar')
model.load_state_dict(checkpoint['model_state_dict'])

In [None]:
for epoch in range(1, args.epochs + 1):
    train(epoch)
    #test(epoch)
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
            }, 'vaetest3.pth.tar')
#     with torch.no_grad():
#         sample = torch.randn(64, 20).to(device)
#         sample = model.decode(sample).cpu()
#         save_image(sample.view(64, 1, 28, 28),
#         'results/sample_' + str(epoch) + '.png')



BCE tensor(552.2264, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.3819, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(2002.8623, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(299.8569, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(723.6709, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(264.9092, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(708.6439, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(243.3640, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(888.0856, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(236.4135, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(850.8558, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(229.0626, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(736.2533, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(217.1159, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(667.2164, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(216.2951, device='cuda:0', grad_fn=<DivBackward0>)
BCE ten

BCE tensor(560.4083, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(310.5019, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(574.0785, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(304.6717, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(548.4348, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(313.8286, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(533.0632, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(308.9372, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(551.2176, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(304.2804, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(570.6559, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(309.0182, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(491.6093, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(303.8236, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(584.0413, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(302.4751, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(563.3452, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.8204, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(505.5283, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.6667, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(516.1605, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(280.4745, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(556.7616, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(276.8505, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(496.6679, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(277.0527, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(531.8563, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.2789, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(542.3220, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.0914, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(541.6531, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.5897, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(542.9590, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.4746, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(506.8703, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(276.5028, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(532.5193, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.7695, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(502.9397, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.3576, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(523.3586, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.9501, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(527.1189, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.2020, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(501.0300, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.4931, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(491.3507, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.1258, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(516.7764, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.6058, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(527.0693, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.5028, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(522.8018, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.1735, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(474.7290, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.5630, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(567.0647, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.6799, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(478.6946, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.4144, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(487.8228, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.2424, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(491.1429, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.4296, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(534.8148, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.3899, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(523.1342, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.4105, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(545.3962, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.1814, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(518.5012, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.4404, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(486.4476, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(276.4547, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(526.8992, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.4525, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(528.1583, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.5352, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(479.4628, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.8294, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(501.0479, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.4660, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(484.1327, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.8364, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(550.9592, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(276.9483, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(514.2662, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.5388, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(503.6111, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.2950, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(504.8550, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.1312, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(510.4722, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.0735, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(529.0258, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.3155, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(543.2403, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(278.6271, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(539.2082, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.9449, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(475.6402, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.9744, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(518.3062, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.6754, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(528.0799, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.3016, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(506.1586, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.1473, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(499.1601, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(277.1253, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(479.4987, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.7448, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(547.5522, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.0392, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(468.9226, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.1915, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(514.0413, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.8494, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(535.5192, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.2231, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(507.9880, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.8330, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(518.9523, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.7752, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(491.5691, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.0214, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(517.0712, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.1978, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(520.2433, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(267.9852, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(506.7298, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.9332, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(437.4363, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.3657, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(508.2308, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.6867, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(480.0595, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.2393, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(454.4374, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.4378, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(522.9999, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.9800, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(491.9260, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(276.7167, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(525.8064, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.9818, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(514.9600, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.1237, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(509.4407, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.2461, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(503.4355, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(267.4557, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(546.4395, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.1544, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(536.6884, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(266.5736, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(508.1427, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.5240, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(502.4262, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(266.8077, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(510.2027, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.0530, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(512.8564, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.6724, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(495.3109, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.0650, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(539.4012, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.4937, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(524.2701, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(267.4419, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(495.2928, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.9156, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(476.0027, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.9840, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(533.7897, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.0687, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(489.4214, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.5142, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(505.5006, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.2442, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(514.2485, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.3707, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(499.2663, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.5608, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(551.0292, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.4433, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(480.1960, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.1720, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(538.9689, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.5994, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(495.2324, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.3252, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(525.1867, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.7216, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(523.3389, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.0373, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(487.1800, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.0724, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(511.8202, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.0310, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(496.7814, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.0338, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(508.2208, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(276.0850, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(497.7814, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.6658, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(494.2543, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.4397, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(515.4941, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.8769, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(484.7723, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.0675, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(523.1111, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.5168, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(535.5014, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.8330, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(494.9510, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(278.5033, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(515.0516, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(276.5000, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(478.2493, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.1834, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(500.0765, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.7805, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(548.4413, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.2495, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(531.2155, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.1399, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(504.9168, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.7068, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(561.6208, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.2119, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(525.3447, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.8883, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(518.7261, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.0286, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(549.0167, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.1748, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(541.6259, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.0488, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(517.4509, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.0336, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(476.7625, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.1641, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(543.4353, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.8901, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(524.7815, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.7524, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(571.4108, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.9071, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(504.7488, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.1016, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(527.2524, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.1303, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(476.5883, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.9738, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(500.5799, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.6512, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(507.1019, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.4020, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(538.1647, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.6485, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(544.6807, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.4765, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(550.9719, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.7827, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(507.8448, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.5982, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(471.0324, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.7614, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(567.0014, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(277.7190, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(539.2080, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.3806, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(499.0862, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.7159, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(478.5642, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.9289, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(521.9296, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.0562, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(487.6801, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(265.5221, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(486.7568, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.1058, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(169.7923, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(89.7793, device='cuda:0', grad_fn=<DivBackward0>)
====> Epoch: 1 Average loss: 3.5198
BCE tensor(502.2808, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.3318, device='cuda:0

BCE tensor(483.8606, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.1814, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(533.3906, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.7796, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(508.3089, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(272.3137, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(503.9040, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.3496, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(519.5315, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.7523, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(491.6806, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.4626, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(531.6710, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.0196, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(457.5159, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.1121, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

BCE tensor(547.6427, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(269.0491, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(509.9389, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(268.9966, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(514.6300, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.3026, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(514.7987, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(273.7507, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(517.9833, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(270.3056, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(533.8817, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(271.1250, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(491.0026, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(274.4300, device='cuda:0', grad_fn=<DivBackward0>)
BCE tensor(536.2814, device='cuda:0', grad_fn=<DivBackward0>)
KLD tensor(275.2030, device='cuda:0', grad_fn=<DivBackward0>)
BCE tens

Process Process-2:
Traceback (most recent call last):
  File "/share/apps/python/2.7.12/intel/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/share/apps/python/2.7.12/intel/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 106, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 187, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/scratch/um367/urwa-env/py2.7.12/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 164, in default_collate
    return torch.stack(batch, 0, out=out)
KeyboardInterrupt
