In [1]:
import os
import random
import time
import numpy as np
import torch
import math

from PIL import Image, ImageOps
from argparse import ArgumentParser

from torch.optim import SGD, Adam, lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, CenterCrop, Normalize, Resize, Pad
from torchvision.transforms import ToTensor, ToPILImage

from dataset import VOC12,cityscapes
from transform import Relabel, ToLabel, Colorize
from visualize import Dashboard

import importlib
from iouEval import iouEval, getColorEntry

from shutil import copyfile

In [2]:
NUM_CHANNELS = 3
NUM_CLASSES = 34 #pascal=22, cityscapes=20

color_transform = Colorize(NUM_CLASSES)
image_transform = ToPILImage()

class Args():
    cuda =True  #NOTE: cpu-only has not been tested so you might have to change code if you deactivate this flag
    model = "erfnet"
    state = False
    port = 8097
    datadir = "/esat/toyota/trace/deeplearning/datasets_public/cityscapes/leftImg8bit_trainvaltest"
    height=512
    num_epochs=5
    num_workers=4
    batch_size=2
    steps_loss=50
    steps_plot=50
    epochs_save=0    #You can use this value to save model every X epochs
    savedir="~/Document/thesis_kontras/"
    decoder = False
    pretrainedEncoder =False #, default="../trained_models/erfnet_encoder_pretrained.pth.tar")
    visualize =False
    iouTrain =False #recommended: False (takes more time to train otherwise)
    iouVal = True  
    resume = False 
args= Args()

In [3]:
#Augmentations - different function implemented to perform random augments on both image and target
class MyCoTransform(object):
    def __init__(self, enc, augment=True, height=512):
        self.enc=enc
        self.augment = augment
        self.height = height
        pass
    def __call__(self, input, target):
        # do something to both images
        input =  Resize(self.height, Image.BILINEAR)(input)
        target = Resize(self.height, Image.NEAREST)(target)

        if(self.augment):
            # Random hflip
            hflip = random.random()
            if (hflip < 0.5):
                input = input.transpose(Image.FLIP_LEFT_RIGHT)
                target = target.transpose(Image.FLIP_LEFT_RIGHT)
            
            #Random translation 0-2 pixels (fill rest with padding
            transX = random.randint(-2, 2) 
            transY = random.randint(-2, 2)

            input = ImageOps.expand(input, border=(transX,transY,0,0), fill=0)
            target = ImageOps.expand(target, border=(transX,transY,0,0), fill=255) #pad label filling with 255
            input = input.crop((0, 0, input.size[0]-transX, input.size[1]-transY))
            target = target.crop((0, 0, target.size[0]-transX, target.size[1]-transY))   

        input = ToTensor()(input)
        if (self.enc):
            target = Resize(int(self.height/8), Image.NEAREST)(target)
        target = ToLabel()(target)
        target = Relabel(255, 19)(target)

        return input, target


class CrossEntropyLoss(torch.nn.Module):

    def __init__(self, weight=None):
        super().__init__()

        self.loss = torch.nn.NLLLoss(weight)

    def forward(self, outputs, targets):
        return self.loss(torch.nn.functional.log_softmax(outputs, dim=1), targets)

def save_checkpoint(state, is_best, filenameCheckpoint, filenameBest):
    torch.save(state, filenameCheckpoint)
    if is_best:
        print ("Saving model as best")
        torch.save(state, filenameBest)


In [4]:
savedir = f'../save/{args.savedir}'

if not os.path.exists(savedir):
    os.makedirs(savedir)

with open(savedir + '/opts.txt', "w") as myfile:
    myfile.write(str(args))

#Load Model
assert os.path.exists(args.model + ".py"), "Error: model definition not found"
model_file = importlib.import_module(args.model)
model = model_file.Net(NUM_CLASSES)
copyfile(args.model + ".py", savedir + '/' + args.model + ".py")

if args.cuda:
    model = torch.nn.DataParallel(model).cuda()

if args.state:
    #if args.state is provided then load this state for training
    #Note: this only loads initialized weights. If you want to resume a training use "--resume" option!!
    """
    try:
        model.load_state_dict(torch.load(args.state))
    except AssertionError:
        model.load_state_dict(torch.load(args.state,
            map_location=lambda storage, loc: storage))
    #When model is saved as DataParallel it adds a model. to each key. To remove:
    #state_dict = {k.partition('model.')[2]: v for k,v in state_dict}
    #https://discuss.pytorch.org/t/prefix-parameter-names-in-saved-model-if-trained-by-multi-gpu/494
    """
    def load_my_state_dict(model, state_dict):  #custom function to load model when not all dict keys are there
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                 continue
            own_state[name].copy_(param)
        return model

    #print(torch.load(args.state))
    model = load_my_state_dict(model, torch.load(args.state))


In [29]:
args = Args()
enc = True
best_acc = 0

#TODO: calculate weights by processing dataset histogram (now its being set by hand from the torch values)
#create a loder to run all images and calculate histogram of labels, then create weight array using class balancing

weight = torch.ones(NUM_CLASSES)
if (enc):
    weight[0] = 2.3653597831726	
    weight[1] = 4.4237880706787	
    weight[2] = 2.9691488742828	
    weight[3] = 5.3442072868347	
    weight[4] = 5.2983593940735	
    weight[5] = 5.2275490760803	
    weight[6] = 5.4394111633301	
    weight[7] = 5.3659925460815	
    weight[8] = 3.4170460700989	
    weight[9] = 5.2414722442627	
    weight[10] = 4.7376127243042	
    weight[11] = 5.2286224365234	
    weight[12] = 5.455126285553	
    weight[13] = 4.3019247055054	
    weight[14] = 5.4264230728149	
    weight[15] = 5.4331531524658	
    weight[16] = 5.433765411377	
    weight[17] = 5.4631009101868	
    weight[18] = 5.3947434425354
else:
    weight[0] = 2.8149201869965	
    weight[1] = 6.9850029945374	
    weight[2] = 3.7890393733978	
    weight[3] = 9.9428062438965	
    weight[4] = 9.7702074050903	
    weight[5] = 9.5110931396484	
    weight[6] = 10.311357498169	
    weight[7] = 10.026463508606	
    weight[8] = 4.6323022842407	
    weight[9] = 9.5608062744141	
    weight[10] = 7.8698215484619	
    weight[11] = 9.5168733596802	
    weight[12] = 10.373730659485	
    weight[13] = 6.6616044044495	
    weight[14] = 10.260489463806	
    weight[15] = 10.287888526917	
    weight[16] = 10.289801597595	
    weight[17] = 10.405355453491	
    weight[18] = 10.138095855713	

weight[19] = 0

assert os.path.exists(args.datadir), "Error: datadir (dataset directory) could not be loaded"

co_transform = MyCoTransform(enc, augment=True, height=args.height)#1024)
co_transform_val = MyCoTransform(enc, augment=False, height=args.height)#1024)
dataset_train = cityscapes(args.datadir, co_transform, 'train')
dataset_val = cityscapes(args.datadir, co_transform_val, 'val')

loader = DataLoader(dataset_train, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=True)
loader_val = DataLoader(dataset_val, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False)

if args.cuda:
    weight = weight.cuda()
criterion = CrossEntropyLoss(weight)
print(type(criterion))

savedir = f'../save/{args.savedir}'

if (enc):
    automated_log_path = savedir + "/automated_log_encoder.txt"
    modeltxtpath = savedir + "/model_encoder.txt"
else:
    automated_log_path = savedir + "/automated_log.txt"
    modeltxtpath = savedir + "/model.txt"    

if (not os.path.exists(automated_log_path)):    #dont add first line if it exists 
    with open(automated_log_path, "a") as myfile:
        myfile.write("Epoch\t\tTrain-loss\t\tTest-loss\t\tTrain-IoU\t\tTest-IoU\t\tlearningRate")

with open(modeltxtpath, "w") as myfile:
    myfile.write(str(model))


#TODO: reduce memory in first gpu: https://discuss.pytorch.org/t/multi-gpu-training-memory-usage-in-balance/4163/4        #https://github.com/pytorch/pytorch/issues/1893

#optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=2e-4)     ## scheduler 1
optimizer = Adam(model.parameters(), 5e-4, (0.9, 0.999),  eps=1e-08, weight_decay=1e-4)      ## scheduler 2

start_epoch = 1
if args.resume:
    #Must load weights, optimizer, epoch and best value. 
    if enc:
        filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
    else:
        filenameCheckpoint = savedir + '/checkpoint.pth.tar'

    assert os.path.exists(filenameCheckpoint), "Error: resume option was used but checkpoint was not found in folder"
    checkpoint = torch.load(filenameCheckpoint)
    start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    best_acc = checkpoint['best_acc']
    print("=> Loaded checkpoint at epoch {})".format(checkpoint['epoch']))

#scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) # set up scheduler     ## scheduler 1
lambda1 = lambda epoch: pow((1-((epoch-1)/args.num_epochs)),0.9)  ## scheduler 2
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)                             ## scheduler 2

if args.visualize and args.steps_plot > 0:
    board = Dashboard(args.port)

for epoch in range(start_epoch, args.num_epochs+1):
    print("----- TRAINING - EPOCH", epoch, "-----")

    scheduler.step(epoch)    ## scheduler 2

    epoch_loss = []
    time_train = []

    doIouTrain = args.iouTrain   
    doIouVal =  args.iouVal      

    if (doIouTrain):
        iouEvalTrain = iouEval(NUM_CLASSES)

    usedLr = 0
    for param_group in optimizer.param_groups:
        print("LEARNING RATE: ", param_group['lr'])
        usedLr = float(param_group['lr'])

    model.train()
    for step, (images, labels) in enumerate(loader):

        start_time = time.time()
        #print (labels.size())
        #print (np.unique(labels.numpy()))
        #print("labels: ", np.unique(labels[0].numpy()))
        #labels = torch.ones(4, 1, 512, 1024).long()
        if args.cuda:
            images = images.cuda()
            labels = labels.cuda()

        inputs = Variable(images)
        targets = Variable(labels)
        outputs = model(inputs, only_encode=enc)

        #print("targets", np.unique(targets[:, 0].cpu().data.numpy()))

        optimizer.zero_grad()
        loss = criterion(outputs, targets[:, 0])
        loss.backward()
        optimizer.step()

        epoch_loss.append(loss.cpu().detach().numpy().item())
        time_train.append(time.time() - start_time)

        if (doIouTrain):
            #start_time_iou = time.time()
            iouEvalTrain.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
            #print ("Time to add confusion matrix: ", time.time() - start_time_iou)      

        #print(outputs.size())
        if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
            start_time_plot = time.time()
            image = inputs[0].cpu().data
            #image[0] = image[0] * .229 + .485
            #image[1] = image[1] * .224 + .456
            #image[2] = image[2] * .225 + .406
            #print("output", np.unique(outputs[0].cpu().max(0)[1].data.numpy()))
            board.image(image, f'input (epoch: {epoch}, step: {step})')
            if isinstance(outputs, list):   #merge gpu tensors
                board.image(color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                f'output (epoch: {epoch}, step: {step})')
            else:
                board.image(color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                f'output (epoch: {epoch}, step: {step})')
            board.image(color_transform(targets[0].cpu().data),
                f'target (epoch: {epoch}, step: {step})')
            print ("Time to paint images: ", time.time() - start_time_plot)
        if args.steps_loss > 0 and step % args.steps_loss == 0:
            average = sum(epoch_loss) / len(epoch_loss)
            print(f'loss: {average:0.4} (epoch: {epoch}, step: {step})', 
                    "// Avg time/img: %.4f s" % (sum(time_train) / len(time_train) / args.batch_size))


    average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

    iouTrain = 0
    if (doIouTrain):
        iouTrain, iou_classes = iouEvalTrain.getIoU()
        iouStr = getColorEntry(iouTrain)+'{:0.2f}'.format(iouTrain*100) + '\033[0m'
        print ("EPOCH IoU on TRAIN set: ", iouStr, "%")  

    #Validate on 500 val images after each epoch of training
    print("----- VALIDATING - EPOCH", epoch, "-----")
    model.eval()
    epoch_loss_val = []
    time_val = []

    if (doIouVal):
        iouEvalVal = iouEval(NUM_CLASSES)

    for step, (images, labels) in enumerate(loader_val):
        start_time = time.time()
        if args.cuda:
            images = images.cuda()
            labels = labels.cuda()

        inputs = Variable(images, volatile=True)    #volatile flag makes it free backward or outputs for eval
        targets = Variable(labels, volatile=True)
        outputs = model(inputs, only_encode=enc) 

        loss = criterion(outputs, targets[:, 0])
        epoch_loss_val.append(loss.data[0])
        time_val.append(time.time() - start_time)


        #Add batch to calculate TP, FP and FN for iou estimation
        if (doIouVal):
            #start_time_iou = time.time()
            iouEvalVal.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
            #print ("Time to add confusion matrix: ", time.time() - start_time_iou)

        if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
            start_time_plot = time.time()
            image = inputs[0].cpu().data
            board.image(image, f'VAL input (epoch: {epoch}, step: {step})')
            if isinstance(outputs, list):   #merge gpu tensors
                board.image(color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
                f'VAL output (epoch: {epoch}, step: {step})')
            else:
                board.image(color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
                f'VAL output (epoch: {epoch}, step: {step})')
            board.image(color_transform(targets[0].cpu().data),
                f'VAL target (epoch: {epoch}, step: {step})')
            print ("Time to paint images: ", time.time() - start_time_plot)
        if args.steps_loss > 0 and step % args.steps_loss == 0:
            average = sum(epoch_loss_val) / len(epoch_loss_val)
            print(f'VAL loss: {average:0.4} (epoch: {epoch}, step: {step})', 
                    "// Avg time/img: %.4f s" % (sum(time_val) / len(time_val) / args.batch_size))


    average_epoch_loss_val = sum(epoch_loss_val) / len(epoch_loss_val)
    #scheduler.step(average_epoch_loss_val, epoch)  ## scheduler 1   # update lr if needed

    iouVal = 0
    if (doIouVal):
        iouVal, iou_classes = iouEvalVal.getIoU()
        iouStr = getColorEntry(iouVal)+'{:0.2f}'.format(iouVal*100) + '\033[0m'
        print ("EPOCH IoU on VAL set: ", iouStr, "%") 


    # remember best valIoU and save checkpoint
    if iouVal == 0:
        current_acc = -average_epoch_loss_val
    else:
        current_acc = iouVal 
    is_best = current_acc > best_acc
    best_acc = max(current_acc, best_acc)
    if enc:
        filenameCheckpoint = savedir + '/checkpoint_enc.pth.tar'
        filenameBest = savedir + '/model_best_enc.pth.tar'    
    else:
        filenameCheckpoint = savedir + '/checkpoint.pth.tar'
        filenameBest = savedir + '/model_best.pth.tar'
    save_checkpoint({
        'epoch': epoch + 1,
        'arch': str(model),
        'state_dict': model.state_dict(),
        'best_acc': best_acc,
        'optimizer' : optimizer.state_dict(),
    }, is_best, filenameCheckpoint, filenameBest)

    #SAVE MODEL AFTER EPOCH
    if (enc):
        filename = f'{savedir}/model_encoder-{epoch:03}.pth'
        filenamebest = f'{savedir}/model_encoder_best.pth'
    else:
        filename = f'{savedir}/model-{epoch:03}.pth'
        filenamebest = f'{savedir}/model_best.pth'
    if args.epochs_save > 0 and step > 0 and step % args.epochs_save == 0:
        torch.save(model.state_dict(), filename)
        print(f'save: {filename} (epoch: {epoch})')
    if (is_best):
        torch.save(model.state_dict(), filenamebest)
        print(f'save: {filenamebest} (epoch: {epoch})')
        if (not enc):
            with open(savedir + "/best.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))   
        else:
            with open(savedir + "/best_encoder.txt", "w") as myfile:
                myfile.write("Best epoch is %d, with Val-IoU= %.4f" % (epoch, iouVal))           

    #SAVE TO FILE A ROW WITH THE EPOCH RESULT (train loss, val loss, train IoU, val IoU)
    #Epoch		Train-loss		Test-loss	Train-IoU	Test-IoU		learningRate
    with open(automated_log_path, "a") as myfile:
        myfile.write("\n%d\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.4f\t\t%.8f" % (epoch, average_epoch_loss_train, average_epoch_loss_val, iouTrain, iouVal, usedLr ))
    
#     return(model)   #return model (convenience for encoder-decoder training)

/esat/toyota/trace/deeplearning/datasets_public/cityscapes/leftImg8bit_trainvaltest/leftImg8bit/train
/esat/toyota/trace/deeplearning/datasets_public/cityscapes/gtFine_trainvaltest/gtFine/train
/esat/toyota/trace/deeplearning/datasets_public/cityscapes/leftImg8bit_trainvaltest/leftImg8bit/val
/esat/toyota/trace/deeplearning/datasets_public/cityscapes/gtFine_trainvaltest/gtFine/val
<class '__main__.CrossEntropyLoss'>
----- TRAINING - EPOCH 1 -----
LEARNING RATE:  0.0005




loss: 1.689 (epoch: 1, step: 0) // Avg time/img: 0.2579 s
loss: 1.297 (epoch: 1, step: 50) // Avg time/img: 0.2263 s
loss: 1.195 (epoch: 1, step: 100) // Avg time/img: 0.2265 s
loss: 1.133 (epoch: 1, step: 150) // Avg time/img: 0.2266 s
loss: 1.1 (epoch: 1, step: 200) // Avg time/img: 0.2269 s
loss: 1.069 (epoch: 1, step: 250) // Avg time/img: 0.2271 s
loss: 1.052 (epoch: 1, step: 300) // Avg time/img: 0.2272 s
loss: 1.022 (epoch: 1, step: 350) // Avg time/img: 0.2274 s
loss: 1.006 (epoch: 1, step: 400) // Avg time/img: 0.2275 s
loss: 0.9892 (epoch: 1, step: 450) // Avg time/img: 0.2277 s
loss: 0.9712 (epoch: 1, step: 500) // Avg time/img: 0.2278 s
loss: 0.953 (epoch: 1, step: 550) // Avg time/img: 0.2279 s
loss: 0.9448 (epoch: 1, step: 600) // Avg time/img: 0.2280 s
loss: 0.939 (epoch: 1, step: 650) // Avg time/img: 0.2281 s
loss: 0.9244 (epoch: 1, step: 700) // Avg time/img: 0.2281 s
loss: 0.914 (epoch: 1, step: 750) // Avg time/img: 0.2282 s
loss: 0.9075 (epoch: 1, step: 800) // Avg



IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number

In [6]:
# np.shape(inputs.cpu().detach().numpy())

# outputs = model(inputs, only_encode=True)

In [17]:
epoch = 1
print("----- TRAINING - EPOCH", epoch, "-----")

# scheduler.step(epoch)    ## scheduler 2

epoch_loss = []
time_train = []

doIouTrain = args.iouTrain   
doIouVal =  args.iouVal      

if (doIouTrain):
    iouEvalTrain = iouEval(NUM_CLASSES)

usedLr = 0
for param_group in optimizer.param_groups:
    print("LEARNING RATE: ", param_group['lr'])
    usedLr = float(param_group['lr'])

model.train()
step =1  
(images, labels) = next(iter(loader))

start_time = time.time()
#print (labels.size())
#print (np.unique(labels.numpy()))
#print("labels: ", np.unique(labels[0].numpy()))
#labels = torch.ones(4, 1, 512, 1024).long()
if args.cuda:
    images = images.cuda()
    labels = labels.cuda()

inputs = Variable(images)
targets = Variable(labels)
outputs = model(inputs, only_encode=enc)

#print("targets", np.unique(targets[:, 0].cpu().data.numpy()))

optimizer.zero_grad()
loss = criterion(outputs, targets[:, 0])
loss.backward()
optimizer.step()

# epoch_loss.append(loss.data[0].item())
time_train.append(time.time() - start_time)

if (doIouTrain):
    #start_time_iou = time.time()
    iouEvalTrain.addBatch(outputs.max(1)[1].unsqueeze(1).data, targets.data)
    #print ("Time to add confusion matrix: ", time.time() - start_time_iou)      

#print(outputs.size())
if args.visualize and args.steps_plot > 0 and step % args.steps_plot == 0:
    start_time_plot = time.time()
    image = inputs[0].cpu().data
    #image[0] = image[0] * .229 + .485
    #image[1] = image[1] * .224 + .456
    #image[2] = image[2] * .225 + .406
    #print("output", np.unique(outputs[0].cpu().max(0)[1].data.numpy()))
    board.image(image, f'input (epoch: {epoch}, step: {step})')
    if isinstance(outputs, list):   #merge gpu tensors
        board.image(color_transform(outputs[0][0].cpu().max(0)[1].data.unsqueeze(0)),
        f'output (epoch: {epoch}, step: {step})')
    else:
        board.image(color_transform(outputs[0].cpu().max(0)[1].data.unsqueeze(0)),
        f'output (epoch: {epoch}, step: {step})')
    board.image(color_transform(targets[0].cpu().data),
        f'target (epoch: {epoch}, step: {step})')
    print ("Time to paint images: ", time.time() - start_time_plot)
if args.steps_loss > 0 and step % args.steps_loss == 0:
    average = sum(epoch_loss) / len(epoch_loss)
    print(f'loss: {average:0.4} (epoch: {epoch}, step: {step})', 
            "// Avg time/img: %.4f s" % (sum(time_train) / len(time_train) / args.batch_size))



----- TRAINING - EPOCH 1 -----
LEARNING RATE:  0.0005891598267371476


In [8]:
# np.shape(fo.cpu().detach().numpy())
# # loss = torch.nn.functional.log_softmax(outputs, dim=1), targets
# loss = torch.nn.NLLLoss()

# loss(fo,targets)

In [9]:
# fo= torch.nn.functional.log_softmax(outputs[0], dim=1)
# optimizer.zero_grad()
# # m = torch.nn.LogSoftmax(dim=1)
# # loss = torch.nn.NLLLoss()
# # fo = m(outputs)
# fl = criterion(outputs,targets[:,0])
# fl.backward()
# np.max(targets[:,0].cpu().detach().numpy())
# mymin = 0
# for step,(images, labels) in enumerate(loader):
#     print(step)
#     if np.min(labels.cpu().detach().numpy()) > mymin :
#         mymin = np.max(labels.cpu().detach().numpy())
    

In [10]:
# # 2D loss example (used, for example, with image inputs)
# N, C = 5, 4
# loss = torch.nn.NLLLoss()
# # input is of size N x C x height x width
# data = torch.randn(N, 16, 10, 10)
# conv = torch.nn.Conv2d(16, C, (3, 3))
# m = torch.nn.LogSoftmax(dim=1)
# # each element in target has to have 0 <= value < C
# target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
# output = loss(m(conv(data)), target)

In [27]:
# np.max(targets[:,0].cpu().detach().numpy())
# np.shape(conv(data).detach().numpy())
# criterion()
# fl
# from matplotlib import pyplot as plt
# %pylab inline

# plt.imshow(outputs[1,19].cpu().detach().numpy(),cmap='gray')
# plt.show
# loss.cpu().detach().numpy()
# loss.backward()
loss.cpu().detach().numpy().item()
# a.item()

2.278388738632202