In [35]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import cv2
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pdb
import math

torch.cuda.empty_cache()

def get_random_crop(image, crop_height, crop_width):

    max_x = image.shape[1] - crop_width
    max_y = image.shape[0] - crop_height

    x = np.random.randint(0, max_x)
    y = np.random.randint(0, max_y)

    crop = image[y: y + crop_height, x: x + crop_width]

    return crop, x, y

def searchForFocus(filename, substring):
    with open(filename, 'r') as file:
        data = file.read()
        location = data.find(substring)
        croppedStr = data[location+len(substring):]
        # Split at spaces and find first number
        for word in croppedStr.split(): # Split at spaces
            # Delete any commas    
            word = word.replace(',', "")
            try:
                focusPosition = int(word)
                return focusPosition
            except ValueError:
                continue
    file.close()

class Dataset(torch.utils.data.Dataset):
    # ids indicates what subfolders (samples) to access
    def __init__(self, foldername, subfolderPrefix, ids):
        self.foldername = foldername
        self.subfolderPrefix = subfolderPrefix
        self.ids = ids
        
    def __len__(self):
        return len(self.ids)

    def __getitem__(self, index):
        sampleFoldername = self.foldername + '/' + self.subfolderPrefix + str(index)
        
        # H, W
        cropSize = (640, 640)
        
        images = []
        for i, prefix in enumerate(['before', 'after']):
        
            # Load in image as [0,1] array
            image = cv2.imread(sampleFoldername + '/' + prefix + str(index) + '.tif', 0) * 1 / 255.0

            # Shift it so is from [-1,1]
            image *= 2
            image -= 1
            
            if i == 0:
                # Randomly crop the image
                image, cornerX, cornerY = get_random_crop(image, cropSize[0], cropSize[1])
            else:
                # Crop the label image to the same region as the input
                image = image[cornerY:cornerY + cropSize[0], cornerX:cornerX + cropSize[1]]
            
            temp = torch.from_numpy(image)
            if i == 0:
                temp = temp.unsqueeze(0) # Add fake first dimension to specify 1-channel
            images.append(temp)
            
        return images
    

In [36]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

params = {'batch_size': 2,
          'shuffle': True,
          'num_workers': 2}

# Randomly partition the full list into a training set and validation set
numSamples = 100 # total number of samples collected
frac = 1/5 # fraction to be validation
np.random.seed(0)
permutedIds = np.random.permutation(range(numSamples))
splitPoint = int((1-frac) * len(permutedIds))
trainingIds = permutedIds[:splitPoint]
valIds = permutedIds[splitPoint:]

training_set = Dataset('/home/aofeldman/Desktop/AFdataCollection', 'sample', trainingIds)
training_generator = torch.utils.data.DataLoader(training_set, **params)

validation_set = Dataset('/home/aofeldman/Desktop/AFdataCollection', 'sample', valIds)
#validation_generator = torch.utils.data.DataLoader(validation_set, **params)

In [37]:
X, Y = training_set.__getitem__(1)

def imshow(img,wait):
    img = img / 2 + 0.5     # unnormalize
    npimg = np.squeeze(img.numpy())
    width = int(0.15 * npimg.shape[1])
    height = int(0.15 * npimg.shape[0])
    cv2.imshow("Hi",cv2.resize(npimg, (width, height)))
    cv2.waitKey(wait)
    cv2.destroyAllWindows()
imshow(X, 10000)
print(X.shape)
imshow(Y, 10000)
print(Y.shape)

torch.Size([1, 640, 640])
torch.Size([640, 640])


In [38]:
# Consider placing dropout layers after conv2d layers (conv2d -> batchnorm2d -> leakyReLU -> dropout(p=0.1))
# And also place after fully connected layers (linear -> leakyReLU -> dropout(p=0.3))
# TODO: Should actually figure out appropriate amount of padding for layers 

net = nn.Sequential(
    # Encoder section
    
    # Does not change channel dimensions
    nn.Conv2d(1, 4, kernel_size=9, stride=1, padding=4),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    # 1/4 channel dimensions
    nn.MaxPool2d(kernel_size=4, stride=4),
    # Does not change channel dimensions
    nn.Conv2d(4, 4, kernel_size=7, stride=1, padding=3),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    # 1/4 channel dimensions
    nn.MaxPool2d(kernel_size=2, stride=2),
    # Does not change channel dimensions
    nn.Conv2d(4, 4, kernel_size=5, stride=1, padding=2),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    # 1/4 channel dimensions
    nn.MaxPool2d(kernel_size=2, stride=2),
    
    # At this point:
    # Each channel has dimensions
    # H_out, W_out = (1/4)^3 * (H_in, W_in)
    
    # Decoder section
    nn.ConvTranspose2d(4, 4, kernel_size=5, stride=1, padding=2),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Upsample(scale_factor = 2, mode='bilinear'),
    
    nn.ConvTranspose2d(4, 4, kernel_size=7, stride=1, padding=2),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Upsample(scale_factor = 2, mode='bilinear'),
    
    nn.ConvTranspose2d(4, 4, kernel_size=9, stride=1, padding=2),
    nn.BatchNorm2d(4),
    nn.LeakyReLU(negative_slope = 0.1, inplace=True),
    nn.Upsample(scale_factor = 2, mode='bilinear'),
    
#     nn.ConvTranspose2d(4, 4, kernel_size=3, stride=1, padding=1),
#     nn.BatchNorm2d(4),
#     nn.LeakyReLU(negative_slope = 0.1, inplace=True),
#     nn.Upsample(scale_factor = (2, 2)),
    
    nn.ConvTranspose2d(4, 1, kernel_size=3, stride=1, padding=1),
    nn.Tanh(),
)

net = net.to(device)

In [39]:
from collections import OrderedDict

class EncoderBlock(nn.Module):
    def __init__(self, dimIn, dimOut, kernel, leakySlope, poolSize, use_norm):
        super(EncoderBlock, self).__init__()
        block = [nn.Conv2d(dimIn, dimOut, kernel_size=kernel, stride=1, padding= (kernel-1) // 2)]
        
        if use_norm:
            block += [nn.BatchNorm2d(dimOut)]
        block += [nn.LeakyReLU(negative_slope = leakySlope, inplace=True),
                  nn.MaxPool2d(kernel_size=poolSize, stride=poolSize)]
        
        self.block = nn.Sequential(*block)
                
    def forward(self, x):
        return self.block(x)
    
class DecoderBlock(nn.Module):
    def __init__(self, dimIn, dimOut, kernel, leakySlope, scale, use_norm):
        super(DecoderBlock, self).__init__()
        block = [nn.ConvTranspose2d(dimIn, dimOut, kernel_size=kernel, stride=1, padding= (kernel-1) // 2)]
        
        if use_norm:
            block += [nn.BatchNorm2d(dimOut)]
        block += [nn.LeakyReLU(negative_slope = leakySlope, inplace=True),
                  nn.Upsample(scale_factor = scale, mode = 'bilinear')]
        
        self.block = nn.Sequential(*block)
            
    def forward(self, x, earlierX = None):
        #print('Called Decoder forward')
        if earlierX is not None:
            #print('Shape of x: ', x.size())
            #print('Shape of earlierX: ', earlierX.size())
            combinedChannels = torch.cat([x, earlierX], 1)
            #print('Shape of combinedChannels: ', combinedChannels.size())
            return self.block(combinedChannels)
        else:
            return self.block(x)
        
class EndBlock(nn.Module):
    def __init__(self, dimIn, kernel):
        super(EndBlock, self).__init__()
        
        self.block = \
        nn.Sequential(nn.ConvTranspose2d(dimIn, 1, kernel_size=kernel, padding= (kernel-1) // 2), nn.Tanh())
        
    def forward(self, x):
        return self.block(x)
    
class Net(nn.Module):
    def __init__(self, numEncoder, numDecoder):
        super(Net, self).__init__()
        
        layers = [('e0', EncoderBlock(1, 16, 9, 0.1, 4, True))]
        #self.layers = [EncoderBlock(1, 4, 5, 0.1, 4, True, 'e0')]

        for i in range(1, numEncoder):
            layers += [('e' + str(i), EncoderBlock(16, 16, 7, 0.1, 4, True))]
            #self.layers += [EncoderBlock(4, 4, 5, 0.1, 4, True, 'e' + str(i))]

        for j in range(numDecoder):
            if j == 0:
                added = 0
            else:
                added = 16
            layers += [('d' + str(j), DecoderBlock(16 + added, 16, 7, 0.1, 4, True))]
            #self.layers += [DecoderBlock(4 + added, 4, 5, 0.1, 4, True, 'd' + str(j))]

        #layers += [('f', nn.ConvTranspose2d(4, 1, kernel_size=5, padding=2), nn.Tanh())]
        layers += [('f', EndBlock(16, 5))]
        
        #self.layers += [nn.ConvTranspose2d(4, 1, kernel_size=5, padding=2), nn.Tanh()]

        print('layers', layers)
        self.model = nn.Sequential(*[layers[i][1] for i in range(len(layers))])
        
        self.numEncoder = numEncoder
        self.numDecoder = numDecoder
        self.layers = layers
                
    def forward(self, x):
        # print('Calling forward')
        layerOutputs = {}
        prevVal = x.clone()
        for (name, group) in self.layers:
            #print('On layer: ' + block[0])
            if name[0] == 'd' and name[1] != '0':
                earlierBlock = 'e' + str(self.numEncoder -1 - int(name[1]))
                #print('Earlier block is: ' + earlierBlock)
                #print('Shape of earlier block: ', layerOutputs[earlierBlock].size())
                layerOutputs[name] = group.forward(prevVal, layerOutputs[earlierBlock])
            else:
                layerOutputs[name] = group.forward(prevVal)
            prevVal = layerOutputs[name].clone()
            
        return layerOutputs['f']
        
net = Net(3, 3).to(device)

layers [('e0', EncoderBlock(
  (block): Sequential(
    (0): Conv2d(1, 16, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1, inplace=True)
    (3): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
)), ('e1', EncoderBlock(
  (block): Sequential(
    (0): Conv2d(16, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1, inplace=True)
    (3): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
)), ('e2', EncoderBlock(
  (block): Sequential(
    (0): Conv2d(16, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.1, inplace=True)
    (3): MaxPool2d(kernel_

In [40]:
net.layers[0][1].block[0].weight

Parameter containing:
tensor([[[[ 4.5172e-02, -3.5105e-02,  8.4832e-02,  ..., -7.8500e-02,
           -2.5810e-02,  1.0118e-01],
          [ 4.8658e-02, -4.9998e-02, -9.6614e-02,  ...,  9.4954e-03,
           -7.3094e-02, -4.4319e-02],
          [ 1.6570e-03,  6.8352e-03,  8.6299e-02,  ...,  6.6104e-02,
            1.0013e-01,  8.8876e-03],
          ...,
          [ 2.4276e-02, -5.2115e-02, -1.0478e-01,  ...,  2.6821e-02,
           -4.1349e-02,  6.6597e-02],
          [ 1.7817e-02,  3.7118e-02,  9.9375e-02,  ...,  7.9136e-02,
           -5.2880e-02,  5.4280e-02],
          [ 4.5699e-02, -7.6915e-02, -3.1986e-02,  ...,  3.3411e-02,
            4.5653e-02,  1.7161e-02]]],


        [[[-3.0488e-02, -3.4963e-03, -5.9726e-02,  ..., -3.3503e-02,
           -1.0596e-01, -5.7234e-03],
          [ 6.2311e-02,  4.3994e-02, -4.0121e-02,  ...,  5.8841e-02,
           -7.2863e-02, -1.0797e-01],
          [-7.4127e-02,  5.5900e-02, -4.2355e-02,  ..., -1.0915e-01,
            1.1048e-01,  5.1843e-0

In [41]:
for p in net.parameters():
    print(p.data.shape)

torch.Size([16, 1, 9, 9])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([16, 16, 7, 7])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([16, 16, 7, 7])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([16, 16, 7, 7])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([32, 16, 7, 7])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([32, 16, 7, 7])
torch.Size([16])
torch.Size([16])
torch.Size([16])
torch.Size([16, 1, 5, 5])
torch.Size([1])


In [42]:
count = 0
for p in net.parameters():
    n_params = np.prod(list(p.data.shape)).item()
    count += n_params
    print(p.data)
print(f'total params: {count}')

tensor([[[[ 4.5172e-02, -3.5105e-02,  8.4832e-02,  ..., -7.8500e-02,
           -2.5810e-02,  1.0118e-01],
          [ 4.8658e-02, -4.9998e-02, -9.6614e-02,  ...,  9.4954e-03,
           -7.3094e-02, -4.4319e-02],
          [ 1.6570e-03,  6.8352e-03,  8.6299e-02,  ...,  6.6104e-02,
            1.0013e-01,  8.8876e-03],
          ...,
          [ 2.4276e-02, -5.2115e-02, -1.0478e-01,  ...,  2.6821e-02,
           -4.1349e-02,  6.6597e-02],
          [ 1.7817e-02,  3.7118e-02,  9.9375e-02,  ...,  7.9136e-02,
           -5.2880e-02,  5.4280e-02],
          [ 4.5699e-02, -7.6915e-02, -3.1986e-02,  ...,  3.3411e-02,
            4.5653e-02,  1.7161e-02]]],


        [[[-3.0488e-02, -3.4963e-03, -5.9726e-02,  ..., -3.3503e-02,
           -1.0596e-01, -5.7234e-03],
          [ 6.2311e-02,  4.3994e-02, -4.0121e-02,  ...,  5.8841e-02,
           -7.2863e-02, -1.0797e-01],
          [-7.4127e-02,  5.5900e-02, -4.2355e-02,  ..., -1.0915e-01,
            1.1048e-01,  5.1843e-02],
          ...,
   

tensor([[[[-3.0263e-02,  3.1649e-02,  4.1196e-04,  ...,  2.8601e-03,
            1.0124e-02, -1.3727e-03],
          [-2.7907e-02,  1.1833e-02, -2.7543e-02,  ..., -2.6296e-02,
           -9.9634e-03, -4.9190e-03],
          [ 8.4437e-03, -3.6664e-03,  1.0547e-02,  ...,  2.8150e-02,
           -1.1158e-02,  3.5685e-02],
          ...,
          [-1.3052e-02,  1.3868e-02, -2.0069e-03,  ...,  2.7464e-02,
            4.7245e-03,  2.0426e-03],
          [ 1.8029e-02,  7.0204e-04, -1.1340e-02,  ..., -2.0283e-02,
           -3.0878e-02,  1.7031e-02],
          [-3.2418e-02,  5.0140e-04, -1.0937e-02,  ..., -1.1664e-02,
            4.1196e-04,  3.1083e-02]],

         [[-1.2581e-02, -2.1447e-02,  3.4102e-02,  ..., -3.1430e-02,
           -1.8811e-02,  2.1902e-02],
          [-2.3596e-03,  1.5855e-02,  1.5521e-02,  ..., -2.4084e-02,
           -8.4380e-03,  1.1187e-02],
          [-2.3707e-02, -2.5351e-02, -2.6073e-02,  ..., -4.5783e-04,
           -2.6358e-02,  2.8317e-02],
          ...,
     

In [43]:
import torch.optim as optim

criterion = nn.MSELoss()
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.RMSprop(net.parameters())

In [44]:
# For early stopping, use blocks of epochs (say size 3), and a window (say size 10) of how far to look in the past.
# Then, compare the average loss on the current block to the block in the past. Another idea would be compare the variances.
# If the fraction is not sufficiently small, halt.

max_epochs = 100
# Should be stated as a fraction of the previous error
#max_frac = 0.999
#window = 10 # How many epochs in the past to compare to
#block = 5 # What size block of epochs to use

learnFreq = 10
batch_multiplier = 4

epoch_training_loss = []
epoch_val_loss = []


for epoch in range(max_epochs):
    print("\nOn epoch: " + str(epoch))

    count = 0
    
    net.train()
    for inputs, labels in training_generator:
        
        inputs, labels = inputs.to(device).float(), labels.to(device).float()
        
        if count == 0:
            optimizer.step()
            # zero the parameter gradients
            optimizer.zero_grad()
            count = batch_multiplier

        # forward + backward + optimize
        outputs = net(inputs).to(device)
        loss = criterion(torch.squeeze(outputs), labels) / batch_multiplier
        loss.backward()
        #optimizer.step()
        
        count -=1
        
        loss = loss.detach()
        inputs = inputs.detach()
        outputs = outputs.detach()
        
        # print('Batch loss: ', loss.item())

        #print('Outputs', torch.squeeze(outputs))
        #print('Batch size: ' + str(len(inputs)))
        # Multiply by the batch size and batch_multiplier (because earlier divided)
        #running_loss += loss.item() * len(inputs) * batch_multiplier
        #print('Batch average loss ' + str(loss.item()))
    #training_loss = running_loss / training_set.__len__()
    #print("Epoch training loss: " + str(training_loss))
    #epoch_training_loss.append(training_loss)
    if epoch % learnFreq == 0 or epoch == (max_epochs - 1):
        with torch.no_grad():
            net.eval()
            for i, dataset in enumerate([training_set, validation_set]):
                if i == 0:
                    print('\nEpoch training results ')
                else:
                    print('\nEpoch validation results ')
                MSE = 0
                avgAbsDev = 0
                for sample in dataset.ids:
                    X, y = dataset.__getitem__(sample)
                    X = X.unsqueeze(0) # Add fake batch dimension
                    X = X.to(device).float()
        
                    yHat = net(X).to(device)
                    yHat = yHat.to('cpu')
                    MSE += torch.norm(y - yHat)**2 / np.product(y.numpy().shape)
                    #avgAbsDev += np.abs(yHat - y)
        
                #print('Sample: ' + str(sample))
                #print('y: ' + str(y))
                #print('yHat: ' + str(yHat))
                MSE /= len(dataset.ids)
                #avgAbsDev /= len(dataset.ids)
                print('RMSE on dataset: ' + str(np.sqrt(MSE)))
                #print('Avg Abs Dev on dataset: ' + str(avgAbsDev))
                if i == 0:
                    epoch_training_loss.append(MSE)
                else:
                    epoch_val_loss.append(MSE)
        
# for epoch in range(max_epochs):
#     print("\nOn epoch: " + str(epoch))
        
#     net.train()
#     for inputs, labels in training_generator:
#         # zero the parameter gradients
#         optimizer.zero_grad()
#         print('inputs.shape: ', inputs.size())
        
#         # forward + backward + optimize
#         outputs = net(inputs.float())
#         print('outputs.shape: ', outputs.size())
#         print('labels.shape: ', labels.float().size())
#         loss = criterion(torch.squeeze(outputs), labels.float())
#         loss.backward()
#         optimizer.step()
#         print('Batch Loss: ' + str(loss.item()))
        
#     with torch.no_grad():
#         val_loss = 0
#         net.eval()
#         for ind in validation_set.ids:
#             X, y = validation_set.__getitem__(ind)
#             X = X.unsqueeze(0) # Add fake batch dimension
#             yHat = net(X.float())
#             val_loss += torch.norm(y - yHat)**2 / np.product(y.numpy().shape)
#         val_loss /= validation_set.__len__()
#         epoch_val_loss.append(val_loss)
#     print("Epoch validation loss: " + str(val_loss))
        
#     if len(epoch_val_loss) >= window + block + 1:
#         latestBlock = np.mean(epoch_val_loss[-1:-1-block])
#         earlierBlock = np.mean(epoch_val_loss[-1-window:-1-window-block])
        
#         # latestBlock must be sufficiently smaller than earlierBlock
#         if latestBlock / earlierBlock > max_frac:
#             print('Converged')
#             pdb.set_trace()
#             break
            
print('Finished Training')


On epoch: 0

Epoch training results 
RMSE on dataset: tensor(0.8807, dtype=torch.float64)

Epoch validation results 
RMSE on dataset: tensor(0.8467, dtype=torch.float64)

On epoch: 1

On epoch: 2

On epoch: 3

On epoch: 4

On epoch: 5

On epoch: 6

On epoch: 7

On epoch: 8

On epoch: 9

On epoch: 10

Epoch training results 
RMSE on dataset: tensor(0.5908, dtype=torch.float64)

Epoch validation results 
RMSE on dataset: tensor(0.5993, dtype=torch.float64)

On epoch: 11

On epoch: 12

On epoch: 13

On epoch: 14

On epoch: 15

On epoch: 16

On epoch: 17

On epoch: 18

On epoch: 19

On epoch: 20

Epoch training results 
RMSE on dataset: tensor(0.5705, dtype=torch.float64)

Epoch validation results 
RMSE on dataset: tensor(0.5874, dtype=torch.float64)

On epoch: 21

On epoch: 22

On epoch: 23

On epoch: 24

On epoch: 25

On epoch: 26

On epoch: 27

On epoch: 28

On epoch: 29

On epoch: 30

Epoch training results 
RMSE on dataset: tensor(0.5582, dtype=torch.float64)

Epoch validation result

In [45]:
with torch.no_grad():
    net.eval()
    for i, dataset in enumerate([training_set, validation_set]):
        if i == 0:
            print('\nTraining results ')
        else:
            print('\nValidation results ')
        MSE = 0
        for sample in dataset.ids:
            X, y = dataset.__getitem__(sample)
            
            X = X.to(device).unsqueeze(0) # Add fake batch dimension
            yHat = net(X.float()).to(device)
            yHat = yHat.to('cpu')
            imshow(X.to('cpu'), 10000)
            imshow(y, 10000)
            imshow(yHat, 10000)
            print(yHat)
            # squared frobenius norm
            MSE += torch.norm(y - yHat)**2 / np.product(y.numpy().shape)
        MSE /= validation_set.__len__()
    print('RMSE on dataset: ' + str(np.sqrt(MSE)))


Training results 
tensor([[[[-0.3342, -0.3326, -0.2908,  ..., -0.4227, -0.3878, -0.3451],
          [-0.2953, -0.2990, -0.2600,  ..., -0.4109, -0.3935, -0.3558],
          [-0.3038, -0.3021, -0.2728,  ..., -0.4558, -0.4314, -0.3959],
          ...,
          [-0.3007, -0.2958, -0.2665,  ..., -0.4278, -0.4076, -0.3827],
          [-0.3171, -0.3106, -0.2891,  ..., -0.4361, -0.4045, -0.3794],
          [-0.3058, -0.2949, -0.2933,  ..., -0.3834, -0.3726, -0.3728]]]])
tensor([[[[-0.3775, -0.3760, -0.3476,  ..., -0.4501, -0.4121, -0.3580],
          [-0.3360, -0.3403, -0.3205,  ..., -0.4417, -0.4217, -0.3715],
          [-0.3429, -0.3517, -0.3354,  ..., -0.4867, -0.4627, -0.4131],
          ...,
          [-0.3179, -0.3165, -0.2925,  ..., -0.4865, -0.4613, -0.4164],
          [-0.3354, -0.3323, -0.3147,  ..., -0.4874, -0.4492, -0.4072],
          [-0.3127, -0.3058, -0.3086,  ..., -0.4153, -0.4055, -0.3929]]]])
tensor([[[[-0.2690, -0.2464, -0.1819,  ..., -0.1651, -0.1953, -0.2150],
         

tensor([[[[-0.4886, -0.4896, -0.4880,  ..., -0.5254, -0.4811, -0.3971],
          [-0.4422, -0.4546, -0.4787,  ..., -0.5276, -0.5007, -0.4178],
          [-0.4624, -0.5004, -0.5198,  ..., -0.5698, -0.5476, -0.4613],
          ...,
          [-0.4818, -0.5274, -0.5603,  ..., -0.5855, -0.5694, -0.4740],
          [-0.4911, -0.5254, -0.5439,  ..., -0.5647, -0.5347, -0.4500],
          [-0.3690, -0.4057, -0.4461,  ..., -0.4673, -0.4786, -0.4221]]]])
tensor([[[[-0.3372, -0.3366, -0.2952,  ..., -0.2346, -0.2449, -0.2558],
          [-0.2983, -0.3031, -0.2647,  ..., -0.1954, -0.2173, -0.2386],
          [-0.3084, -0.3081, -0.2798,  ..., -0.1731, -0.2019, -0.2328],
          ...,
          [-0.2958, -0.2889, -0.2580,  ..., -0.4001, -0.3861, -0.3660],
          [-0.3112, -0.3025, -0.2791,  ..., -0.4102, -0.3854, -0.3649],
          [-0.3017, -0.2906, -0.2872,  ..., -0.3672, -0.3602, -0.3619]]]])
tensor([[[[-0.4303, -0.4377, -0.4200,  ..., -0.4207, -0.3850, -0.3458],
          [-0.3871, -0.4055,

tensor([[[[-0.4800, -0.4817, -0.4804,  ..., -0.4793, -0.4389, -0.3700],
          [-0.4367, -0.4489, -0.4715,  ..., -0.4741, -0.4525, -0.3862],
          [-0.4566, -0.4930, -0.5116,  ..., -0.5156, -0.4953, -0.4276],
          ...,
          [-0.4476, -0.4865, -0.5062,  ..., -0.5083, -0.4912, -0.4277],
          [-0.4648, -0.4934, -0.5040,  ..., -0.5045, -0.4726, -0.4154],
          [-0.3603, -0.3854, -0.4197,  ..., -0.4247, -0.4259, -0.3984]]]])
tensor([[[[-0.4270, -0.4285, -0.4090,  ..., -0.4091, -0.3828, -0.3397],
          [-0.3815, -0.3935, -0.3915,  ..., -0.3964, -0.3856, -0.3468],
          [-0.4015, -0.4267, -0.4290,  ..., -0.4257, -0.4149, -0.3763],
          ...,
          [-0.4620, -0.5039, -0.5283,  ..., -0.1789, -0.2119, -0.2343],
          [-0.4770, -0.5090, -0.5240,  ..., -0.1967, -0.2264, -0.2467],
          [-0.3710, -0.3972, -0.4348,  ..., -0.2368, -0.2578, -0.2750]]]])
tensor([[[[-0.3006, -0.2901, -0.2442,  ..., -0.4143, -0.3851, -0.3427],
          [-0.2659, -0.2553,

tensor([[[[-0.2736, -0.2537, -0.1948,  ..., -0.1935, -0.2169, -0.2328],
          [-0.2343, -0.2133, -0.1483,  ..., -0.1464, -0.1812, -0.2080],
          [-0.2079, -0.1723, -0.1060,  ..., -0.0986, -0.1497, -0.1863],
          ...,
          [-0.3206, -0.3185, -0.2944,  ..., -0.3237, -0.3336, -0.3217],
          [-0.3388, -0.3349, -0.3165,  ..., -0.3353, -0.3360, -0.3247],
          [-0.3134, -0.3071, -0.3104,  ..., -0.3194, -0.3296, -0.3316]]]])
tensor([[[[-0.4282, -0.4315, -0.4213,  ..., -0.4070, -0.3779, -0.3420],
          [-0.3928, -0.4042, -0.4101,  ..., -0.3939, -0.3796, -0.3490],
          [-0.4116, -0.4373, -0.4456,  ..., -0.4304, -0.4104, -0.3826],
          ...,
          [-0.3187, -0.3161, -0.2930,  ..., -0.2207, -0.2402, -0.2596],
          [-0.3315, -0.3257, -0.3056,  ..., -0.2417, -0.2553, -0.2718],
          [-0.3076, -0.3032, -0.3036,  ..., -0.2647, -0.2736, -0.2939]]]])
tensor([[[[-0.4735, -0.4760, -0.4754,  ..., -0.3257, -0.3190, -0.3009],
          [-0.4333, -0.4455,

KeyboardInterrupt: 

In [None]:
%matplotlib qt5
plt.figure()
plt.plot(range(max_epochs), epoch_training_loss, range(max_epochs), epoch_val_loss)
plt.legend(['Training', 'Validation'])