In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
#Cutting image
def Cutting(img):
  img_size = img.shape
  mod1= img.shape[0]-60 % 16
  mod2= img.shape[1]-60 % 16
  if mod1 != 0:
    start=int(np.trunc(mod1)) + (mod1 % 2)
    end= img.shape[0]-int(np.trunc(mod1))
    new_img=img[start:end-1,:]
  if mod2 != 0:
    start=int(np.trunc(mod2)) + (mod2 % 2)
    end= img.shape[1]-int(np.trunc(mod2))
    new_img=new_img[:,start:end-1]
  return new_img

In [3]:
'''
def calc_dim_input(dim_image):
   def expand1(x):
     return (x+4)/2
   def expand2(x):
     return (x+4)*2
   dimension_input=expand1(expand1(expand1(expand1(dim_image))))
   dimension_input=expand2(expand2(expand2(expand2(dimension_input))))
   dimension_input=dimension_input+4
   return int(dimension_input)
calc_dim_input(996)
 '''   
def calc_dim_input(dim_image):
  '''
    Calculating the dimension of input to have an output image of the same size as the original input image
  '''
  return dim_image + 184           

In [4]:
def extend_mirror(img, out_size):
    '''
    A method to extend an image to certain resolution by mirrorring the edges
    Input:
    :img: image as numpy array
    :out_size: a tuple of the desired output resolution
    Output:
    :out: the extended image
    '''
    if np.any(img.shape>out_size):
        raise Exception('Error: at least on of out_size axes is smaller than the image shape')
    img_size = img.shape
    out = np.zeros(out_size)
    v_edge_u = (out_size[0]-img_size[0]) // 2
    v_edge_d = -(out_size[0]-img_size[0]-v_edge_u)
    h_edge_l = (out_size[1]-img_size[1]) // 2
    h_edge_r = -(out_size[1]-img_size[1]-h_edge_l)
    # centre
    out[v_edge_u:v_edge_d,h_edge_l:h_edge_r] = img
    # sides
    out[:v_edge_u,h_edge_l:h_edge_r] = np.flipud(img[:v_edge_u,:]) # top
    out[v_edge_d:,h_edge_l:h_edge_r] = np.flipud(img[v_edge_d:,:]) # bottom
    out[v_edge_u:v_edge_d,:h_edge_l] = np.fliplr(img[:,:h_edge_l]) # left
    out[v_edge_u:v_edge_d,h_edge_r:] = np.fliplr(img[:,h_edge_r:]) # right
    # corners
    out[:v_edge_u,:h_edge_l] = np.fliplr(out[:v_edge_u,h_edge_l:h_edge_l*2]) # top-left
    out[:v_edge_u,h_edge_r:] = np.fliplr(out[:v_edge_u,2*h_edge_r:h_edge_r]) # top-right
    out[v_edge_d:,:h_edge_l] = np.fliplr(out[v_edge_d:,h_edge_l:h_edge_l*2]) # bottom-left
    out[v_edge_d:,h_edge_r:] = np.fliplr(out[v_edge_d:,2*h_edge_r:h_edge_r]) # bottom-right
    return out

Implementing U-net for our problem. Here all the 23 layers are used. Due to the fact that our problem consist in saying if a pixel is part or not of an air bubble the final output is given by tensor with two channels, each one corresponding to a matrix of dimensions equal to the original image. Using many convolutional layers the final output has a lower dimension than the input. In order to obtained the desired output the input image is expanded. The parts added are obtained mirroring parts of the real image as described in the paper. 
This tecnique is used in the training of NN.


In [5]:
class U_net(nn.Module):
    def __init__(self, n_channels=64):
        'U-net from the paper "Olaf Ronneberger, Philipp Fischer, and Thomas Brox": https://arxiv.org/abs/1505.04597 '
        super(U_net, self).__init__()
        self.conv1 = nn.Conv2d(1,n_channels,3)
        self.conv2 = nn.Conv2d(n_channels,n_channels,3)
        self.pool = nn.MaxPool2d(2,stride=2)
        self.conv3 = nn.Conv2d(n_channels,2*n_channels,3)
        self.conv4 = nn.Conv2d(2*n_channels,2*n_channels,3)
        self.conv5 = nn.Conv2d(2*n_channels,4*n_channels,3)
        self.conv6 = nn.Conv2d(4*n_channels,4*n_channels,3)
        self.conv7 = nn.Conv2d(4*n_channels,8*n_channels,3)
        self.conv8 = nn.Conv2d(8*n_channels,8*n_channels,3)
        self.conv9 = nn.Conv2d(8*n_channels,16*n_channels,3)
        self.conv10 = nn.Conv2d(16*n_channels,16*n_channels,3)
        #self.upconv1= nn.ConvTranspose2d(16*n_channels,16*n_channels,2)
        self.upconv1 = nn.Upsample(scale_factor=2)
        self.conv11 = nn.Conv2d(16*n_channels,8*n_channels,3)
        self.conv12 = nn.Conv2d(8*n_channels,8*n_channels,3)
        #self.upconv2= nn.ConvTranspose2d(8*n_channels,8*n_channels,2)
        self.upconv2 = nn.Upsample(scale_factor=2)
        self.conv13 = nn.Conv2d(8*n_channels,4*n_channels,3)
        self.conv14 = nn.Conv2d(4*n_channels,4*n_channels,3)
        #self.upconv3= nn.ConvTranspose2d(4*n_channels,4*n_channels,2)
        self.upconv3 = nn.Upsample(scale_factor=2)
        self.conv15 = nn.Conv2d(4*n_channels,2*n_channels,3)
        self.conv16 = nn.Conv2d(2*n_channels,2*n_channels,3)
        #self.upconv4= nn.ConvTranspose2d(2*n_channels,2*n_channels,2)
        self.upconv4 = nn.Upsample(scale_factor=2)
        self.conv17 = nn.Conv2d(2*n_channels,n_channels,3)
        self.conv18 = nn.Conv2d(n_channels,n_channels,3)
        self.conv1_1= nn.Conv2d(n_channels,2,1)
        self.activation1=nn.ReLU()
    
    def forward(self, x):
        out = self.pool(self.activation1(self.conv2(self.activation1(self.conv1(x)))))
        out = self.pool(self.activation1(self.conv4(self.activation1(self.conv3(out)))))
        out = self.pool(self.activation1(self.conv6(self.activation1(self.conv5(out)))))
        out = self.pool(self.activation1(self.conv8(self.activation1(self.conv7(out)))))
        out = self.activation1(self.conv10(self.activation1(self.conv9(out))))
        out = self.upconv1(out)
        out = self.activation1(self.conv12(self.activation1(self.conv11(out))))
        out = self.upconv2(out)
        out = self.activation1(self.conv14(self.activation1(self.conv13(out))))
        out = self.upconv3(out)
        out = self.activation1(self.conv16(self.activation1(self.conv15(out))))
        out = self.upconv4(out)
        out = self.activation1(self.conv18(self.activation1(self.conv17(out))))
        out = self.conv1_1(out)
        return out

    

In [6]:
#Accuracy 
def accuracy(pred, test_labels):
    '''
    pred: torch.tensor (result of U-net) of size [num_batches=1, 2, dim_image1, dim_image2]
    test_labels: torch.tensor (Real labels for the image) 
    '''
    '''
    Calculate the percentage of correct pixels labeled
    '''
    test_labels=test_labels.view(1,test_labels)
    label_pred=torch.argmax(pred,dim=1)
    acc= (torch.abs(label_pred-test_labels)).mean()
    return 1-acc 

In [7]:
#Training (adapting function from lab10)
def train(model, criterion, image_input, labeled_images, optimizer, num_epochs):
  """
  @param model: torch.nn.Module
  @param criterion: torch.nn.modules.loss._Loss
  @param image_input: numpy.ndarray
  @param labeled_images: numpy.ndarray
  @param optimizer: torch.optim.Optimizer
  @param num_epochs: int
  """

  print("Starting training")
  #Cycle for epochs
  for epoch in range(num_epochs):
    # Train an epoch
    model.train()
    #Training using an image
    for x, y in image_input, images_labeled:
      #Cutting image and label to have an even correct dimension
      x=Cutting(x)
      y=Cutting(y)
      dimension_input1= calc_dim_input(x.shape[0])
      dimension_input2= calc_dim_input(x.shape[1]) 
      out_size=[dimension_input1,dimension_input2]
      #Mirroring like describe in the paper
      ext_x = extend_mirror(x, out_size)
      ext_x=torch.from_numpy(ext_x)
      ext_x=ext_x.view(1,1,dimension_input1,dimension_input2)
      y=torch.from_numpy(y)
      y=y.view(1,1,dim_image1,dim_image2)

      # Evaluate the network (forward pass)
      prediction = model(ext_x)
      loss = criterion(prediction,y)
      
      # Compute the gradient
      optimizer.zero_grad()
      loss.backward()

      # Update the parameters of the model with a gradient step
      optimizer.step()

The loss function used to train the network is CrossEntropy as described in the paper. 


In [53]:
num_epochs=10
learning_rate=0.001
optimizer=torch.optim.Adam(U_net(), lr=learning_rate) #In the paper they use SGD
criterion=torch.nn.CrossEntropyLoss() 
train(U_net(), criterion, image_input, labeled_images, optimizer, num_epochs)

TypeError: ignored

In [None]:
#Trying NN
dim_image1=1000
dim_image2=1000
image=torch.rand(dim_image1,dim_image2)
image=image.view(1,1,dim_image1,dim_image2)
model=U_net()
a=model(image)

In [55]:
a.size()

torch.Size([1, 2, 388, 388])

To try a different approach we adapt the U-net changing the last layer. In this case the output is given by a tensor with only one channel. After that a sigmoid function is applied in order to have an output value for every pixel between 0 and 1. This value models the probability of a pixel to be part of an air bubble. 
This is equivalent to do a logistic regression in the last layer, therefore in the training of the model we use 'Bcewithlogitloss' function of pytorch, which gives us logistic loss.



In [3]:
class U_net_with_prob(nn.Module):
    def __init__(self, n_channels=64):
        'U-net from the paper "Olaf Ronneberger, Philipp Fischer, and Thomas Brox": https://arxiv.org/abs/1505.04597 '
        super(U_net_with_prob, self).__init__()
        self.conv1 = nn.Conv2d(1,n_channels,3)
        self.conv2 = nn.Conv2d(n_channels,n_channels,3)
        self.pool = nn.MaxPool2d(2,stride=2)
        self.conv3 = nn.Conv2d(n_channels,2*n_channels,3)
        self.conv4 = nn.Conv2d(2*n_channels,2*n_channels,3)
        self.conv5 = nn.Conv2d(2*n_channels,4*n_channels,3)
        self.conv6 = nn.Conv2d(4*n_channels,4*n_channels,3)
        self.conv7 = nn.Conv2d(4*n_channels,8*n_channels,3)
        self.conv8 = nn.Conv2d(8*n_channels,8*n_channels,3)
        self.conv9 = nn.Conv2d(8*n_channels,16*n_channels,3)
        self.conv10 = nn.Conv2d(16*n_channels,16*n_channels,3)
        #self.upconv1= nn.ConvTranspose2d(16*n_channels,16*n_channels,2)
        self.upconv1 = nn.Upsample(scale_factor=2)
        self.conv11 = nn.Conv2d(16*n_channels,8*n_channels,3)
        self.conv12 = nn.Conv2d(8*n_channels,8*n_channels,3)
        #self.upconv2= nn.ConvTranspose2d(8*n_channels,8*n_channels,2)
        self.upconv2 = nn.Upsample(scale_factor=2)
        self.conv13 = nn.Conv2d(8*n_channels,4*n_channels,3)
        self.conv14 = nn.Conv2d(4*n_channels,4*n_channels,3)
        #self.upconv3= nn.ConvTranspose2d(4*n_channels,4*n_channels,2)
        self.upconv3 = nn.Upsample(scale_factor=2)
        self.conv15 = nn.Conv2d(4*n_channels,2*n_channels,3)
        self.conv16 = nn.Conv2d(2*n_channels,2*n_channels,3)
        #self.upconv4= nn.ConvTranspose2d(2*n_channels,2*n_channels,2)
        self.upconv4 = nn.Upsample(scale_factor=2)
        self.conv17 = nn.Conv2d(2*n_channels,n_channels,3)
        self.conv18 = nn.Conv2d(n_channels,n_channels,3)
        self.conv1_1= nn.Conv2d(n_channels,1,1)
        self.activation1=nn.ReLU()
        self.activation2=nn.Sigmoid()
    
    def forward(self, x):
        out = self.pool(self.activation1(self.conv2(self.activation1(self.conv1(x)))))
        out = self.pool(self.activation1(self.conv4(self.activation1(self.conv3(out)))))
        out = self.pool(self.activation1(self.conv6(self.activation1(self.conv5(out)))))
        out = self.pool(self.activation1(self.conv8(self.activation1(self.conv7(out)))))
        out = self.activation1(self.conv10(self.activation1(self.conv9(out))))
        out = self.upconv1(out)
        out = self.activation1(self.conv12(self.activation1(self.conv11(out))))
        out = self.upconv2(out)
        out = self.activation1(self.conv14(self.activation1(self.conv13(out))))
        out = self.upconv3(out)
        out = self.activation1(self.conv16(self.activation1(self.conv15(out))))
        out = self.upconv4(out)
        out = self.activation1(self.conv18(self.activation1(self.conv17(out))))
        out = self.activation2(self.conv1_1(out))
        return out


In [None]:
num_epochs=10
learning_rate=0.001
optimizer=torch.optim.Adam(U_net_with_prob, lr=learning_rate) 
criterion=nn.BCEWithLogitsLoss( reduction = 'mean')
train(U_net_with_prob, criterion, image_input, labeled_images, optimizer, num_epochs)

In [None]:
#Trying NN
dim_image1=1000
dim_image2=1000
image=torch.rand(dim_image1,dim_image2)
image=image.view(1,1,dim_image1,dim_image2)
model=U_net_with_prob()
a=model(image)

In [None]:
a.size()