<a href="https://colab.research.google.com/github/mziad97/Airbus-semantic-segmentation-pytorch/blob/main/Airbus_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Plan

* build the model
* input dimension ? D
* concatenate layer? D
* make the transformation with CPU, train with GPU? D
* data dir: data -> train, test? D
* train
* augmentation? 
* use more images?



In [None]:
import os
from zipfile import ZipFile
from shutil import copyfile
import copy
import time 

import pandas as pd 
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

# UNet Model

In [None]:
class Conv2d_Block(nn.Module):
  """Two same-padded square convolutions, each followed by an in-place ReLU.

  Args:
    in_channels: channels of the incoming feature map.
    out_channels: channels produced by both convolutions.
    kernel_size: side length of the square kernel.
  """

  def __init__(self, in_channels, out_channels, kernel_size=3):
    super().__init__()
    square_kernel = (kernel_size, kernel_size)
    # The convolutions are created in application order so parameter
    # initialisation consumes the random generator in the same sequence.
    first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=square_kernel, padding='same')
    second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=square_kernel, padding='same')
    self.conv2d = nn.Sequential(
      first_conv,
      nn.ReLU(inplace=True),
      second_conv,
      nn.ReLU(inplace=True),
    )

  def forward(self, x):
    features = self.conv2d(x)
    return features

class Encoder_Block(nn.Module):
  """One contracting step: conv block, then max-pooling, then dropout.

  forward returns a pair: the conv-block output before pooling (kept for the
  skip connection) and the pooled, dropped-out tensor for the next stage.
  """

  def __init__(self, in_channels, out_channels, pool_size=(2,2) , dropout=0.3):
    super().__init__()
    # Attributes are assigned in the same order as before so the registered
    # sub-module order does not change.
    self.dropout = nn.Dropout(dropout, inplace=True)
    self.max_pool = nn.MaxPool2d(kernel_size=pool_size)
    self.conv2d_block = Conv2d_Block(in_channels, out_channels)

  def forward(self, x):
    skip_features = self.conv2d_block(x)
    downsampled = self.dropout(self.max_pool(skip_features))
    return skip_features, downsampled

class Encoder(nn.Module):
  """Contracting path: four encoder blocks taking 3 -> 64 -> 128 -> 256 -> 512 channels.

  forward returns the output of the last block together with a tuple of the
  four pre-pooling feature maps, ordered from first block to last.
  """

  def __init__(self):
    super().__init__()
    widths = (3, 64, 128, 256, 512)
    # Registers encoder_block_1 ... encoder_block_4, in that order.
    for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
      stage_block = Encoder_Block(in_channels=c_in, out_channels=c_out, pool_size=(2,2), dropout=0.3)
      setattr(self, f'encoder_block_{stage}', stage_block)

  def forward(self, x):
    skips = []
    for stage in range(1, 5):
      features, x = getattr(self, f'encoder_block_{stage}')(x)
      skips.append(features)

    return x, tuple(skips)

class Bottle_Neck(nn.Module):
  """Stage between encoder and decoder: one conv block widening 512 channels to 1024."""

  def __init__(self):
    super().__init__()
    self.conv2d_block = Conv2d_Block(512, 1024)

  def forward(self, x):
    return self.conv2d_block(x)

class Decoder_Block(nn.Module):
  """One expanding step: transposed-conv upsampling, skip concatenation, conv block.

  The upsampled tensor (out_channels) is concatenated with the skip tensor along
  the channel axis and fed to a conv block that expects in_channels, so the skip
  tensor must carry in_channels - out_channels channels.

  The `dropout` argument is accepted but unused: dropout is disabled here.
  """

  def __init__(self, in_channels, out_channels, kernel_size=(3, 3), strides=2, padding=1, output_padding=1, dropout=0.3):
    super().__init__()
    # The conv block is built before the upsampler, as before, so parameter
    # initialisation order is unchanged.
    self.conv2d_block = Conv2d_Block(in_channels, out_channels)
    upsample_options = dict(kernel_size=kernel_size, stride=strides,
                            padding=padding, output_padding=output_padding)
    self.u = nn.ConvTranspose2d(in_channels, out_channels, **upsample_options)

  def forward(self, x, conv_outputs):
    upsampled = self.u(x)
    merged = torch.cat((upsampled, conv_outputs), dim=1)
    return self.conv2d_block(merged)

class Decoder(nn.Module):
  """Expanding path: four decoder blocks (1024 -> 512 -> 256 -> 128 -> 64 channels)
  followed by a 1x1 convolution and a sigmoid.

  Args:
    last_out_channels: number of channels of the final output map.
  """

  def __init__(self, last_out_channels):
    super().__init__()
    widths = (1024, 512, 256, 128, 64)
    # Registers decoder_block_1 ... decoder_block_4, in that order.
    for stage, (wide, narrow) in enumerate(zip(widths, widths[1:]), start=1):
      setattr(self, f'decoder_block_{stage}', Decoder_Block(in_channels=wide, out_channels=narrow))
    output_conv = nn.Conv2d(64, last_out_channels, kernel_size=(1,1))
    self.conv2d_output = nn.Sequential(output_conv, nn.Sigmoid())

  def forward(self, x, convs):
    f1, f2, f3, f4 = convs
    # Skip connections are consumed deepest-first.
    for stage, skip in enumerate((f4, f3, f2, f1), start=1):
      x = getattr(self, f'decoder_block_{stage}')(x, skip)

    return self.conv2d_output(x)

# Module-level default for the number of output channels. The UNet constructor
# parameter of the same name shadows this constant inside __init__.
LAST_OUT_CHANNELS = 1

class UNet(nn.Module):
  """Full network: encoder, bottleneck, then decoder fed with the encoder's skip maps."""

  def __init__(self, LAST_OUT_CHANNELS):
    super().__init__()
    # Sub-modules are created in the same order as before (encoder, decoder,
    # bottleneck) so parameter order and initialisation are unchanged.
    self.encoder = Encoder()
    self.decoder = Decoder(last_out_channels=LAST_OUT_CHANNELS)
    self.bottle_neck = Bottle_Neck()

  def forward(self, x):
    encoded, skip_maps = self.encoder(x)
    return self.decoder(self.bottle_neck(encoded), skip_maps)

# Download the data from Kaggle

In [None]:
# Install the Kaggle command-line client (quiet mode).
! pip install -q kaggle

# Create ~/.kaggle, the folder the API token is copied into below.
! mkdir ~/.kaggle

# kaggle.json must already be present in the working directory.
! cp kaggle.json ~/.kaggle/

# Restrict the token file to owner read/write only.
! chmod 600 ~/.kaggle/kaggle.json

# Force a reinstall/upgrade of the kaggle package without touching its dependencies.
! pip install --upgrade --force-reinstall --no-deps kaggle

Collecting kaggle
  Downloading kaggle-1.5.12.tar.gz (58 kB)
[?25l[K     |█████▋                          | 10 kB 27.1 MB/s eta 0:00:01[K     |███████████▏                    | 20 kB 11.0 MB/s eta 0:00:01[K     |████████████████▊               | 30 kB 8.7 MB/s eta 0:00:01[K     |██████████████████████▎         | 40 kB 8.0 MB/s eta 0:00:01[K     |███████████████████████████▉    | 51 kB 4.2 MB/s eta 0:00:01[K     |████████████████████████████████| 58 kB 2.9 MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-py3-none-any.whl size=73051 sha256=abea2f8bcfedad38c7ce07a706959431067a8f0f4d57be1aa5c26bfb00cc45aa
  Stored in directory: /root/.cache/pip/wheels/62/d6/58/5853130f941e75b2177d281eb7e44b4a98ed46dd155f556dc5
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.5.12
    Unin

In [None]:
# Download the competition archive only when no 'train_v2' entry exists in the
# working directory yet.
if ('train_v2' not in os.listdir('.')):
  ! kaggle competitions download -c airbus-ship-detection 

Downloading airbus-ship-detection.zip to /content
100% 28.6G/28.6G [11:15<00:00, 52.6MB/s]
100% 28.6G/28.6G [11:16<00:00, 45.4MB/s]


In [None]:
# Pull only the annotation CSV out of the competition archive.
with ZipFile('airbus-ship-detection.zip', mode='r') as zipObj:
    zipObj.extract(member='train_ship_segmentations_v2.csv')

In [None]:
# Load the annotations and discard rows that have no run-length code.
segments = pd.read_csv('train_ship_segmentations_v2.csv', index_col=0)
segments = segments.dropna().reset_index()

# One row per image: join all of an image's run-length codes with spaces.
segments = (
    segments.groupby("ImageId")[['EncodedPixels']]
    .agg(lambda codes: ' '.join(codes))
    .reset_index()
)

# Work with the first 7000 images only.
segments = segments.iloc[:7000]

In [None]:
# Extract only the images listed in `segments` rather than the whole archive.
with ZipFile('airbus-ship-detection.zip', 'r') as zipObj:
  for file in segments['ImageId'].values:
      # Member names are built as 'train_v2/<ImageId>'.
      file = os.path.join('train_v2', file)
      zipObj.extract(file)

# Delete the archive once the selected images have been extracted.
! rm airbus-ship-detection.zip

In [None]:
# Shuffle the annotation rows and split them 85% / 15% into training and
# validation tables; the fixed random_state makes the split repeatable.
train_paths, val_paths = train_test_split(segments, train_size=0.85, shuffle=True, random_state=0)

In [None]:
# Report how many images ended up in each split.
# NOTE(review): the second message says "test set" but reports the validation split.
print(f"The number of train set: {len(train_paths)}")
print(f"The number of test set: {len(val_paths)}")

The number of train set: 5950
The number of test set: 1050


In [None]:
# Replace the shuffled index inherited from `segments` with a fresh 0..n-1 index
# in each split (the old index is dropped, not kept as a column).
train_paths = train_paths.reset_index(drop=True)
val_paths = val_paths.reset_index(drop=True)

In [None]:
"""
move the images from 'train_v2' to 'data'where there are a folder for train and val
"""
# Remove any existing 'data' tree and recreate it with one folder per split.
! rm -r data
! mkdir data
! mkdir data/train
! mkdir data/val

# Folder the images were extracted into, and the per-split destination folders.
SOURCE = 'train_v2'
train_path = './data/train'
val_path = './data/val'

def build_data_dir(SOURCE, DEST, files):
  """
  Copy the named files from the SOURCE folder into the DEST folder.

  Args:
    SOURCE: folder that currently holds the files.
    DEST: folder to copy into; it is created (with parents) when missing.
    files: iterable of file names relative to SOURCE.

  Raises:
    FileNotFoundError: if one of the files does not exist in SOURCE.
  """
  # copyfile does not create missing folders, so make sure DEST exists first.
  os.makedirs(DEST, exist_ok=True)

  for filename in files:
    src = os.path.join(SOURCE, filename)
    dest = os.path.join(DEST, filename)
    copyfile(src, dest)

# Fill data/val and data/train with the images assigned to each split.
build_data_dir(SOURCE, val_path, val_paths['ImageId'].values)
build_data_dir(SOURCE, train_path, train_paths['ImageId'].values)

# Data Pipeline

In [None]:
import torchvision.transforms as transforms
import torch.utils
from torchvision.io import read_image

In [None]:
class CustomDataset(Dataset):
  """
  Dataset pairing each image file with its segmentation annotation.

  Args:
    images_dir: folder that contains the image files
    annotations: table read positionally with .iloc; column 0 is the file name
      and column 1 is the run-length-encoded segmentation
    transform: optional callable applied to the opened PIL image
    target_transform: optional callable applied to the run-length encoding

  Each item is an (image, segmentation) pair.
  """
  def __init__(self, images_dir, annotations, transform=None, target_transform=None):
    self.images_dir = images_dir
    self.annotations = annotations
    self.target_transform = target_transform
    self.transform = transform

  def __len__(self):
    sample_count = len(self.annotations)
    return sample_count

  def __getitem__(self, idx):
    file_name = self.annotations.iloc[idx, 0]
    image = Image.open(os.path.join(self.images_dir, file_name))
    segmentation = self.annotations.iloc[idx, 1]

    # Either transform may be left unset, in which case the raw value is returned.
    if self.transform:
      image = self.transform(image)
    if self.target_transform:
      segmentation = self.target_transform(segmentation)

    return image, segmentation

In [None]:
def rle_to_pixels(rle_code, height=768):
  """
  Decode a run-length-encoded mask into a list of (row, column) pixel coordinates.

  The code is a whitespace-separated string of `start length` pairs. Starts are
  1-based positions counted down each column (top to bottom, then left to
  right), so each position is shifted to 0-based before being split into
  row = position % height and column = position // height.

  Args:
    rle_code: the run-length encoding, e.g. "1 3 10 5".
    height: number of rows of the image the code refers to.

  Returns:
    A list of 0-based (row, column) tuples, one per encoded pixel. An empty
    string gives an empty list. A trailing start with no length is ignored.
  """
  tokens = [int(token) for token in rle_code.split()]
  runs = zip(tokens[0::2], tokens[1::2])

  pixels = []
  for start, length in runs:
    first = start - 1  # the encoding numbers pixels from 1
    for position in range(first, first + length):
      pixels.append((position % height, position // height))
  return pixels

def pixels_to_mask(pixels, shape=(768, 768)):
  """
  Paint the given pixels onto a blank canvas and return it as a mask tensor.

  1. create a zero-filled canvas of the requested shape
  2. set every listed (row, column) position to 1
  3. add a leading dimension and convert to a float32 tensor

  Args:
    pixels: iterable of (row, column) index pairs.
    shape: (height, width) of the canvas.

  Returns:
    A float32 tensor of shape (1, height, width) holding 0s and 1s. An empty
    `pixels` gives an all-zero mask.
  """
  canvas = np.zeros(shape)

  pixels = list(pixels)
  # With no pixels the index tuple would be empty, and assigning through an
  # empty index overwrites the whole array, so the empty case is skipped.
  if pixels:
    rows, cols = zip(*pixels)
    canvas[list(rows), list(cols)] = 1

  return torch.as_tensor(np.expand_dims(canvas, axis=0), dtype=torch.float32)

In [None]:
def _image_steps():
  """Image preprocessing: nearest-neighbour resize to 128x128, tensor conversion,
  per-channel normalisation (presumably the ImageNet statistics; confirm)."""
  return [
      transforms.Resize((128, 128), interpolation=transforms.InterpolationMode.NEAREST),
      transforms.ToTensor(),
      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
  ]


def _mask_steps():
  """Mask preprocessing: decode the run-length code, rasterise it, resize to 128x128."""
  return [
      rle_to_pixels,
      pixels_to_mask,
      transforms.Resize((128, 128), interpolation=transforms.InterpolationMode.NEAREST),
  ]


transform = {
    # Train-only augmentation is currently disabled:
    #   transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()
    'train': transforms.Compose(_image_steps()),
    'val': transforms.Compose(_image_steps()),
}

target_transform = {
    'train': transforms.Compose(_mask_steps()),
    'val': transforms.Compose(_mask_steps()),
}

paths = {
    'train': 'data/train',
    'val': 'data/val',
}

In [None]:
# Annotation table backing each split.
_split_frames = {'train': train_paths, 'val': val_paths}

datasets = {
    split: CustomDataset(paths[split], frame, transform[split], target_transform[split])
    for split, frame in _split_frames.items()
}

# Batches of 16 for both splits; only the training loader shuffles.
dataloaders = {
    split: torch.utils.data.DataLoader(datasets[split], batch_size=16, shuffle=(split == 'train'))
    for split in datasets
}

# Sizes are counted in pixels (images * 128 * 128), not in images.
dataset_sizes = {
    split: len(frame)*128*128
    for split, frame in _split_frames.items()
}

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training

In [None]:
def train_model(model, optimizer, criterion, scheduler, EPOCHS):
  """
  Train `model` for EPOCHS epochs and return it loaded with its best weights.

  for each epoch:
    train phase:
      pass the inputs through the net
      compute the loss
      compute gradients
      update weights
      (the scheduler is stepped once at the end of the phase)

    val phase:
      pass the inputs through the net
      compute the loss

  Batches come from the module-level `dataloaders[phase]` and are moved to the
  module-level `device`. Loss and accuracy are per-pixel averages, normalised
  by `dataset_sizes[phase]`. A pixel counts as correct when thresholding the
  output at 0.5 matches the label.

  Args:
    model: network mapping an input batch to per-pixel probabilities.
    optimizer: optimizer bound to the model's parameters.
    criterion: loss called as criterion(outputs, labels); assumed to return
      the mean over the batch's label elements.
    scheduler: learning-rate scheduler with a step() method.
    EPOCHS: number of epochs to run.

  Returns:
    The same `model` object, loaded with the weights of the epoch that had the
    highest validation accuracy (the initial weights if none beat 0.0).
  """
  since = time.time()

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(EPOCHS):
    print(f'epoch: {epoch}/{EPOCHS}:')
    print('-'*10)

    for phase in ['train', 'val']:
      if phase == 'train':
        model.train()
      else:
        model.eval()

      running_loss = 0.0
      running_corrects = 0

      for inputs, labels in dataloaders[phase]:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Weight the batch by its number of label pixels. Counting input
        # elements would also count the colour channels, while
        # dataset_sizes is expressed in pixels.
        batch_pixels_count = labels.numel()

        optimizer.zero_grad()

        # Gradients are only tracked during the training phase.
        with torch.set_grad_enabled(phase == 'train'):
          outputs = model(inputs)
          loss = criterion(outputs, labels)

          if phase == 'train':
            loss.backward()
            optimizer.step()

        running_loss += loss.item() * batch_pixels_count
        running_corrects += ((outputs > 0.5) == (labels > 0.5)).sum().item()

      if phase == 'train':
        scheduler.step()

      epoch_loss = running_loss / dataset_sizes[phase]
      epoch_acc = running_corrects / dataset_sizes[phase]

      print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

      if phase == 'val' and epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    print()

  # Everything below runs once, after the last epoch. It must stay outside the
  # epoch loop, otherwise the function returns after the first epoch.
  time_elapsed = time.time() - since
  print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
  print('Best val Acc: {:4f}'.format(best_acc))

  model.load_state_dict(best_model_wts)
  return model

In [None]:
# Build the network with a single output channel and place it on the chosen device.
unet = UNet(1).to(device)

# Binary cross-entropy on the network's output.
criterion = nn.BCELoss()

# SGD with momentum; the learning rate is multiplied by 0.1 every 7 scheduler steps.
optimizer = optim.SGD(params=unet.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

EPOCHS = 10

In [None]:
# Run training; train_model returns the model object it was given, so `model`
# and `unet` refer to the same network afterwards.
model = train_model(unet, optimizer, criterion, exp_lr_scheduler, EPOCHS)

In [None]:
# Serialise the whole module object (not just its state_dict) to 'unet.pt'.
# NOTE(review): loading this file requires the model class definitions to be
# importable; saving unet.state_dict() is the more portable option.
torch.save(unet, 'unet.pt')