<a href="https://colab.research.google.com/github/mziad97/Airbus-semantic-segmentation-pytorch/blob/main/Airbus_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Plan

* build the model
* input dimension ? D
* concatenate layer? D
* make the transformation with CPU, train with GPU? D
* data dir: data -> train, test? D
* train
* augmentation? 
* use more images?



In [26]:
import os
from zipfile import ZipFile
from shutil import copyfile
import copy
import time 

import pandas as pd 
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

# UNet Model

In [None]:
class Conv2d_Block(nn.Module):
  """Two stacked 'same'-padded convolutions, each followed by a ReLU.

  Args:
    in_channels: number of channels of the incoming feature map.
    out_channels: number of channels produced by both convolutions.
    kernel_size: side length of the square convolution kernel.
  """

  def __init__(self, in_channels, out_channels, kernel_size=3):
    super().__init__()

    self.conv2d = nn.Sequential(
      nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(kernel_size, kernel_size), padding='same'),
      nn.ReLU(inplace=True),
      nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=(kernel_size, kernel_size), padding='same'),
      nn.ReLU(inplace=True)
    )

  # `forward` has to be a method of the class (not a function nested in
  # __init__) and spelled exactly like this, because nn.Module.__call__
  # dispatches to it.
  def forward(self, x):
    """Run `x` through the convolution stack; the spatial size is preserved."""
    return self.conv2d(x)


class Encoder_Block(nn.Module):
  """One encoder stage: convolution block, then max-pooling and dropout.

  Args:
    in_channels: channels of the incoming feature map.
    out_channels: channels produced by the convolution block.
    pool_size: window of the max-pooling layer.
    dropout: dropout probability applied to the pooled features.
  """

  def __init__(self, in_channels, out_channels, pool_size=(2,2) , dropout=0.3):
    super().__init__()
    self.dropout = nn.Dropout(p=dropout)
    self.max_pool = nn.MaxPool2d(pool_size)
    self.conv2d_block = Conv2d_Block(in_channels, out_channels)

  # Was misspelled `foward`, which left the module without a forward pass.
  def forward(self, x):
    """Return `(f, P)`: the un-pooled features and the pooled + dropped-out features."""
    f = self.conv2d_block(x)
    P = self.max_pool(f)
    P = self.dropout(P)

    return f, P

class Encoder(nn.Module):
  """Contracting path: four encoder stages going 3 -> 64 -> 128 -> 256 -> 512 channels."""

  def __init__(self):
    super().__init__()

    self.encoder_block_1 = Encoder_Block(in_channels=3, out_channels=64)
    self.encoder_block_2 = Encoder_Block(in_channels=64, out_channels=128)
    self.encoder_block_3 = Encoder_Block(in_channels=128, out_channels=256)
    self.encoder_block_4 = Encoder_Block(in_channels=256, out_channels=512)

  def forward(self, x):
    """Return the last pooled output and the tuple of un-pooled features of every stage.

    Each stage consumes the pooled output of the previous one; the un-pooled
    features `(f1, f2, f3, f4)` are handed back so the caller can use them as
    skip connections.
    """
    # Every stage has its own weights, so each numbered block is called once
    # (the original referenced a non-existent `self.encoder_block`).
    f1, P1 = self.encoder_block_1(x)
    f2, P2 = self.encoder_block_2(P1)
    f3, P3 = self.encoder_block_3(P2)
    f4, P4 = self.encoder_block_4(P3)

    return P4, (f1, f2, f3, f4)


class Bottle_Neck(nn.Module):
  """Single convolution block mapping 512 input channels to 1024 output channels."""

  def __init__(self):
    super().__init__()
    self.conv2d_block = Conv2d_Block(512, 1024)

  def forward(self, x):
    """Return the convolution block applied to `x`."""
    return self.conv2d_block(x)

In [None]:
class Decoder_Block(nn.Module):
  """One decoder stage: transposed-conv upsampling, skip concatenation, dropout, conv block.

  Args:
    in_channels: channels of the incoming (coarser) feature map. The
      convolution block also receives `in_channels` channels, i.e. the skip
      tensor is expected to carry `in_channels - out_channels` channels.
    out_channels: channels produced by the upsampling and by the block.
    kernel_size: int or pair, kernel of the transposed convolution.
    strides: int or pair, stride (upsampling factor) of the transposed convolution.
    dropout: dropout probability applied to the concatenated features.
  """

  def __init__(self, in_channels, out_channels, kernel_size=3, strides=2, dropout=0.3):
    super().__init__()

    # ConvTranspose2d only accepts numeric padding (no 'same'), so derive the
    # padding / output_padding that make the output `stride` times the input:
    #   out = (in - 1) * s - 2 * p + k + output_padding
    kernel = self._as_pair(kernel_size)
    stride = self._as_pair(strides)
    padding = tuple((k - 1) // 2 for k in kernel)
    output_padding = tuple(
      max(s + 2 * p - k, 0) for k, s, p in zip(kernel, stride, padding)
    )

    self.conv2d_block = Conv2d_Block(in_channels, out_channels)
    self.u = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel, stride=stride,
                                padding=padding, output_padding=output_padding)
    self.dropout = nn.Dropout(p=dropout)

  @staticmethod
  def _as_pair(value):
    """Return `value` as a 2-tuple, duplicating it when a single int is given."""
    return (value, value) if isinstance(value, int) else tuple(value)

  def forward(self, x, conv_outputs):
    """Upsample `x`, concatenate it with the skip tensor `conv_outputs`, and refine."""
    upsampled = self.u(x)
    # Pooling floors odd sizes (e.g. 143 -> 71), so the upsampled map can be a
    # pixel off from the skip tensor; resize it so the concatenation is valid.
    if upsampled.shape[-2:] != conv_outputs.shape[-2:]:
      upsampled = nn.functional.interpolate(upsampled, size=conv_outputs.shape[-2:], mode='nearest')

    c = torch.cat([upsampled, conv_outputs], 1)
    c = self.dropout(c)
    c = self.conv2d_block(c)

    return c


class Decoder(nn.Module):
  """Expanding path: four decoder stages (1024 -> 512 -> 256 -> 128 -> 64 channels)
  followed by a 1x1 convolution and a sigmoid.

  Args:
    last_out_channels: number of channels of the final output map.
  """

  def __init__(self, last_out_channels):
    super().__init__()
    # All four stages share the same upsampling and dropout settings.
    stage_options = dict(kernel_size=(3,3), strides=(2,2), dropout=0.3)
    self.decoder_block_1 = Decoder_Block(in_channels=1024, out_channels=512, **stage_options)
    self.decoder_block_2 = Decoder_Block(in_channels=512, out_channels=256, **stage_options)
    self.decoder_block_3 = Decoder_Block(in_channels=256, out_channels=128, **stage_options)
    self.decoder_block_4 = Decoder_Block(in_channels=128, out_channels=64, **stage_options)

    output_layers = [
      nn.Conv2d(in_channels=64, out_channels=last_out_channels, kernel_size=(1,1)),
      nn.Sigmoid(),
    ]
    self.conv2d_output = nn.Sequential(*output_layers)

  def forward(self, x, convs):
    """Decode `x` using the four skip tensors in `convs` and return the output map.

    `convs` is unpacked as `(f1, f2, f3, f4)`; the stages consume them in
    reverse order, f4 first.
    """
    skip_1, skip_2, skip_3, skip_4 = convs
    features = self.decoder_block_1(x, skip_4)
    features = self.decoder_block_2(features, skip_3)
    features = self.decoder_block_3(features, skip_2)
    features = self.decoder_block_4(features, skip_1)

    return self.conv2d_output(features)

In [None]:
LAST_OUT_CHANNELS = 1


class UNet(nn.Module):
  """Full network: encoder, bottleneck and decoder chained together.

  Args:
    LAST_OUT_CHANNELS: number of channels of the decoder's output map.
  """

  def __init__(self, LAST_OUT_CHANNELS):
    super().__init__()
    self.encoder = Encoder()
    self.decoder = Decoder(last_out_channels=LAST_OUT_CHANNELS)
    self.bottle_neck = Bottle_Neck()

  def forward(self, x):
    """Encode `x`, pass the result through the bottleneck and decode it with the skip tensors."""
    pooled, skips = self.encoder(x)
    return self.decoder(self.bottle_neck(pooled), skips)


In [None]:
# Build a network with a single output channel.
Unet = UNet(LAST_OUT_CHANNELS=1)

# Download the data from Kaggle

In [2]:
! pip install -q kaggle

! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

! chmod 600 ~/.kaggle/kaggle.json

! pip install --upgrade --force-reinstall --no-deps kaggle

Collecting kaggle
  Downloading kaggle-1.5.12.tar.gz (58 kB)
[?25l[K     |█████▋                          | 10 kB 30.0 MB/s eta 0:00:01[K     |███████████▏                    | 20 kB 35.9 MB/s eta 0:00:01[K     |████████████████▊               | 30 kB 41.9 MB/s eta 0:00:01[K     |██████████████████████▎         | 40 kB 29.1 MB/s eta 0:00:01[K     |███████████████████████████▉    | 51 kB 30.9 MB/s eta 0:00:01[K     |████████████████████████████████| 58 kB 5.8 MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-py3-none-any.whl size=73051 sha256=747bfa879872eeea22c38abe669fc5e54864509bf9f74d39470a71a43a45d3b0
  Stored in directory: /root/.cache/pip/wheels/62/d6/58/5853130f941e75b2177d281eb7e44b4a98ed46dd155f556dc5
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.5.12
    U

In [3]:
if ('train_v2' not in os.listdir('.')):
  ! kaggle competitions download -c airbus-ship-detection 

Downloading airbus-ship-detection.zip to /content
100% 28.6G/28.6G [09:51<00:00, 79.3MB/s]
100% 28.6G/28.6G [09:51<00:00, 51.9MB/s]


In [4]:
# Pull only the annotation CSV out of the competition archive.
with ZipFile('airbus-ship-detection.zip', mode='r') as archive:
    archive.extract('train_ship_segmentations_v2.csv')

In [5]:
# Load the annotations and drop the rows that have missing values.
raw_segments = pd.read_csv('train_ship_segmentations_v2.csv', index_col=0)
raw_segments = raw_segments.dropna().reset_index()

# One row per image: join all of its RLE strings with a space.
per_image = raw_segments.groupby("ImageId")[['EncodedPixels']]
segments = per_image.agg(lambda codes: ' '.join(codes)).reset_index()

# Keep the first 7000 images only.
segments = segments.head(7000)

In [6]:
# Extract only the images listed in `segments` from the archive's train_v2 folder.
with ZipFile('airbus-ship-detection.zip', 'r') as archive:
  for image_id in segments['ImageId'].values:
      archive.extract(os.path.join('train_v2', image_id))

# ! rm airbus-ship-detection.zip

In [49]:
# Shuffled 90/10 split of the annotation table with a fixed seed.
train_paths, val_paths = train_test_split(
  segments,
  train_size=0.90,
  shuffle=True,
  random_state=0,
)

In [50]:
# Report the size of both splits; the second split is the validation set.
print(f"The number of train set: {len(train_paths)}")
print(f"The number of validation set: {len(val_paths)}")

The number of train set: 5950
The number of test set: 1050


In [51]:
# Give both splits a fresh 0..n-1 index, discarding the old one.
train_paths, val_paths = (
  frame.reset_index(drop=True) for frame in (train_paths, val_paths)
)

In [None]:
! mkdir data
! mkdir data/train
! mkdir data/val

In [29]:
SOURCE = 'train_v2'
train_path = './data/train'
val_path = './data/val'

def build_data_dir(SOURCE, DEST, files):
  """Copy every file named in `files` from the SOURCE folder into the DEST folder.

  Args:
    SOURCE: directory that currently holds the files.
    DEST: directory to copy into; created if it does not exist yet.
    files: iterable of file names relative to SOURCE.
  """
  # copyfile does not create missing directories, so make sure DEST exists.
  os.makedirs(DEST, exist_ok=True)
  for filename in files:
    src = os.path.join(SOURCE, filename)
    dest = os.path.join(DEST, filename)
    copyfile(src, dest)

# Copy the images of each split into its own folder.
# The file names come from the DataFrames `val_paths` / `train_paths`;
# `val_path` / `train_path` are the destination directory strings.
build_data_dir(SOURCE, val_path, val_paths['ImageId'].values)
build_data_dir(SOURCE, train_path, train_paths['ImageId'].values)

# Data Pipeline

In [31]:
import torchvision.transforms as transforms
import torch.utils
from torchvision.io import read_image

In [32]:
class CustomDataset(Dataset):
  """Dataset yielding `(image, annotation)` pairs.

  Args:
    images_dir: directory that contains the image files.
    annotations: table indexed through `.iloc`; column 0 is read as the image
      file name and column 1 as the annotation value.
    transform: optional callable applied to the opened image.
    target_transform: optional callable applied to the annotation value.
  """

  def __init__(self, images_dir, annotations, transform=None, target_transform=None):
    self.images_dir = images_dir
    self.annotations = annotations
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    """Number of annotation rows."""
    return len(self.annotations)

  def __getitem__(self, idx):
    """Open the image of row `idx` and return it with its (optionally transformed) annotation."""
    filename = self.annotations.iloc[idx, 0]
    sample = Image.open(os.path.join(self.images_dir, filename))
    target = self.annotations.iloc[idx, 1]

    if self.transform:
      sample = self.transform(sample)
    if self.target_transform:
      target = self.target_transform(target)

    return sample, target

In [39]:
def rle_to_pixels(rle_code):
  '''
  Transforms a RLE code string into a list of pixels of a (768, 768) canvas.

  `rle_code` is a whitespace-separated sequence of `start length` pairs.
  Starts are treated as one-indexed positions counted column by column
  (top to bottom, then left to right), so position 1 is the top-left pixel.

  Returns a list of (row, column) tuples, zero-indexed.
  '''
  rle_code = [int(i) for i in rle_code.split()]
  # `start - 1` converts the one-indexed start into a zero-indexed offset;
  # without it every run is shifted by one pixel and the very last pixel of
  # the canvas would land outside of it.
  pixels = [(pixel_position % 768, pixel_position // 768)
                for start, length in zip(rle_code[0:-1:2], rle_code[1::2])
                for pixel_position in range(start - 1, start - 1 + length)]
  return pixels

def pixels_to_mask(pixels):
  '''
  Builds a binary mask tensor of shape (1, 768, 768) from (row, column) pixels.

  Listed pixels are set to 1, everything else stays 0. The result has dtype
  torch.uint8.
  '''
  canvas = np.zeros((768, 768), dtype=np.uint8)

  pixels = list(pixels)
  # Guard the empty case: indexing with an empty tuple (`canvas[()] = 1`)
  # would fill the whole canvas instead of leaving it blank.
  if pixels:
    rows, cols = zip(*pixels)
    canvas[np.asarray(rows), np.asarray(cols)] = 1

  return torch.as_tensor(np.expand_dims(canvas, axis=0), dtype=torch.uint8)

In [40]:
# Image pipelines per split: resize to 572x572, convert to a tensor and
# normalize each channel with the given mean / std.
# NOTE: transforms.Compose has no `to_device` method; the transforms simply
# run wherever the dataset is evaluated, and batches are moved to the
# training device afterwards.
transform = {
    'train': transforms.Compose([
        transforms.Resize((572, 572), interpolation=transforms.InterpolationMode.NEAREST),
        # transforms.RandomResizedCrop(224),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((572, 572), interpolation=transforms.InterpolationMode.NEAREST),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}


# Mask pipelines per split: decode the RLE string into pixels, rasterize them
# into a mask tensor and resize it to 572x572 with nearest-neighbour
# interpolation so the mask stays binary.
target_transform = {
    'train': transforms.Compose([
        rle_to_pixels,
        pixels_to_mask,
        transforms.Resize((572, 572), interpolation=transforms.InterpolationMode.NEAREST)
    ]),
    'val': transforms.Compose([
        rle_to_pixels,
        pixels_to_mask,
        transforms.Resize((572, 572), interpolation=transforms.InterpolationMode.NEAREST)
    ]),
}

# Image folder of each split.
paths = {'train':'data/train', 'val':'data/val'}

In [45]:
# Annotation table of each split. The original passed the *strings* 'train'
# and 'train_paths' to CustomDataset instead of the image folder and the
# DataFrame.
annotations = {'train': train_paths, 'val': val_paths}

datasets = {x: CustomDataset(paths[x], annotations[x], transform[x], target_transform[x])
            for x in ['train', 'val']}

# Only the training split is shuffled.
dataloaders = {'train': torch.utils.data.DataLoader(datasets['train'], batch_size=32, shuffle=True),
               'val':torch.utils.data.DataLoader(datasets['val'], batch_size=32) }

dataset_sizes = {'train': len(train_paths),
                 'val': len(val_paths)}

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")

In [11]:
# def im_show(inp):
#     plt.figure(figsize=(20, 10))
#     inp = inp.numpy().transpose((1, 2, 0))
#     # mean = np.array([0.485, 0.456, 0.406])
#     # std = np.array([0.229, 0.224, 0.225])
#     # inp = std * inp + mean
#     # inp = np.clip(inp, 0, 1)
#     plt.imshow(inp)

#     plt.pause(0.001)  # pause a bit so that plots are updated

In [7]:
# def seg_show(seg):
#   plt.figure(figsize=(20, 10))
#   # seg = seg.squeeze()
  
#   plt.imshow(seg)
#   plt.pause(0.001)

In [8]:
# out = torchvision.utils.make_grid(image)

In [9]:
# im_show(out)

In [10]:
# plt.figure(figsize=(20,10))
# for i in range(6):
#   plt.subplot(1,6,i+1)
#   plt.imshow(segmentation[i].squeeze())

# Training

In [None]:
# The decoder ends in a sigmoid, so binary cross-entropy on probabilities is
# the matching loss.
loss = nn.BCELoss()
# SGD needs the parameters it should update; the hyper-parameters mirror the
# optimizer configured in the final cell of this notebook.
SGD = torch.optim.SGD(Unet.parameters(), lr=0.001, momentum=0.9)

In [None]:
def train_model(model, optimizer, criterion, scheduler, EPOCHS, loaders=None):
  """Train `model` for EPOCHS epochs and return it loaded with its best weights.

  Every epoch runs a 'train' phase (gradients on, optimizer and scheduler
  stepped) followed by a 'val' phase (gradients off). The weights with the
  highest validation pixel accuracy are restored before returning.

  Args:
    model: network mapping an input batch to per-pixel outputs; the 0.5
      threshold below assumes these are probabilities in [0, 1].
    optimizer: optimizer over the model's parameters.
    criterion: loss called as `criterion(outputs, labels)` with float labels.
    scheduler: learning-rate scheduler, stepped once per training epoch.
    EPOCHS: number of epochs to run.
    loaders: optional dict with 'train' and 'val' iterables of
      `(inputs, labels)` batches; defaults to the module-level `dataloaders`.

  Returns:
    The same `model` object, holding the best validation weights.
  """
  if loaders is None:
    loaders = dataloaders

  # Move the batches to wherever the model lives instead of hard-coding a device.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  since = time.time()

  best_model_wts = copy.deepcopy(model.state_dict())
  best_acc = 0.0

  for epoch in range(EPOCHS):
    print(f'epoch: {epoch}/{EPOCHS}:')
    print('-'*10)

    for phase in ['train', 'val']:
      if phase == 'train':
        model.train()
      else:
        model.eval()

      running_loss = 0.0
      running_corrects = 0
      seen_samples = 0
      seen_pixels = 0

      for inputs, labels in loaders[phase]:
        inputs = inputs.to(device)
        # Loss functions such as BCELoss need float targets; masks arrive as uint8.
        labels = labels.to(device).float()

        optimizer.zero_grad()

        with torch.set_grad_enabled(phase == 'train'):
          outputs = model(inputs)
          # The loss must be computed on the raw outputs: thresholded
          # predictions carry no gradient.
          loss = criterion(outputs, labels)
          preds = (outputs > 0.5).float()

          if phase == 'train':
            loss.backward()
            optimizer.step()

        # Accumulate (not overwrite) the per-batch statistics.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels).item()
        seen_samples += inputs.size(0)
        seen_pixels += labels.numel()

      if phase == 'train':
        scheduler.step()

      # Loss is averaged per sample, accuracy per pixel (corrects are counted per pixel).
      epoch_loss = running_loss / max(seen_samples, 1)
      epoch_acc = running_corrects / max(seen_pixels, 1)

      print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

      if phase == 'val' and epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    print()

  # Summary and weight restore happen once, after the last epoch; they used to
  # sit inside the epoch loop, which ended training after the first epoch.
  time_elapsed = time.time() - since
  print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
  print('Best val Acc: {:4f}'.format(best_acc))

  model.load_state_dict(best_model_wts)
  return model

In [None]:
# Single-channel U-Net, moved to the device selected earlier in the notebook.
unet = UNet(1)

unet = unet.to(device)

# Binary cross-entropy on the network's sigmoid outputs.
criterion = nn.BCELoss()

optimizer = optim.SGD(unet.parameters(), lr=0.001, momentum=0.9)

# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)