<a href="https://colab.research.google.com/github/mziad97/Airbus-ship-semantic-segmentation/blob/main/Airbus_Ship_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## Visualization helper (disabled): overlay a decoded ship mask on its image
# def apply_mask(image, mask):
#     for x, y in mask:
#         image[x, y, [0, 1]] = 255
#     return image


# load_img = lambda filename: np.array(PIL.Image.open(f"./train_v2/{filename}"))

# img = load_img(segments.loc[0, 'ImageId'])
# mask_pixels = rle_to_pixels(segments.loc[0, 'EncodedPixels'])
# img = apply_mask(img, mask_pixels)
# plt.imshow(img);

In [2]:
import os
import pathlib
import PIL
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import image
from zipfile import ZipFile

In [3]:
import tensorflow as tf

from sklearn.model_selection import train_test_split


## UNet Model

In [34]:
# Encoder

def conv2d_block(input, n_filters, kernel_size=3):
  '''
  Applies two consecutive Conv2D + ReLU layers to the input tensor.

  Args:
    input (tensor) -- input features
    n_filters (int) -- number of filters used by each convolution
    kernel_size (int) -- side length of the square convolution kernel

  Returns:
    x (tensor) -- features after both convolutions ('same' padding is used,
                  so the convolutions do not shrink the spatial dimensions)
  '''
  x = input
  for _ in range(2):
    x = tf.keras.layers.Conv2D(filters=n_filters, kernel_size=(kernel_size, kernel_size),
                               kernel_initializer='he_normal', padding='same')(x)
    x = tf.keras.layers.Activation('relu')(x)

  # Return only after the loop: returning inside it would stop after the
  # first convolution and silently halve the depth of every block.
  return x

def encoder_block(inputs, n_filters=64, pool_size=(2,2), dropout=0.3):
  '''
  Defines one encoder block of the UNet: conv block -> max pooling -> dropout.

  Args:
    inputs (tensor) -- batch of input features
    n_filters (int) -- number of filters of the conv block
    pool_size (tuple) -- window size of the max pooling layer
    dropout (float) -- dropout rate applied after pooling

  Returns:
    f (tensor) -- conv block output before pooling (used as skip connection)
    p (tensor) -- pooled and dropout-regularized features fed to the next block
  '''
  f = conv2d_block(inputs, n_filters=n_filters)
  # Honour the caller-supplied pooling window and dropout rate instead of
  # silently falling back to hard-coded values.
  p = tf.keras.layers.MaxPooling2D(pool_size=pool_size)(f)
  p = tf.keras.layers.Dropout(dropout)(p)

  return f, p

def encoder(inputs):
  '''
  Defines the UNet encoder (downsampling path) by chaining four encoder blocks
  with 64, 128, 256 and 512 filters.

  Args:
    inputs (tensor) -- batch of input images

  Returns:
    x (tensor) -- pooled output of the last encoder block
    skips (tuple) -- pre-pooling features of the four blocks, shallowest first
  '''
  skips = []
  x = inputs
  for filters in (64, 128, 256, 512):
    features, x = encoder_block(x, n_filters=filters, pool_size=(2,2), dropout=0.3)
    skips.append(features)

  return x, tuple(skips)

def bottleneck(inputs):
  '''
  Defines the UNet bottleneck: a single conv block with 1024 filters and no pooling.

  Args:
    inputs (tensor) -- output of the encoder

  Returns:
    (tensor) -- bottleneck features passed on to the decoder
  '''
  return conv2d_block(inputs, n_filters=1024)

In [35]:
# Decoder Utilities

def decoder_block(inputs, conv_output, n_filters=64, kernel_size=3, strides=3, dropout=0.3):
  '''
  Builds one decoder (upsampling) block of the UNet:
  transposed convolution -> concatenation with the skip connection -> dropout -> conv block.

  Args:
    inputs (tensor) -- batch of input features coming from the previous stage
    conv_output (tensor) -- features from the matching encoder block (skip connection)
    n_filters (int) -- number of filters of the transposed convolution and the conv block
    kernel_size (int or tuple) -- kernel size of the transposed convolution only;
                                  the following conv block always uses a 3x3 kernel
    strides (int or tuple) -- strides of the transposed convolution (upsampling factor)
    dropout (float) -- dropout rate applied to the concatenated features

  Returns:
    (tensor) -- output features of the decoder block
  '''
  # The transposed convolution always uses 'same' padding.
  upsampled = tf.keras.layers.Conv2DTranspose(n_filters, kernel_size, strides = strides, padding = 'same')(inputs)
  merged = tf.keras.layers.concatenate([upsampled, conv_output])
  regularized = tf.keras.layers.Dropout(dropout)(merged)

  return conv2d_block(regularized, n_filters, kernel_size=3)


def decoder(inputs, convs, output_channels):
  '''
  Defines the decoder of the UNet chaining together 4 decoder blocks.

  Args:
    inputs (tensor) -- batch of input features (bottleneck output)
    convs (tuple) -- features from the four encoder blocks, shallowest first
    output_channels (int) -- number of channels in the label map
                             (1 for a binary mask, otherwise the number of classes)

  Returns:
    outputs (tensor) -- the pixel wise label map of the image
  '''

  f1, f2, f3, f4 = convs

  # Walk back up the encoder: deepest skip connection first.
  c6 = decoder_block(inputs, f4, n_filters=512, kernel_size=(3,3), strides=(2,2), dropout=0.3)
  c7 = decoder_block(c6, f3, n_filters=256, kernel_size=(3,3), strides=(2,2), dropout=0.3)
  c8 = decoder_block(c7, f2, n_filters=128, kernel_size=(3,3), strides=(2,2), dropout=0.3)
  c9 = decoder_block(c8, f1, n_filters=64, kernel_size=(3,3), strides=(2,2), dropout=0.3)

  # Softmax over a single channel always outputs 1.0, so a one-channel (binary)
  # head must use sigmoid; softmax is kept for multi-class label maps.
  final_activation = 'sigmoid' if output_channels == 1 else 'softmax'
  outputs = tf.keras.layers.Conv2D(output_channels, (1, 1), activation=final_activation)(c9)

  return outputs

In [36]:
# A single output channel: one ship / no-ship value per pixel.
OUTPUT_CHANNELS = 1

def unet():
  '''
  Assembles the full UNet: input -> encoder -> bottleneck -> decoder.

  Returns:
    (tf.keras.Model) -- model taking 128x128 RGB images and producing a label
                        map with OUTPUT_CHANNELS channels
  '''
  image_input = tf.keras.layers.Input(shape=(128, 128, 3))

  # downsampling path; `skip_features` are re-used by the decoder
  encoded, skip_features = encoder(image_input)

  # upsampling path, fed with the bottleneck output and the encoder features
  label_map = decoder(bottleneck(encoded), skip_features, output_channels=OUTPUT_CHANNELS)

  return tf.keras.Model(inputs=image_input, outputs=label_map)



In [37]:
# Instantiate the UNet; this `model` object is compiled and trained further below.
model = unet()

## Get the data from Kaggle

In [38]:
! pip install -q kaggle

! mkdir ~/.kaggle

In [39]:
# Copy kaggle.json from the working directory into ~/.kaggle/
# (presumably the Kaggle API token uploaded by the user -- it is not created by this notebook).
! cp kaggle.json ~/.kaggle/

# Restrict the file to owner read/write only.
! chmod 600 ~/.kaggle/kaggle.json

In [40]:
# Force a fresh reinstall of the kaggle package itself, leaving its dependencies untouched.
!pip install --upgrade --force-reinstall --no-deps kaggle

Collecting kaggle
  Downloading kaggle-1.5.12.tar.gz (58 kB)
[?25l[K     |█████▋                          | 10 kB 23.3 MB/s eta 0:00:01[K     |███████████▏                    | 20 kB 27.5 MB/s eta 0:00:01[K     |████████████████▊               | 30 kB 23.1 MB/s eta 0:00:01[K     |██████████████████████▎         | 40 kB 17.5 MB/s eta 0:00:01[K     |███████████████████████████▉    | 51 kB 8.3 MB/s eta 0:00:01[K     |████████████████████████████████| 58 kB 2.4 MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-py3-none-any.whl size=73051 sha256=28f82cbb474e2e2bf0c4f5762422b1d07e97116af18af12e66735cd136159a62
  Stored in directory: /root/.cache/pip/wheels/62/d6/58/5853130f941e75b2177d281eb7e44b4a98ed46dd155f556dc5
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.5.12
    Un

In [41]:
# Download the competition archive only when no `train_v2` entry exists in the working directory.
if ('train_v2' not in os.listdir('.')):
  ! kaggle competitions download -c airbus-ship-detection

  # Pull only the segmentation CSV out of the archive at this point.
  with ZipFile('airbus-ship-detection.zip', 'r') as zipObj:
    zipObj.extract('train_ship_segmentations_v2.csv')

Downloading airbus-ship-detection.zip to /content
100% 28.6G/28.6G [09:39<00:00, 61.8MB/s]
100% 28.6G/28.6G [09:42<00:00, 52.7MB/s]


In [43]:
# Load the segmentation table and drop rows with missing values
# (presumably images without any ship, which have no run-length encoding).
segments = pd.read_csv('train_ship_segmentations_v2.csv', index_col=0).dropna().reset_index()
# Prefix the file names with the folder they are stored under inside the archive.
segments['ImageId'] = segments['ImageId'].map(lambda filename: "train_v2/" + filename)


# The archive is deleted after extraction, so skip this step when the cell is
# re-run and the archive is already gone instead of failing on a missing file.
if os.path.exists('airbus-ship-detection.zip'):
  with ZipFile('airbus-ship-detection.zip', 'r') as zipObj:
    # An image with several ships has one row per ship: extract each file only once.
    for file in segments['ImageId'].unique():
        zipObj.extract(file)

  os.remove('airbus-ship-detection.zip')

In [44]:
# One row per image: the run-length codes of all ships found in the same
# image are concatenated into a single space-separated string.
segments = (
    segments
    .groupby("ImageId")[['EncodedPixels']]
    .agg(' '.join)
    .reset_index()
)

In [45]:
# Shuffled 80/20 split of the rows of `segments`; the fixed random_state makes the split reproducible.
train_paths, test_paths = train_test_split(segments, train_size=0.8, shuffle=True, random_state=0)

In [46]:
# Report how many rows ended up in each split.
print(f"The number of train set: {len(train_paths)}")
print(f"The number of test set: {len(test_paths)}")

The number of train set: 34044
The number of test set: 8512


## Data Pipeline

In [None]:
# def to_mask(rle):
#   pixels = rle_to_pixels(rle)
#   temp_var = tf.Variable(initial_value=[0]*589824, dtype=tf.uint8)
#   # tf.Variable(tf.zeros(shape=(589824)) )
#   tf.compat.v1.scatter_update(ref=temp_var,
#                             indices=pixels,
#                             updates=1)
#   temp_var = tf.reshape(temp_var, (768,768))
#   return tf.transpose(tf.expand_dims(temp_var, 0)) 

In [74]:
def rle_to_pixels(rle_code, height=768):
    """
    Decodes a run-length encoded mask into (row, column) pixel coordinates.

    The string is a space-separated sequence of `start length` pairs. Following
    the competition's encoding, `start` is a 1-indexed position in the image
    flattened column by column (top to bottom, then left to right).

    Args:
      rle_code (string tensor) -- scalar run-length encoded string
      height (int) -- number of rows of the encoded image

    Returns:
      pixels (int64 tensor) -- shape (num_pixels, 2), zero-based (row, column) pairs
    """
    numbers = tf.strings.to_number( tf.strings.split(rle_code), tf.int64 )

    # The encoded positions are 1-indexed: shift them to 0-indexed so that the
    # first pixel maps to (0, 0) and the last one stays inside the image.
    starts = numbers[0:-1:2] - 1
    lengths = numbers[1::2]

    # Expand every (start, length) run into its individual flat positions.
    flat_positions = tf.ragged.range(starts, starts + lengths).flat_values

    # Column-major layout: row = position % height, column = position // height.
    pixels = tf.stack([flat_positions % height, flat_positions // height], axis=1)

    return pixels

In [88]:
def pixels_to_mask(pixels, mask_shape=(768, 768)):
  '''
  Draws the given pixel coordinates onto an empty single-channel mask.

  Args:
    pixels (tensor) -- shape (num_pixels, 2), coordinates of the pixels to set
    mask_shape (tuple) -- (height, width) of the mask to create

  Returns:
    mask (uint8 tensor) -- shape (height, width, 1), 1 at the given pixels and 0 elsewhere
  '''
  # SparseTensor requires int64 indices.
  indices = tf.cast(pixels, tf.int64)

  # tf.shape is used rather than len(): it also works on symbolic tensors whose
  # number of rows is only known at run time (e.g. inside tf.data pipelines).
  ones = tf.ones(shape=tf.shape(indices)[:1], dtype=tf.uint8)

  sparse_mask = tf.sparse.SparseTensor(indices=indices, values=ones, dense_shape=mask_shape)

  # to_dense expects the indices in canonical order, hence the reorder.
  dense_mask = tf.sparse.to_dense( tf.sparse.reorder(sparse_mask) )

  return tf.expand_dims(dense_mask, axis=2)

In [56]:
def normalize(input_image):
  """Casts the image to float32 and divides by 255, mapping 0..255 pixel values to [0,1]."""
  return tf.cast(input_image, tf.float32) / 255.0

In [57]:
def load_image(datapoint):
  """
  Loads one training example and returns a resized (image, mask) pair.

  args
    datapoint: indexable pair; element 0 is the path of the JPEG image,
               element 1 is the run-length encoded segmentation of that image

  1. read the image file, decode it as a 3-channel JPEG and resize it to (128,128)
  2. decode the run-length encoding to pixels, project them onto a mask canvas,
     then resize the mask to (128,128)
  3. normalize the resized image to [0,1]

  Both resizes use nearest-neighbour interpolation, and (128,128) matches the
  input size of the network.
  """
  image_path, rle_string = datapoint[0], datapoint[1]

  raw_bytes = tf.io.read_file(image_path)
  decoded_image = tf.image.decode_jpeg(raw_bytes, channels=3)
  resized_image = tf.image.resize(decoded_image, (128, 128), method='nearest')

  full_size_mask = pixels_to_mask(rle_to_pixels(rle_string))
  input_mask = tf.image.resize(full_size_mask, (128, 128), method='nearest')

  return normalize(resized_image), input_mask

In [58]:
# Turn each split into a tf.data.Dataset with one element per row of the split
# (load_image later reads element 0 as the image path and element 1 as the RLE string).
train_paths_dataset = tf.data.Dataset.from_tensor_slices(train_paths)
test_paths_dataset = tf.data.Dataset.from_tensor_slices(test_paths)

In [89]:
# Decode images and masks lazily. Elements are processed in parallel (the output
# order is still deterministic by default), which keeps the input pipeline from
# decoding one image at a time.
train = train_paths_dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
test = test_paths_dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)

## Training The UNet Model

In [91]:
BATCH_SIZE = 512
BUFFER_SIZE = 1000

# Training pipeline: cache the decoded examples, shuffle them, group them into
# batches, repeat indefinitely, and prefetch to overlap input processing with training.
train_dataset = (
    train
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# The test set is only batched (no shuffling, no repetition).
test_dataset = test.batch(BATCH_SIZE)

In [92]:
# Adam with its default settings, binary cross-entropy as the loss,
# and accuracy as the reported metric.
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='binary_crossentropy',
              metrics=['accuracy'])

In [93]:
# Training configuration.
EPOCHS = 2
VAL_SUBSPLITS = 5
TRAIN_LENGTH = len(train_paths)

# The training dataset repeats forever, so an epoch is defined by a step count.
STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE
# Validate on a fraction (1 / VAL_SUBSPLITS) of the test batches per epoch.
VALIDATION_STEPS = len(test_paths) // BATCH_SIZE // VAL_SUBSPLITS


model_history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_steps=VALIDATION_STEPS,
)

Epoch 1/2


InvalidArgumentError: ignored

## Model Evaluation

In [None]:
def dice_metric(y_true, y_pred, class_id=1):
  '''
  Computes the Dice score of one class between two label maps.

  Args:
    y_true (array-like) -- ground-truth label map
    y_pred (array-like) -- predicted label map with the same shape; it must hold
                           class labels, so probabilities have to be thresholded first
    class_id (int) -- label value of the class to evaluate (1 is used here as the
                      foreground label of a binary mask)

  Returns:
    dice_score (float) -- 2 * |intersection| / (|y_true| + |y_pred|), smoothed so
                          that two empty masks score 1.0 instead of dividing by zero
  '''
  smoothening_factor = 0.00001

  true_region = np.asarray(y_true) == class_id
  pred_region = np.asarray(y_pred) == class_id

  intersection = np.sum(pred_region & true_region)
  combined_area = np.sum(true_region) + np.sum(pred_region)

  # The smoothing term is added once to numerator and denominator so the score
  # stays within [0, 1].
  dice_score = (2 * intersection + smoothening_factor) / (combined_area + smoothening_factor)

  return dice_score

