# Dataset Analysis
In this Jupyter Notebook, we analyse the dataset from the Airbus Ship Detection Challenge.
The main task in this challenge is to "find ships on satellite images as quickly as possible".

In [None]:
# Import requirement libraries
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import random
import os

# Count the directory entries of the training and the testing image folders
num_train_images, num_test_images = (
    len(os.listdir(folder)) for folder in ('dataset/train_v2', 'dataset/test_v2')
)
print(f'Number of images for training: {num_train_images}')
print(f'Number of images for testing: {num_test_images}')

In [None]:
# Make function for viewing a random image
def view_random_image(target_dir, target_type):
    """
    Show one randomly chosen image from a dataset sub-folder and return it.

    :param target_dir: path of the dataset root directory (a trailing separator is optional)
    :param target_type: name of the sub-folder of target_dir to pick the image from
    :return: the image array produced by matplotlib.image.imread
    """
    # Setup target directory; os.path.join adds the platform separator only when it is missing
    target_folder = os.path.join(target_dir, target_type)

    # Get a random image file name (sample of size 1, unpacked into a plain string)
    (image_name,) = random.sample(os.listdir(target_folder), 1)

    # Read the image and plot it
    img = mpimg.imread(os.path.join(target_folder, image_name))
    plt.imshow(img)
    # Pass the name itself: a one-element list would be rendered as "['name.jpg']"
    plt.title(image_name)
    plt.axis('off')

    # Show the shape of the image
    print(f"Image shape: {img.shape}")

    # Return the image so that the caller can inspect it further
    return img

In [None]:
# View a random image from the training dataset
# os.path.join('dataset', '') gives the folder name followed by the platform's own
# separator ('dataset\\' on Windows, 'dataset/' elsewhere) instead of a Windows-only literal
img = view_random_image(target_dir=os.path.join('dataset', ''),
                        target_type='train_v2')

In [None]:
# Display the raw pixel array returned by view_random_image
# (the notebook renders the value of the last expression of a cell)
img

In [None]:
# View the image shape
img.shape  # for a colour image: (height, width, colour channels) - rows come first

In [None]:
# Normalization: rescale the pixel values into the 0-1 range
# (assumes 8-bit intensities, i.e. a maximum of 255 - TODO confirm for other image types)
img / 255.0

In [None]:
# Check the *.csv files so that they can be used later
# Load train_ship_segmentations_v2.csv into a pandas DataFrame
# (os.path.join keeps the path valid on every platform, not only on Windows)
train_ground_truth = pd.read_csv(os.path.join("dataset", "train_ship_segmentations_v2.csv"))
print(train_ground_truth.head())
# Number of rows in the file
# NOTE(review): presumably one row per mask, but rows with a missing EncodedPixels
# value would be counted as well - confirm against the data
num_masks = train_ground_truth.shape[0]
print(num_masks)

In [None]:
# Load sample_submission_v2.csv into a pandas DataFrame
# (os.path.join keeps the path valid on every platform, not only on Windows)
submission = pd.read_csv(os.path.join("dataset", "sample_submission_v2.csv"))
submission.head()

In [None]:
# Image file name stored in the row labelled 2 of the ground-truth table
train_ground_truth.ImageId[2]

In [None]:
# Run-length-encoded pixels value stored in the row labelled 2 of the ground-truth table
train_ground_truth.EncodedPixels[2]

In [None]:
# Split the encoded string of row 2 on whitespace and parse every token as an int64
rle_tensor = tf.strings.to_number(
    tf.strings.split(train_ground_truth.EncodedPixels[2]),
    out_type=tf.int64,
)
rle_tensor

In [None]:
# Every second value starting at position 0; tf_rle_decode uses these (minus 1) as the run starts
rle_tensor[::2]

In [None]:
# Every second value starting at position 1; tf_rle_decode uses these as the run lengths
rle_tensor[1::2]

In [None]:
# create function for decoding run-length mask from "train_ship_segmentations_v2.csv"
def tf_rle_decode(rle_string, shape=(768, 768)):
    """
    Function for decoding run-length encoding mask from string.

    The string holds whitespace-separated integers that are read as
    (start, length) pairs. One is subtracted from every start, so starts are
    taken as 1-based positions in the flattened mask. The flat mask is
    reshaped to `shape` as-is; the notebook transposes the result before
    displaying it.

    :param rle_string: run-length string from csv file; None or float NaN
        (what pandas yields for an empty csv cell) decodes to an all-zero mask
    :param shape: shape of output image
    :return: uint8 tensor of the given shape with ones scattered onto the run pixels
    """
    import math  # local import: the notebook's import cell does not provide math

    # A missing value means there is nothing to decode: return an empty mask
    # instead of failing inside tf.strings.split
    if rle_string is None or (isinstance(rle_string, float) and math.isnan(rle_string)):
        return tf.zeros(shape, tf.uint8)

    shape_tensor = tf.convert_to_tensor(shape, tf.int64)
    # Reduce over the int64 tensor so that size has the same dtype as the scatter indices
    size = tf.math.reduce_prod(shape_tensor)

    rle_tensor = tf.strings.split(rle_string)
    rle_tensor = tf.strings.to_number(rle_tensor, tf.int64)

    # Even positions are the run starts (shifted to 0-based), odd positions the run lengths
    starts = rle_tensor[::2] - 1
    lengths = rle_tensor[1::2]

    # Make ones to be scattered
    total_ones = tf.reduce_sum(lengths)
    ones = tf.ones([total_ones], tf.uint8)

    # Make scattering indices
    ones_range = tf.range(total_ones)
    lens_cumsum = tf.math.cumsum(lengths)
    # For every one to be written, find the index of the run it belongs to
    rle_ssorted = tf.searchsorted(lens_cumsum, ones_range, 'right')
    # lens_cumsum[:-1] with a leading 0 is the count of ones emitted before each run;
    # start minus that count is the offset that moves a global position into its run
    idx = ones_range + tf.gather(starts - tf.pad(lens_cumsum[:-1], [(1, 0)]), rle_ssorted)

    # Scatter ones into flattened mask
    mask_flat = tf.scatter_nd(tf.expand_dims(idx, 1), ones, [size])

    # Reshape into mask
    return tf.reshape(mask_flat, shape_tensor)

In [None]:
# Set the id of the image that is checked against its masks
image_id = '0005d01c8.jpg'

# Read image from directory (os.path.join builds a path that is valid on every platform)
image = mpimg.imread(os.path.join('dataset', 'train_v2', image_id))
# Make a list with the masks that belong to image_id;
# dropna skips rows that carry no encoded mask, which cannot be decoded
image_masks = (
    train_ground_truth
    .loc[train_ground_truth['ImageId'] == image_id, 'EncodedPixels']
    .dropna()
    .tolist()
)

# Take the individual ship masks and create a single mask array for all ships
all_masks = tf.zeros((768, 768), tf.uint8)
for mask in image_masks:
    # The decoded mask is transposed before it is added to the combined mask
    all_masks += tf.transpose(tf_rle_decode(mask))

# Create plot with 3 different views: the image, the masks, and the masks over the image
fig, axarr = plt.subplots(1, 3, figsize=(15, 40))
for ax in axarr:
    ax.axis('off')
axarr[0].imshow(image)
axarr[1].imshow(all_masks)
axarr[2].imshow(image)
axarr[2].imshow(all_masks, alpha=0.4)
plt.tight_layout(h_pad=0.1, w_pad=0.1)
plt.show()