# Model testing

Installing kaggle:

In [None]:
!pip install kaggle



Connecting to Google Drive:

In [None]:
# Mount Google Drive under /content/drive (needs the Colab runtime, which provides `google.colab`).
# The Kaggle API token is copied from this mount in a later cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Downloading and unpacking the dataset:

In [None]:
# Create the Kaggle config folder; `-p` makes the cell safe to re-run when the folder already exists.
! mkdir -p ~/.kaggle

In [None]:
# Copy the Kaggle API token from the mounted Drive into ~/.kaggle, where the kaggle CLI is expected to look for it.
!cp /content/drive/MyDrive/ColabNotebooks/kaggle_api/kaggle.json ~/.kaggle/kaggle.json

In [None]:
# Restrict the token file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the competition archive (airbus-ship-detection.zip, about 28.6 GB per the recorded output) into the current directory.
! kaggle competitions download airbus-ship-detection

Downloading airbus-ship-detection.zip to /content
100% 28.6G/28.6G [01:57<00:00, 295MB/s]
100% 28.6G/28.6G [01:57<00:00, 261MB/s]


In [None]:
# Unpack the archive into the current directory.
# `-q` suppresses the per-file "inflating" lines (hundreds of thousands of files would flood the notebook);
# `-o` overwrites without prompting so a re-run never blocks on an interactive question.
! unzip -q -o airbus-ship-detection.zip

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
  inflating: train_v2/f9775b70e.jpg  
  inflating: train_v2/f977a470c.jpg  
  inflating: train_v2/f9780bfaf.jpg  
  inflating: train_v2/f9782bdfe.jpg  
  inflating: train_v2/f9785ca49.jpg  
  inflating: train_v2/f9785e462.jpg  
  inflating: train_v2/f97a719d5.jpg  
  inflating: train_v2/f97afa376.jpg  
  inflating: train_v2/f97b85e3e.jpg  
  inflating: train_v2/f97bd0b11.jpg  
  inflating: train_v2/f97bd6b80.jpg  
  inflating: train_v2/f97bfb1b2.jpg  
  inflating: train_v2/f97c8cbbe.jpg  
  inflating: train_v2/f97cdd397.jpg  
  inflating: train_v2/f97d92e1e.jpg  
  inflating: train_v2/f97da9e70.jpg  
  inflating: train_v2/f97e843d3.jpg  
  inflating: train_v2/f97e961f2.jpg  
  inflating: train_v2/f97e96c68.jpg  
  inflating: train_v2/f97f63ae1.jpg  
  inflating: train_v2/f97f80839.jpg  
  inflating: train_v2/f97f83438.jpg  
  inflating: train_v2/f97fab9ad.jpg  
  inflating: train_v2/f97fd4915.jpg  
  infl

Importing libraries:

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib as plt
from tqdm import tqdm
import os
from skimage.io import imread
from skimage.transform import resize

Defining Unet class:

In [None]:
class Unet:
    """Builder for a U-Net style Keras model that outputs a one-channel sigmoid mask.

    The network has four down-sampling stages (16, 32, 64, 128 filters), a 256-filter
    bottleneck and four up-sampling stages that mirror the encoder and reuse its
    feature maps through skip connections.
    """

    def __init__(self, image_height=768, image_width=768, image_channels=3):
        """Remember the input geometry; the model itself is built by ``get_model``."""
        print("Initializing vars for U-net model...")
        self.im_height, self.im_width, self.im_channels = image_height, image_width, image_channels
        print("Done!")

    @staticmethod
    def _conv3x3(tensor, filters):
        """Apply one 3x3 'same' convolution with ReLU and He-normal initialisation."""
        return tf.keras.layers.Conv2D(
            filters, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same'
        )(tensor)

    @classmethod
    def _double_conv(cls, tensor, filters, dropout_rate):
        """Conv -> Dropout -> Conv: the basic unit used at every resolution level."""
        tensor = cls._conv3x3(tensor, filters)
        tensor = tf.keras.layers.Dropout(dropout_rate)(tensor)
        return cls._conv3x3(tensor, filters)

    @classmethod
    def _up_stage(cls, tensor, skip, filters, dropout_rate, concat_axis=-1):
        """Upsample by 2 with a transposed convolution, merge the skip tensor, then double-conv."""
        upsampled = tf.keras.layers.Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
        merged = tf.keras.layers.concatenate([upsampled, skip], axis=concat_axis)
        return cls._double_conv(merged, filters, dropout_rate)

    def get_model(self):
        """Assemble and return the (uncompiled) ``tf.keras.Model``."""
        inputs = tf.keras.layers.Input(shape=(self.im_height, self.im_width, self.im_channels))
        net = tf.keras.layers.Lambda(lambda x: x / 255)(inputs)  # divide raw pixel values by 255

        # Contraction path: keep every pre-pooling tensor for the skip connections.
        skips = []
        for filters, dropout_rate in ((16, 0.1), (32, 0.1), (64, 0.2), (128, 0.2)):
            net = self._double_conv(net, filters, dropout_rate)
            skips.append(net)
            net = tf.keras.layers.MaxPooling2D((2, 2))(net)

        # Bottleneck
        net = self._double_conv(net, 256, 0.3)

        # Expansive path: consume the skips deepest-first. The last stage passes the
        # channel axis explicitly (3), exactly as the layers were originally declared.
        for filters, dropout_rate, concat_axis in ((128, 0.2, -1), (64, 0.2, -1), (32, 0.1, -1), (16, 0.1, 3)):
            net = self._up_stage(net, skips.pop(), filters, dropout_rate, concat_axis)

        # 1x1 convolution + sigmoid gives one value per pixel.
        outputs = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')(net)
        return tf.keras.Model(inputs=[inputs], outputs=[outputs])

Defining functions for working with the data. According to the competition rules, the image IDs in the submission file must appear in a strictly defined order. Therefore, I have slightly modified the function that prepares the test dataset: it now takes, as an argument, the list of image names read from the sample submission file.

In [None]:
# Function that encodes a mask into a run-length pixel sequence
def encode_pixels(mask, threshold=0.5, image_size=(768, 768), start_index=1):
    """Run-length encode a predicted mask as ``"start length start length ..."``.

    The mask is binarised with ``mask > threshold``, transposed and flattened, so for a
    2-D ``(height, width)`` mask (or a ``(height, width, 1)`` one) pixels are numbered
    column by column, top to bottom. Each run of consecutive foreground pixels
    contributes its first pixel number and its length.

    Args:
        mask: array-like of scores holding ``image_size[0] * image_size[1]`` values.
        threshold: scores strictly greater than this value count as foreground.
        image_size: ``(height, width)``; only the product is used, to validate the
            flattened length (a mismatch raises ``ValueError`` from ``reshape``).
        start_index: number given to the very first pixel. Defaults to 1 because the
            competition's submission format is one-indexed; pass 0 for 0-based starts.

    Returns:
        The space-separated encoding, or an empty string when no pixel is foreground.
    """
    image_length = image_size[0] * image_size[1]  # total number of pixels
    flat = (np.asarray(mask) > threshold).astype(np.uint8).transpose().reshape(image_length)

    # Pad with a background pixel on both sides so every run has a rising and a
    # falling edge, including runs that touch the first or the last pixel.
    padded = np.concatenate(([0], flat, [0]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    starts = edges[0::2]             # 0-based index of the first pixel of each run
    lengths = edges[1::2] - starts   # falling edge is exclusive, so the difference is the run length

    # Interleave as start, length, start, length, ...
    pairs = np.stack([starts + start_index, lengths], axis=1).ravel().tolist()
    return " ".join(str(value) for value in pairs)

# Function that prepares the test dataset for testing a model
def prepare_test_dataset(test_images_path, test_images_titles, image_shape=(768, 768, 3), test_size=-1, start_=0, preprocessing_function=None):
    """Load test images into one ``uint8`` array, in the order given by ``test_images_titles``.

    Args:
        test_images_path: folder that contains the image files.
        test_images_titles: sliceable, sized sequence of image file names.
        image_shape: ``(height, width, channels)`` of every returned image.
        test_size: when positive, only ``test_images_titles[start_:start_ + test_size]``
            is loaded; otherwise all titles are loaded and ``start_`` is ignored.
        start_: offset of the first title to load (used only with a positive ``test_size``).
        preprocessing_function: optional callable applied to each image after resizing.
            Its result is stored into a ``uint8`` array, so fractional values are truncated.

    Returns:
        Array of shape ``(n_images, height, width, channels)`` and dtype ``uint8``, or ``0``
        (after printing a message) when ``test_size`` / ``start_`` / the titles cannot be
        used to select a slice.
    """
    try:
        # If test_size is a positive int, keep only the requested window of titles.
        if test_size > 0:
            test_images_titles = test_images_titles[start_:start_ + test_size]
    except (TypeError, ValueError, KeyError, IndexError):
        # Only argument problems are reported this way; KeyboardInterrupt, MemoryError
        # and the like are no longer swallowed.
        print("Incorrect values for dataset or images paths in 'prepare_test_datasets' function!")
        return 0

    height, width, channels = image_shape[0], image_shape[1], image_shape[2]
    X = np.zeros((len(test_images_titles), height, width, channels), dtype=np.uint8)

    # Wrapping the sequence itself (not the enumerate object) lets tqdm know the total
    # and draw a real progress bar.
    for i, image_title in enumerate(tqdm(test_images_titles)):
        img = imread(os.path.join(test_images_path, str(image_title)))
        if img.ndim == 2:
            # Grayscale file: replicate the single plane so channel slicing below works.
            img = np.stack([img] * channels, axis=-1)
        img = img[:, :, :channels]  # drop extra channels such as alpha

        # If the spatial size differs from the requested one, resize it.
        if img.shape != (height, width, channels):
            img = resize(img, (height, width), mode='constant', preserve_range=True)
        # If a preprocessing function was supplied, apply it to the image.
        if preprocessing_function is not None:
            img = preprocessing_function(img)

        X[i] = img

    return X

Defining variables:

In [None]:
# Input geometry expected by the network
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 768, 768, 3

# Folders: images for model testing, saved model weights, and where results are written
TEST_IMAGES_PATH = 'test_v2'
MODELS_PATH = 'models'
RESULTS_PATH = 'results'

# File names inside MODELS_PATH and RESULTS_PATH respectively
WEIGHTS_FILE_TITLE = 'model_weights_1.h5'
SUBMISSIONS_FILE_NAME = 'submissions.csv'

# [start, size] windows over the list of test images: fifteen chunks of 1000 images
# followed by a final chunk of 606 (15606 images in total).
batches = [[start, min(1000, 15606 - start)] for start in range(0, 15606, 1000)]
# For a quick smoke run use two tiny chunks instead: [[0, 5], [5, 5]]

Creating submission file:

In [None]:
# - Initialize model
print("Initializing model...")
unet_nn = Unet(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
model = unet_nn.get_model()
model.load_weights(MODELS_PATH + "/" + str(WEIGHTS_FILE_TITLE))  # load trained model's weights
print("Done!")

# Create the output folder up front: the loop below runs for hours (about 15 minutes per
# 1000 images in the recorded run), and a missing folder would otherwise only surface
# at the final to_csv call.
os.makedirs(RESULTS_PATH, exist_ok=True)

pixel_sequence_array = []  # encoded pixel sequences, one per image
image_ids = []  # image ids, in the same order as pixel_sequence_array
test_images_titles = pd.read_csv('sample_submission_v2.csv')['ImageId']  # image names in submission order

for r, t in batches:  # r = offset of the first image in the chunk, t = chunk size
    # - Prepare test dataset
    print(f"Preparing test dataset...")
    X_test = prepare_test_dataset(
        TEST_IMAGES_PATH,
        test_images_titles,
        image_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS),
        test_size=t,
        start_=r,
        preprocessing_function=None
    )  # load this chunk of test images
    print("Done!")

    # - Generate predictions for test dataset
    print("Predicting values for test dataset...")
    # No `workers` argument: it has no effect on in-memory arrays and was removed from
    # Model.predict in Keras 3, where passing it raises a TypeError.
    Y_pred = model.predict(X_test)  # predict test masks
    print("Done!")

    # - Prepare predictions:
    print("Preparing predicted values...")
    for k, v in tqdm(enumerate(Y_pred), total=len(Y_pred)):  # for each predicted mask
        # Positional lookup: r + k is an offset into the list, not an index label.
        image_ids.append(test_images_titles.iloc[r + k])
        pixel_sequence_array.append(encode_pixels(v, threshold=0.1))  # encode the mask thresholded at 0.1
    print("Done!")

# Creating resulting .csv file
print(f"Generating resulting file... (it will be saved in {RESULTS_PATH} folder)")
predictions = pd.DataFrame({'ImageId': image_ids, 'EncodedPixels': pixel_sequence_array})  # dataframe of predictions
predictions.to_csv(RESULTS_PATH + '/' + SUBMISSIONS_FILE_NAME, index=False)  # save resulting dataset to .csv file
print("Done!")

Initializing model...
Initializing vars for U-net model...
Done!
Done!
Preparing test dataset...


1000it [00:07, 135.05it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:40<00:00,  1.06it/s]


Done!
Preparing test dataset...


1000it [00:10, 97.65it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:45<00:00,  1.06it/s]


Done!
Preparing test dataset...


1000it [00:09, 108.25it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:52<00:00,  1.05it/s]


Done!
Preparing test dataset...


1000it [00:09, 102.19it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:50<00:00,  1.05it/s]


Done!
Preparing test dataset...


1000it [00:09, 106.73it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:33<00:00,  1.07it/s]


Done!
Preparing test dataset...


1000it [00:09, 103.14it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:17<00:00,  1.09it/s]


Done!
Preparing test dataset...


1000it [00:09, 105.59it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:47<00:00,  1.06it/s]


Done!
Preparing test dataset...


1000it [00:09, 102.86it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:41<00:00,  1.06it/s]


Done!
Preparing test dataset...


1000it [00:09, 104.02it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:37<00:00,  1.07it/s]


Done!
Preparing test dataset...


1000it [00:09, 103.24it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:37<00:00,  1.07it/s]


Done!
Preparing test dataset...


1000it [00:09, 101.80it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:43<00:00,  1.06it/s]


Done!
Preparing test dataset...


1000it [00:09, 101.65it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:48<00:00,  1.05it/s]


Done!
Preparing test dataset...


1000it [00:09, 102.24it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:33<00:00,  1.07it/s]


Done!
Preparing test dataset...


1000it [00:10, 98.74it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:33<00:00,  1.07it/s]


Done!
Preparing test dataset...


1000it [00:09, 105.68it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 1000/1000 [15:25<00:00,  1.08it/s]


Done!
Preparing test dataset...


606it [00:05, 105.04it/s]


Done!
Predicting values for test dataset...
Done!
Preparing predicted values...


100%|██████████| 606/606 [09:13<00:00,  1.10it/s]


Done!
Generating resulting file... (it will be saved in results folder)
Done!


Inspecting the structure of the resulting file:

In [None]:
# Read the saved submission back from disk and preview its first rows.
result = pd.read_csv("/".join((RESULTS_PATH, SUBMISSIONS_FILE_NAME)))
result.head()

Unnamed: 0,ImageId,EncodedPixels
0,00002bd58.jpg,
1,00015efb6.jpg,
2,00023d5fc.jpg,
3,000367c13.jpg,0 1 89969 1 91505 1 93041 1 98255 1 98257 1 99...
4,0008ca6e9.jpg,
