In [1]:
import pandas as pd
import numpy as np
import pathlib

import hydra
from hydra import initialize, compose
from omegaconf import OmegaConf
initialize("./configs")

from sklearn.preprocessing import Binarizer
import tensorflow as tf
from tensorflow import keras

from utils.misc import get_items_on_path
from utils.vision.transformation import get_image_and_reshape
from training import prepare_X_and_y

import matplotlib.pyplot as plt
%matplotlib inline

hydra.initialize()

In [2]:
cfg = compose(config_name="config.yaml")
np.random.seed(cfg.project_setup.RANDOM_STATE_N)

PATH_TRAIN_IMAGE_FOLDER = pathlib.Path(cfg.project_setup.paths.data.TRAIN_IMAGE_FOLDER)
PATH_TRAIN_METADATA = pathlib.Path(cfg.project_setup.paths.data.TRAIN_METADATA)


In [3]:
# load already trained model
path_base = pathlib.Path("__file__").parents[0]
path_model = path_base.joinpath("outputs", "2022-02-06", "12-59-28", "model")
model = keras.models.load_model(path_model)

In [4]:
# load data and convert to X

# read meta data
train_metadata = pd.read_csv(PATH_TRAIN_METADATA)

## load all train images into memory as dictionary with image_id as key and the image (np.ndarray) as value
input_img_shape = (cfg.preprocessing.INPUT_SHAPE.HEIGHT, cfg.preprocessing.INPUT_SHAPE.WIDTH)

train_images_paths = get_items_on_path(PATH_TRAIN_IMAGE_FOLDER)
train_images = [get_image_and_reshape(train_image_path, input_img_shape) for train_image_path in train_images_paths]
train_images_dict = {image_id: image for image, image_id in train_images}

X, y = prepare_X_and_y(cfg=cfg, ids_and_images=train_images_dict, metadata=train_metadata)

In [10]:
## Finding optimal threshold for creating a binary mask
with tf.device(f'/device:GPU:{cfg.training.device.GPU}'):
    preds = model.predict(X)
    
    threshold_ranges = np.arange(0.25, 0.76, 0.05)

    accuracies = []

    for threshold in threshold_ranges:
        pred_mask = Binarizer(threshold = threshold).transform(preds.reshape(-1, 1)).reshape(preds.shape)
        accuracies.append((pred_mask == y).sum() / pred_mask.size)

    threshold_results_df = pd.DataFrame({
        'threshold': threshold_ranges,
        'accuracy': accuracies
    })

threshold_results_df.round(3).sort_values('accuracy', ascending = False)

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:1 in order to run _EagerConst: Dst tensor is not initialized.

In [None]:
# Visualizing Predictions with optimal threshold for creating binary mask
num_preds = 5
sample_pred_ids = np.random.randint(len(X), size = num_preds)

pred_y = model.predict(X[sample_pred_ids])
pred_y_mask = Binarizer(threshold = threshold_results_df['threshold'].iloc[0]).transform(pred_y.reshape(-1, 1)).reshape(pred_y.shape)

plt.figure(figsize = (20 , 20))
for i in range(num_preds):
    plt.subplot(num_preds, 4, (4 * i) + 1)
    plt.imshow(X[sample_pred_ids[i]], cmap = 'gray')
    plt.axis('off')
    plt.title(f'Input image - {image_ids[sample_pred_ids[i]]}', fontsize = 16)
    
    plt.subplot(num_preds, 4, (4 * i) + 2)
    plt.imshow(y[sample_pred_ids[i]], cmap = 'gray')
    plt.axis('off')
    plt.title('Expected output mask', fontsize = 16)
    
    plt.subplot(num_preds, 4, (4 * i) + 3)
    plt.imshow(pred_y[i], cmap = 'gray')
    plt.axis('off')
    plt.title('Predicted mask', fontsize = 16)
    
    plt.subplot(num_preds, 4, (4 * i) + 4)
    plt.imshow(pred_y_mask[i], cmap = 'gray')
    plt.axis('off')
    plt.title('Binarized mask', fontsize = 16)
    
plt.suptitle("Sample inputs and outputs", fontsize = 24)
plt.tight_layout(rect = [0, 0, 0.90, 1])
plt.show()

In [None]:
img_ids = []
rle_encondings = []
for i in range(num_preds):
    img_id = image_ids[sample_pred_ids[i]]
    img_ids.append(img_id)
    
    rle_encoding = rle_encode(pred_y_mask[i])
    rle_encondings.append(rle_encoding)

sample_submission = pd.DataFrame({
    'id': img_ids,
    'predicted': rle_encondings
})