In [None]:
import pandas as pd
import numpy as np
import pathlib
import pickle
import hydra
from hydra import initialize, compose
from omegaconf import OmegaConf
# Hydra keeps process-wide state: calling initialize() a second time in the
# same kernel raises because GlobalHydra is already initialized. Clearing it
# first makes this cell safe to re-run without restarting the kernel.
from hydra.core.global_hydra import GlobalHydra
GlobalHydra.instance().clear()
initialize("../configs")

from sklearn.preprocessing import Binarizer
import tensorflow as tf
from tensorflow import keras

from utils.vision.transformation import rle_encode
from training import prepare_X_and_y

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Compose the Hydra configuration and seed NumPy's global RNG from it, so the
# np.random calls later in this notebook are repeatable.
cfg = compose(config_name="config.yaml")
np.random.seed(cfg.project_setup.RANDOM_STATE_N)

# A notebook has no __file__, so all paths are anchored at the kernel's current
# working directory. (The previous Path("__file__").absolute().parents[0]
# resolved to exactly this directory, via a misleading string literal.)
PATH_PARENT = pathlib.Path.cwd()
PATH_CACHE_DATA = PATH_PARENT.joinpath(cfg.project_setup.paths.data.PREPROCESSED_CACHE)
# Saved model of one specific, hard-coded training run.
# NOTE(review): the path segments look like a Hydra multirun date/time/job
# layout — update them when evaluating a different run.
PATH_MODEL = PATH_PARENT.joinpath("data", "working","multirun", "2022-02-12", "12-14-51", "0","model")

In [None]:
# Restore the saved Keras model from PATH_MODEL.
model = keras.models.load_model(PATH_MODEL)

# Read the cached training data back from disk.
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# point PATH_CACHE_DATA at a cache file produced by this project.
with PATH_CACHE_DATA.open('rb') as cache_file:
    train_data = pickle.load(cache_file)

# Split the cached data into model inputs (X) and target masks (y).
X, y = prepare_X_and_y(train_data)

In [None]:
## Finding optimal threshold for creating a binary mask
# Predict on the full input set on the GPU selected in the config.
with tf.device(f'/device:GPU:{cfg.training.device.GPU}'):
    preds = model.predict(X)

# Candidate cut-offs: 0.25 up to 0.75 in steps of 0.05.
threshold_ranges = np.arange(0.25, 0.76, 0.05)

# Binarizer expects 2-D input, so the predictions are flattened into a single
# column once; each binarized result is reshaped back before comparing with y.
flat_preds = preds.reshape(-1, 1)

# Share of mask entries that equal the ground truth, one value per cut-off.
accuracies = [
    np.count_nonzero(
        Binarizer(threshold = cutoff).transform(flat_preds).reshape(preds.shape) == y
    ) / preds.size
    for cutoff in threshold_ranges
]

threshold_results_df = pd.DataFrame({
    'threshold': threshold_ranges,
    'accuracy': accuracies,
})

# Best-performing thresholds first.
threshold_results_df.round(3).sort_values(by = 'accuracy', ascending = False)

In [None]:
# Visualizing predictions with the chosen threshold for creating the binary mask
# Cut-off applied to the raw model outputs; named so it is easy to find and to
# keep in sync with the result of the threshold sweep.
BINARY_MASK_THRESHOLD = 0.4

# Sampling is without replacement, so never ask for more rows than X holds.
num_preds = min(5, len(X))
# Draw distinct indices so the same image is not plotted twice. np.random.choice
# uses NumPy's global RNG and therefore still honours np.random.seed.
sample_pred_ids = np.random.choice(len(X), size = num_preds, replace = False)

pred_y = model.predict(X[sample_pred_ids])
# Binarizer expects 2-D input: flatten to one column, then restore the shape.
pred_y_mask = (
    Binarizer(threshold = BINARY_MASK_THRESHOLD)
    .transform(pred_y.reshape(-1, 1))
    .reshape(pred_y.shape)
)

# Build the id lookup once instead of once per plotted row.
# NOTE(review): indexing the key list with a row index of X assumes
# prepare_X_and_y keeps train_data's key order — confirm.
image_ids = list(train_data.keys())

# One row per sampled image, four panels per row. squeeze=False keeps `axes`
# two-dimensional even when only a single row is drawn.
fig, axes = plt.subplots(num_preds, 4, figsize = (20, 20), squeeze = False)
for row, sample_idx in enumerate(sample_pred_ids):
    # X and y are indexed by dataset position; the prediction arrays are
    # indexed by position within the sample.
    panels = (
        (X[sample_idx], f'Input image - {image_ids[sample_idx]}'),
        (y[sample_idx], 'Expected output mask'),
        (pred_y[row], 'Predicted mask'),
        (pred_y_mask[row], 'Binarized mask'),
    )
    for ax, (image, title) in zip(axes[row], panels):
        ax.imshow(image, cmap = 'gray')
        ax.axis('off')
        ax.set_title(title, fontsize = 16)

fig.suptitle("Sample inputs and outputs", fontsize = 24)
fig.tight_layout(rect = [0, 0, 0.90, 1])
plt.show()

In [None]:
# Assemble a small submission-style table for the sampled predictions: one row
# per sample, holding the image id and the run-length encoding of its
# binarized mask.
# NOTE(review): looking ids up by position assumes prepare_X_and_y keeps
# train_data's key order — confirm.
train_image_ids = list(train_data.keys())

img_ids = [train_image_ids[sample_pred_ids[i]] for i in range(num_preds)]
rle_encondings = [rle_encode(pred_y_mask[i]) for i in range(num_preds)]

sample_submission = pd.DataFrame({'id': img_ids, 'predicted': rle_encondings})

sample_submission