# Transfer Learning Submission

## Imports

In [None]:
import gc
import glob
import os
from glob import glob

import numpy as np
import pandas as pd
import pyvips
import skimage.io as io
from keras.models import load_model
from PIL import Image
from skimage.color import rgb2gray, rgba2rgb
from skimage.filters.thresholding import threshold_otsu
from skimage.util import img_as_ubyte
from tensorflow import keras
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing import image as image_utils
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tqdm.auto import trange


## Helper Function

In [None]:
def _drop_low_signal(image, min_unique=75):
    """Return ``image`` without the rows and columns that carry almost no signal.

    A row (or column) is dropped when it holds ``min_unique`` or fewer distinct
    values, counted over all of its pixels and channels together.

    Source: https://www.kaggle.com/code/abhishek123maurya/image-cropping-without-altering-pixel-values
    """
    # Rows first. The column scan then runs on the row-cropped array so the
    # indices it collects refer to the same array they are deleted from.
    empty_rows = [
        i for i in range(image.shape[0]) if len(np.unique(image[i, :])) <= min_unique
    ]
    cropped = np.delete(image, empty_rows, axis=0)
    empty_cols = [
        j
        for j in range(cropped.shape[1])
        if len(np.unique(cropped[:, j])) <= min_unique
    ]
    return np.delete(cropped, empty_cols, axis=1)


def pre_process_data(path, save_path, *, size=896, min_unique=75):
    """Read the image at ``path``, mask and crop it, and save it to ``save_path``.

    Steps:
      1. Load a thumbnail of the image with pyvips (``crop="attention"``).
      2. Compute an Otsu threshold on the grayscale image and zero every pixel
         whose gray value is above it.
      3. Remove rows and columns that contain ``min_unique`` or fewer distinct
         values.
      4. Write the result to ``save_path`` with ``quality=100``.

    Args:
        path: Location of the source image.
        save_path: Destination file for the processed image.
        size: Target size handed to ``pyvips.Image.thumbnail``.
        min_unique: Rows/columns with this many distinct values or fewer are
            treated as empty and removed.

    Any exception raised while reading, processing or saving propagates to the
    caller.
    """
    try:
        # 1792x1792 crop was used for training. I did not have enough time to
        # evaluate the impact of using 896x896 on test data.
        thumb = pyvips.Image.thumbnail(path, size, crop="attention")

        # dtype must be given explicitly: np.ndarray defaults to float64, for
        # which a one-byte-per-band buffer is too small.
        # NOTE(review): assumes 8-bit bands (pyvips "uchar") - confirm for
        # inputs with a different sample format.
        image = np.ndarray(
            buffer=thumb.write_to_memory(),
            dtype=np.uint8,
            shape=[thumb.height, thumb.width, thumb.bands],
        )

        # Thresholding the image.
        image_gray = rgb2gray(image)
        # Find threshold between background and foreground.
        thresh = threshold_otsu(image_gray)
        binary = image_gray <= thresh
        # Broadcast the 2-D mask over the channel axis; masked-out pixels
        # become 0 and the remaining ones keep their original values.
        # Source: https://stackoverflow.com/questions/72239660/how-can-one-apply-a-mask-on-a-numpy-array-which-leaves-the-original-values-uncha
        image = image * binary[..., None]

        # Remove regions with no signal and save the *cropped* result.
        # NOTE(review): if the training images were saved without this crop,
        # confirm that cropping at inference time is what the model expects.
        cropped = _drop_low_signal(image, min_unique)
        if cropped.size == 0:
            # Every row or column was below the signal threshold; keep the
            # masked image rather than trying to write an empty array.
            cropped = image

        io.imsave(save_path, cropped, quality=100)
    finally:
        # Locals are released when the function returns. Deleting them by name
        # here would raise for any name not yet bound when an earlier step
        # failed, hiding the real exception.
        gc.collect()


## Process Images


In [None]:
# Load the test data.
test_df = pd.read_csv("../input/mayo-clinic-strip-ai/test.csv")

# Save images to current directory for inference.
# exist_ok=True lets this cell be re-run without failing on the directory
# created by a previous run.
os.makedirs("./scaled_images", exist_ok=True)

# Crop and resize images.
# NOTE: despite its name, this list holds the *test* images.
train_images = glob("../input/mayo-clinic-strip-ai/test/*.tif")
for i in trange(len(train_images)):
    # Build the output name from the file stem so that only the extension is
    # swapped, independent of how the glob spelled the directory prefix.
    image_id = os.path.splitext(os.path.basename(train_images[i]))[0]
    pre_process_data(
        train_images[i],
        os.path.join("./scaled_images", image_id + ".jpeg"),
    )


In [None]:
# Record, for every row, where its preprocessed JPEG lives on disk.
def _scaled_image_path(image_id):
    """Return the path of the scaled JPEG belonging to ``image_id``."""
    return os.path.join("./scaled_images", image_id + ".jpeg")


test_df["image"] = test_df["image_id"].map(_scaled_image_path)
test_df.head()


## Make Predictions

In [None]:
# Side length, in pixels, that images are resized to before prediction.
SIZE = 384

# Restore the trained network from the attached dataset.
_MODEL_PATH = "../input/mayoclinic-efficientnetb4/TransferLearn_EfficientNet_Mayo.h5"
pretrained_model = load_model(_MODEL_PATH)

# Generator used only for its ``standardize`` step (per-sample centering).
datagen_valid = ImageDataGenerator(samplewise_center=True)

def make_predictions(image_path):
    """Run the trained model on the image stored at ``image_path``.

    The image is loaded at ``SIZE`` x ``SIZE``, converted to an array, given a
    leading batch axis of length one, standardized with ``datagen_valid`` and
    passed to ``pretrained_model.predict``.

    Args:
        image_path: Path of the image file to score.

    Returns:
        The value returned by ``pretrained_model.predict`` for the one-image
        batch; index ``[0]`` holds the outputs for this image.
    """
    # Use SIZE for the load as well as the reshape so the two cannot drift
    # apart when the input resolution is changed.
    image = image_utils.load_img(image_path, target_size=(SIZE, SIZE))
    image = image_utils.img_to_array(image)
    image = image.reshape(1, SIZE, SIZE, 3)
    image = datagen_valid.standardize(image)
    return pretrained_model.predict(image)

# Save predictions to dataframe.
# Each image is scored exactly once; both output columns are read from that
# single prediction instead of running the model once per column.
# Column 0 is written as "CE" and column 1 as "LAA".
_probas = [make_predictions(image_path)[0][:2] for image_path in test_df["image"]]
df_predict_probas = pd.DataFrame(
    _probas, columns=["CE", "LAA"], index=test_df.index
)

# Merge predictions with patient_id (rows are matched on the shared index).
predictions = test_df.merge(df_predict_probas, left_index=True, right_index=True)

# Columns for submission.
predictions_clean = predictions[["patient_id", "CE", "LAA"]]

# Only one prediction per patient is allowed, so average over each patient's
# images.
submission = predictions_clean.sort_values(by="patient_id").groupby("patient_id").mean()

# Save predictions to csv.
submission.to_csv("submission.csv")
