# AutoGluon Submission

## Imports

In [None]:
import gc
import os
from multiprocessing.dummy import Pool

import pandas as pd
import pyvips
from autogluon.vision import ImagePredictor


## Environment Variables

In [None]:
# These paths assume the notebook runs inside a Kaggle kernel.
# Directory that holds test.csv.
INPUT_DIRECTORY = "../input/mayo-clinic-strip-ai"
# Directory the source .tif test images are read from.
TEST_IMAGE_DIR = f"{INPUT_DIRECTORY}/test"
# Directory the resized JPEG images are written to.
IMAGE_OUTPUT_DIRECTORY = "/kaggle/working/scaled_images"
# Saved predictor that is loaded for inference.
MODEL_PATH = "../input/autogluon052-standalone-and-model/model-01.ag"


## Helper Functions

In [None]:
def preprocess_image(image_id: str, maxw: int, maxh: int) -> None:
    """Thumbnail one test image with a smart crop and save it as a JPEG.

    Reads ``<TEST_IMAGE_DIR>/<image_id>.tif`` and writes
    ``<IMAGE_OUTPUT_DIRECTORY>/<image_id>.jpeg``.

    Args:
        image_id: File stem of the source ``.tif`` image.
        maxw: Target width handed to ``pyvips.Image.thumbnail``.
        maxh: Target height handed to ``pyvips.Image.thumbnail``.
    """
    # Make sure the destination exists before writing. ``exist_ok=True``
    # keeps this safe when several workers reach this line at once.
    os.makedirs(IMAGE_OUTPUT_DIRECTORY, exist_ok=True)

    source_path = os.path.join(TEST_IMAGE_DIR, image_id + ".tif")
    target_path = os.path.join(IMAGE_OUTPUT_DIRECTORY, image_id + ".jpeg")

    # The smart crop is applied with attention features.
    # Other options: https://libvips.github.io/pyvips/enums.html#pyvips.enums.Interesting
    out = pyvips.Image.thumbnail(
        source_path,
        maxw,
        height=maxh,
        crop="attention",
    )
    out.write_to_file(target_path, Q=100)

    # Drop the image reference and actually run a collection so memory is
    # released before this worker picks up the next image.
    del out
    gc.collect()

def save_dataset(num_workers, iterable):
    """Run ``preprocess_image`` over many images with a pool of workers.

    ``Pool`` here is ``multiprocessing.dummy.Pool``, so the workers are
    threads in the current process.

    Args:
        num_workers: Number of pool workers to start.
        iterable: Iterable of argument tuples; each tuple is unpacked into
            one ``preprocess_image(image_id, maxw, maxh)`` call.
    """
    # Enable verbose logging.
    os.environ["VIPS_PROGRESS"] = "1"
    # Limit pyvips to two threads.
    # NOTE(review): these variables are set after pyvips has been imported;
    # confirm libvips still honours them at this point.
    os.environ["VIPS_CONCURRENCY"] = "2"

    pool = Pool(num_workers)
    try:
        pool.starmap(preprocess_image, iterable)
    finally:
        # Always shut the workers down, even when a task raised, so no
        # threads are left behind.
        pool.close()
        pool.join()
    

## Process Images

In [None]:
 # Load the test data, used for evaluation.
test_df = pd.read_csv(f"{INPUT_DIRECTORY}/test.csv")

# Resize images, apply smart crop, and save to disk.
# Source: https://github.com/libvips/pyvips/issues/291#issuecomment-994714555
image_ids = test_df["image_id"]
max_width = 1024
max_height = 1024
# One (image_id, width, height) tuple per image. Deliberately not named
# `iter`, which would shadow the builtin for every later cell.
preprocess_args = [(image_id, max_width, max_height) for image_id in image_ids]
save_dataset(num_workers=3, iterable=preprocess_args)

# Add file path of transformed images to dataframe.
test_df["image"] = test_df["image_id"].apply(
    lambda x: os.path.join(IMAGE_OUTPUT_DIRECTORY, x + ".jpeg")
)


## Make Predictions

In [None]:
# Restore the previously trained image predictor from disk.
trained_model = ImagePredictor.load(path=MODEL_PATH)

# Predict class probabilities for the test dataframe, then rename the
# probability columns: label 0 becomes "CE" and label 1 becomes "LAA".
raw_probas = trained_model.predict_proba(test_df)
df_predict_probas = raw_probas.rename(columns={0: "CE", 1: "LAA"})

# Attach the probabilities to the test rows by matching on the index.
predictions = pd.merge(
    test_df, df_predict_probas, left_index=True, right_index=True
)

# The submission requires the columns patient_id, CE, and LAA.
submission_columns = ["patient_id", "CE", "LAA"]
submisson = predictions[submission_columns]

# Only one prediction per patient is allowed, so the predictions are
# averaged over all rows that share a patient_id.
rows_by_patient = submisson.sort_values(by="patient_id")
final_submission = rows_by_patient.groupby("patient_id").mean()

# Save predictions; patient_id is the index and is written as the first column.
final_submission.to_csv("submission.csv")
