# Import Required Modules

In [None]:
import pickle
import warnings
import numpy as np
import pandas as pd
from PIL import Image
from skimage.transform import resize
from tensorflow.keras.applications import densenet
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tqdm import tqdm
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive inside the Colab runtime; force_remount refreshes an
# already-mounted drive instead of reusing it.
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

# Initialize Train, Test & Cross-Validation Data

In [None]:
# CSV files describing the train, test and cross-validation splits.
# The paths are relative to the notebook's current working directory.
TRAIN_DATA_CSV_PATH = 'train_Data.csv'
TEST_DATA_CSV_PATH = 'test_Data.csv'
CV_CSV_PATH = 'cv_Data.csv'

In [None]:
# Read the three dataset splits from their CSV files into DataFrames.
train_dataset, test_dataset, cv_dataset = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_DATA_CSV_PATH, TEST_DATA_CSV_PATH, CV_CSV_PATH)
)

In [None]:
# Reduce every image path to its bare file name (the text after the last '/')
# in both image columns of each split.
for dataset in (train_dataset, test_dataset, cv_dataset):
    for key in ('Image1', 'Image2'):
        dataset[key] = dataset[key].map(lambda path: path.rsplit('/', 1)[-1])

# Load DenseNet121 Model

In [None]:
# DenseNet121 backbone without its classifier head and without pre-loaded
# weights; pooling="avg" makes the backbone emit one pooled vector per image.
densenet_121 = densenet.DenseNet121(
    include_top=False,
    weights=None,
    input_shape=(224, 224, 3),
    pooling="avg",
)
# 14-unit sigmoid head stacked on the backbone output.
densenet_output = Dense(14, activation="sigmoid", name="predictions")(densenet_121.output)
chexnet_model = Model(inputs=densenet_121.input, outputs=densenet_output)

# Load Pre-Trained Weights & Change Output To Second-to-Last Layer

In [None]:
# The weight file contains CheXNet weights including the final prediction
# layer, which is why the model was built with that extra layer. Once the
# weights are loaded, the model is re-wrapped so that its output is the
# second-to-last layer, dropping the prediction layer.
CHEXNET_WEIGHT_PATH = 'brucechou1983_CheXNet_Keras_0.3.0_weights.h5'
chexnet_model.load_weights(CHEXNET_WEIGHT_PATH)
chexnet_model = Model(
    inputs=chexnet_model.input,
    outputs=chexnet_model.layers[-2].output,
)

# Load, Convert & Resize Image

In [None]:
def load_image(img_path):
    """Load an image file and turn it into a single-image input batch.

    The image is converted to RGB, scaled by 1/255, resized to 224x224 and
    given a leading batch axis.

    Args:
        img_path: Path of the image file to open with ``PIL.Image.open``.

    Returns:
        A floating-point NumPy array with a leading batch axis of size 1,
        i.e. shape ``(1, 224, 224, 3)`` for an RGB image.
    """
    # The context manager releases the underlying file handle as soon as the
    # pixel data has been copied out, instead of leaving that to garbage
    # collection while many images are processed in a loop.
    with Image.open(img_path) as loaded_image:
        rgb_image = np.asarray(loaded_image.convert("RGB"))
    rgb_image = rgb_image / 255.0
    rgb_image = resize(rgb_image, (224, 224))
    # Add the batch axis expected by the model's predict call.
    return np.expand_dims(rgb_image, axis=0)

# Update Image Features

In [None]:
# Directory holding the image files; the trailing slash is required because
# file names are appended by plain string concatenation.
IMG_DIR = "Scanned Images/"


def update_image_features_map(features_map, data):
    """Add the concatenated features of each row's two images to a mapping.

    Each row of ``data.values`` is unpacked into exactly four values, in the
    order: row id, first image file name, second image file name, report.
    The report value is not used here.

    Args:
        features_map: Dict updated in place; maps each row id to that row's
            feature array. An existing entry with the same id is overwritten.
        data: DataFrame whose rows unpack as described above.

    Returns:
        None. ``features_map`` is mutated in place.
    """
    for uid, img1_file, img2_file, _report in tqdm(data.values):
        # Stack both images into one batch of two so the model is invoked
        # once per row rather than once per image.
        image_pair = np.concatenate(
            (load_image(IMG_DIR + img1_file), load_image(IMG_DIR + img2_file)),
            axis=0,
        )
        # verbose=0 keeps Keras' per-call progress output from interleaving
        # with the tqdm progress bar.
        pair_features = chexnet_model.predict(image_pair, verbose=0)
        # Flatten the two feature rows side by side into a single row:
        # image 1 features followed by image 2 features.
        features_map[uid] = pair_features.reshape(1, -1)

# Get Image Features For All Images

In [None]:
def generate_image_features(train_data, test_data, cv_data):
    """Build one id-to-features mapping covering all three dataset splits.

    The splits are processed in the order train, test, cross-validation and
    all write into the same dict, so a row id that appears in more than one
    split keeps the entry written last.

    Args:
        train_data: Training split.
        test_data: Test split.
        cv_data: Cross-validation split.

    Returns:
        Dict filled by ``update_image_features_map`` for every split.
    """
    features_by_uid = {}
    for split in (train_data, test_data, cv_data):
        update_image_features_map(features_by_uid, split)
    return features_by_uid

In [None]:
# Extract features for every image pair in all three splits.
enc_dec_image_features = generate_image_features(
    train_data=train_dataset,
    test_data=test_dataset,
    cv_data=cv_dataset,
)

# Save Pickle File For Generated Image Features For Future Usage

In [None]:
# Persist the feature mapping so later runs can load it instead of
# recomputing the features.
SAVE_FILE_PATH = 'Image_Features_Enc_Dec.pickle'
with open(SAVE_FILE_PATH, 'wb') as pickle_file:
    pickle.dump(enc_dec_image_features, pickle_file)