# Weighted Ensemble
We used this notebook to build a weighted ensemble of our best models.

In [None]:
# Fix randomness and hide warnings
seed = 42

import os
# Set before TensorFlow is imported in the next cell
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
# Seed NumPy's global generator so later np.random calls are repeatable
np.random.seed(seed)

import logging

import random
# Use the shared seed rather than a second hard-coded literal
random.seed(seed)

In [None]:
# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from tensorflow.keras import initializers
# Reduce autograph / TensorFlow logger output to errors only
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow with the notebook-wide `seed`
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

In [None]:
# Import other libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.utils import class_weight
import seaborn as sns

# IMPORT DATA

In [None]:
# Dataset folders (one sub-folder per class) and shared input settings
TRAIN_DIR = "/kaggle/input/folders/data/training_folder"
VAL_DIR = "/kaggle/input/folders/data/validation_folder"
TEST_DIR = "/kaggle/input/folders/data/test_folder"
IMG_SHAPE = (96, 96)  # passed as target_size to the directory generator
BATCH_SIZE = 32

In [None]:
import glob
import cv2

In [None]:
# Glob patterns for the .jpg files of each split / class
training_h='/kaggle/input/folders/data/training_folder/healthy/*.jpg'
training_un='/kaggle/input/folders/data/training_folder/unhealthy/*.jpg'
validation_h='/kaggle/input/folders/data/validation_folder/healthy/*.jpg'
validation_un='/kaggle/input/folders/data/validation_folder/unhealthy/*.jpg'
test_h='/kaggle/input/folders/data/test_folder/healthy/*.jpg'
test_un='/kaggle/input/folders/data/test_folder/unhealthy/*.jpg'

# glob.glob returns paths in arbitrary (filesystem-dependent) order; sorting
# gives a stable starting order so that a later seeded shuffle is repeatable.
obj_tr_h=sorted(glob.glob(training_h))
obj_tr_un=sorted(glob.glob(training_un))
obj_val_h=sorted(glob.glob(validation_h))
obj_val_un=sorted(glob.glob(validation_un))
obj_ts_h=sorted(glob.glob(test_h))
obj_ts_un=sorted(glob.glob(test_un))

In [None]:
# Load the validation images into memory with two label encodings:
#   y_val    -> one scalar per image (0 = healthy, 1 = unhealthy)
#   y_val_mu -> one-hot pair per image ([1, 0] = healthy, [0, 1] = unhealthy)
y_val=[]
y_val_mu=[]

x_val=[]

for el in obj_val_h:
    raw=cv2.imread(el)
    if raw is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        raise ValueError(f"Could not read image: {el}")
    im=cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
    x_val.append(im)
    y_val.append([np.float32(0)])
    y_val_mu.append([np.float32(1),np.float32(0)])

for el in obj_val_un:
    raw=cv2.imread(el)
    if raw is None:
        raise ValueError(f"Could not read image: {el}")
    im=cv2.cvtColor(raw, cv2.COLOR_BGR2RGB)
    x_val.append(im)
    y_val.append([np.float32(1)])
    y_val_mu.append([np.float32(0),np.float32(1)])

shuffle_indexes=np.arange(len(y_val))
# Call the seeding function. The previous `np.random.seed=seed` assignment
# replaced the function with an int and left the generator unseeded.
np.random.seed(seed)
np.random.shuffle(shuffle_indexes)

x_val=np.array(x_val)
y_val=np.array(y_val)
y_val_mu=np.array(y_val_mu)

# Apply the same permutation to images and both label arrays
x_val=x_val[shuffle_indexes]
y_val=y_val[shuffle_indexes]
y_val_mu=y_val_mu[shuffle_indexes]

In [None]:
# Sanity check: shapes of the loaded validation images and labels
print(x_val.shape)
print(y_val.shape)

In [None]:
# Settings for the tf.data validation pipeline
AUTO = tf.data.AUTOTUNE
BATCH_SIZE = 32  # same value as the BATCH_SIZE set in the data-path cell
IMG_SIZE = 96  # side length used by preprocess_image when resizing

In [None]:
# One-hot encode the scalar validation labels. The guard keeps the cell
# idempotent: re-running it no longer encodes an already one-hot array again.
if y_val.ndim < 2 or y_val.shape[-1] != 2:
    y_val = tf.keras.utils.to_categorical(y_val, num_classes=2)

In [None]:
def preprocess_image(image, label):
    """Resize `image` to IMG_SIZE x IMG_SIZE and convert it to float32.

    The label is passed through untouched so the function can be used with
    `Dataset.map` on (image, label) pairs.
    """
    resized = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return tf.image.convert_image_dtype(resized, tf.float32), label

In [None]:
# Validation pipeline: slice arrays -> preprocess -> batch -> prefetch
val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_ds = val_ds.map(preprocess_image, num_parallel_calls=AUTO)
val_ds = val_ds.batch(BATCH_SIZE)
val_ds = val_ds.prefetch(AUTO)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Train generator. No augmentation is configured here: ImageDataGenerator()
# is created with its default arguments.
train_image_gen  = ImageDataGenerator()                                      

# Directory iterator over TRAIN_DIR; its `.classes` attribute is used in the
# next cell to compute class weights.
train_dataset = train_image_gen.flow_from_directory(directory=TRAIN_DIR,
                                                    target_size=IMG_SHAPE,
                                                   # color_mode='rgb',
                                                    classes=None,
                                                    class_mode='binary',
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=True,
                                                    seed=seed,
                                                    )

In [None]:
# Balanced class weights for the training labels, keyed by class index
train_class_ids = np.unique(train_dataset.classes)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=train_class_ids,
    y=train_dataset.classes,
)

class_weights = dict(zip(train_class_ids, balanced_weights))
class_weights

# IMPORT MODELS

In [None]:
# Load the saved MobileNetV2 model, then re-wrap it in a Model named "MobileNet"
# (presumably so each ensemble member has a distinct name — confirm)
m1 = tfk.models.load_model('/kaggle/input/mobilenet-80acc/MobileNetV2_80acc')
m1 = tfk.models.Model(inputs=m1.inputs, outputs=m1.outputs, name="MobileNet")

In [None]:
# Load the saved ResNet50 model, then re-wrap it in a Model named "ResNet"
# (presumably so each ensemble member has a distinct name — confirm)
m2 = tfk.models.load_model('/kaggle/input/resnet-80acc/RESNET 80 ACC/ResNet50_TL2')
m2 = tfk.models.Model(inputs=m2.inputs, outputs=m2.outputs, name="ResNet")

In [None]:
# Load the saved ConvNext model, then re-wrap it in a Model named "ConvNextTiny"
# (presumably so each ensemble member has a distinct name — confirm)
m3 = tfk.models.load_model('/kaggle/input/convnext/ConvNext/SubmissionModel')
m3 = tfk.models.Model(inputs=m3.inputs, outputs=m3.outputs, name="ConvNextTiny")

In [None]:
# Load the saved ConvNext base model, then re-wrap it in a Model named "ConvNextBase"
# (presumably so each ensemble member has a distinct name — confirm)
m4 = tfk.models.load_model('/kaggle/input/convnextbase/Convnext_base/SubmissionModel')
m4 = tfk.models.Model(inputs=m4.inputs, outputs=m4.outputs, name="ConvNextBase")

In [None]:
# Ensemble members; the order matters because the weighted layer applies
# its weights by position.
models = [m1, m2, m3, m4]

# BUILD ENSEMBLE

In [None]:
# Baseline ensemble: feed one shared 96x96x3 input to every model and take
# the unweighted average of their outputs.
model_input = tfk.Input(shape=(96, 96, 3))
model_outputs = [model(model_input) for model in models]
ensemble_output = tfk.layers.Average()(model_outputs)
ensemble_model = tfk.Model(inputs=model_input, outputs=ensemble_output)

In [None]:
# Print the layer summary of the averaged ensemble
ensemble_model.summary()

We built a custom layer to apply weights to the ensemble model.

In [None]:
class WeightedAverageLayer(tfk.layers.Layer):
    """Combine four model outputs into one weighted average.

    Args:
        w1, w2, w3, w4: Scalar weights applied to the first, second, third and
            fourth input tensor respectively.
        **kwargs: Forwarded to the base Keras layer constructor.

    Raises:
        ValueError: If the weights sum to zero, or if `call` receives a number
            of inputs other than four.

    The weighted sum is divided by the sum of the weights, so the result is a
    true average. Scaling every output by a positive constant leaves the
    arg-max (the predicted class) unchanged.
    """

    def __init__(self, w1, w2, w3, w4, **kwargs):
        super(WeightedAverageLayer, self).__init__(**kwargs)
        self.w1 = w1
        self.w2 = w2
        self.w3 = w3
        self.w4 = w4
        weight_sum = w1 + w2 + w3 + w4
        if weight_sum == 0:
            raise ValueError("The ensemble weights must not sum to zero.")
        self._weight_sum = weight_sum

    def call(self, inputs):
        if len(inputs) != 4:
            raise ValueError(
                f"WeightedAverageLayer expects 4 inputs, got {len(inputs)}."
            )
        weighted_sum = (
            self.w1*inputs[0]
            + self.w2*inputs[1]
            + self.w3*inputs[2]
            + self.w4*inputs[3]
        )
        # Normalise so the layer returns an average rather than a scaled sum
        return weighted_sum / self._weight_sum

    def get_config(self):
        # Expose the constructor arguments so the layer can be serialised
        config = super(WeightedAverageLayer, self).get_config()
        config.update({"w1": self.w1, "w2": self.w2, "w3": self.w3, "w4": self.w4})
        return config

In [None]:
# Replace the plain average with the weighted layer. Weights follow the order
# of `models`: 0.8 for the first two models, 1 for the last two.
ensemble_output = WeightedAverageLayer(0.8, 0.8, 1, 1) (model_outputs)
ensemble_model = tfk.Model(inputs=model_input, outputs=ensemble_output)

# TEST THE ENSEMBLE
We used the validation data to compute validation metrics; the real test is performed on the submission.

In [None]:
# Ensemble predictions on the validation images, printed next to the one-hot labels
out = ensemble_model.predict(x_val, verbose=0)
# (disabled) thresholding variant; note that `pred` is not defined in this cell
# out = (pred >= 0.5).astype(float)
print(y_val[:15])
print(out[:15])

In [None]:
# Collapse each score vector / one-hot label row into its class index
out2 = list(map(np.argmax, out))
y_val2 = list(map(np.argmax, y_val))
print(out2[:40])
print(y_val2[:40])

In [None]:
# Accuracy of the ensemble's class indices against the validation labels
accuracy = tf.keras.metrics.BinaryAccuracy(name="binary_accuracy")
accuracy.update_state(y_val2, out2)
accuracy.result().numpy()

In [None]:
# Precision of the ensemble's class indices against the validation labels
precision = tf.keras.metrics.Precision(name="precision")
precision.update_state(y_val2, out2)
precision.result().numpy()

In [None]:
# Recall of the ensemble's class indices against the validation labels
recall = tf.keras.metrics.Recall(name="recall")
recall.update_state(y_val2, out2)
recall.result().numpy()

In [None]:
# F1 score on the predicted class indices.
# The inputs are a single column of 0/1 values, so an explicit threshold is
# used instead of the default arg-max handling of y_pred.
# NOTE: this rebinds `f1_score`, shadowing sklearn's f1_score imported earlier.
f1_score = tf.keras.metrics.F1Score(
    name="f1_score", dtype=None, threshold=0.5
)
f1_score.update_state(
    np.asarray(y_val2).reshape(-1, 1),
    np.asarray(out2).reshape(-1, 1),
)
f1_score.result().numpy()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
import matplotlib.pyplot as plt

# Confusion matrix of the ensemble predictions on the validation set
labels = ["Healthy", "Unhealthy"]
cm = confusion_matrix(y_val2, out2)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues)
plt.show()

# Test Time Augmentation

In [None]:
def TTA(model, X, n_steps=10, batch_size=32):
    """Predict class indices for `X` using test-time augmentation.

    `X` is passed `n_steps` times through a randomly augmenting generator
    (rotation, brightness, flips). The model predictions of all passes are
    averaged, and the arg-max of the averaged scores is returned.

    Args:
        model: Model exposing `predict`.
        X: Array of images; indexed and measured with `len`.
        n_steps: Number of augmented passes to average (default 10).
        batch_size: Batch size used by the generator (default 32).

    Returns:
        1-D array with one predicted class index per sample.
    """
    test_datagen = ImageDataGenerator(
                rotation_range=90,
                # width_shift_range=0.1,
                # height_shift_range=0.1,
                # zoom_range=[1,1.3],
                # shear_range=0.1,
                brightness_range=(0.7,1.25),
                vertical_flip=True,
                horizontal_flip=True,
                fill_mode='reflect',
                )

    # Integer number of batches; ceil so the last partial batch is included
    # (the previous `len(X)/BATCH` produced a float step count).
    steps = int(np.ceil(len(X) / batch_size))

    y_hats = []
    for _ in range(n_steps):
        # shuffle=False keeps sample order identical across passes so the
        # predictions can be averaged element-wise.
        preds = model.predict(
            test_datagen.flow(X, batch_size=batch_size, shuffle=False),
            steps=steps,
            verbose=0,
        )
        y_hats.append(preds)

    pred = np.mean(np.array(y_hats), axis=0)
    print(pred.shape)
    # Per-sample arg-max over the flattened scores
    results = pred.reshape(len(pred), -1).argmax(axis=1)
    return results

In [None]:
# Run test-time augmentation with the weighted ensemble on the validation images
results=TTA(ensemble_model,x_val)
y_pred=results

In [None]:
# Accuracy of the TTA predictions against the validation labels
accuracy = tf.keras.metrics.BinaryAccuracy(name="binary_accuracy")
accuracy.update_state(y_val2, y_pred)
accuracy.result().numpy()

In [None]:
# Precision of the TTA predictions against the validation labels
precision = tf.keras.metrics.Precision(name="precision")
precision.update_state(y_val2, y_pred)
precision.result().numpy()

In [None]:
# Recall of the TTA predictions against the validation labels
recall = tf.keras.metrics.Recall(name="recall")
recall.update_state(y_val2, y_pred)
recall.result().numpy()

In [None]:
# F1 score on the TTA class indices.
# The inputs are a single column of 0/1 values, so an explicit threshold is
# used instead of the default arg-max handling of y_pred.
# NOTE: this rebinds `f1_score`, shadowing sklearn's f1_score imported earlier.
f1_score = tf.keras.metrics.F1Score(
    name="f1_score", dtype=None, threshold=0.5
)
f1_score.update_state(
    np.asarray(y_val2).reshape(-1, 1),
    np.asarray(y_pred).reshape(-1, 1),
)
f1_score.result().numpy()

In [None]:
# Confusion matrix of the TTA predictions on the validation set
labels = ["Healthy", "Unhealthy"]
cm = confusion_matrix(y_val2, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues)
plt.show()

In [None]:
# Persist the weighted ensemble to the Kaggle working directory
ensemble_model.save("/kaggle/working/ensemble_conv")

In [None]:
# Reload the saved ensemble to check that it can be restored.
# NOTE(review): the model contains the custom WeightedAverageLayer and no
# custom_objects are passed here — confirm that loading works as intended.
modelo = tfk.models.load_model("/kaggle/working/ensemble_conv")