In [None]:
# Mount Google Drive inside the Colab runtime and move into the homework folder,
# so that the relative paths used further down ('public_data.npz', the saved
# model directories, 'ensemble5') resolve against that folder.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive/My Drive/Homework1

Mounted at /gdrive
/gdrive/My Drive/Homework1


In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
import matplotlib.pyplot as plt
from random import randint

In [None]:
import os
import random

# Single seed shared by every random number generator used in the notebook.
seed = 16

# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes -- confirm if hash
# determinism matters for this notebook.
os.environ['PYTHONHASHSEED'] = str(seed)

# Python's built-in generator (used by `randint`) and NumPy's global generator.
random.seed(seed)
np.random.seed(seed)

# TensorFlow global seed, set through both the TF2 and the compat.v1 entry points.
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

In [None]:
def load_data(folder="public_data.npz", resolution=96, head_only=False):
    """Load the image dataset from an ``.npz`` archive and prepare it for the models.

    Every image is centre-cropped to a square whose side is its shorter
    spatial dimension, then resized to ``resolution`` x ``resolution``.
    Pixel values are not rescaled and the channel order is left untouched.

    Args:
        folder: Path of the ``.npz`` archive (despite the name it is a file,
            not a directory). The archive must expose a ``'data'`` entry with
            the images and a ``'labels'`` entry with one label per image.
        resolution: Side length, in pixels, of the returned square images.
        head_only: If True, only the first 10 images and labels are used.

    Returns:
        A tuple ``(images, y)`` where ``images`` is a NumPy array stacking the
        resized images and ``y`` contains the labels, integer-encoded with
        ``LabelEncoder`` and then one-hot encoded over 2 classes.
    """
    head_size = 10

    # The resizing layer has no per-image state, so one instance serves every
    # image instead of building a new layer on each iteration.
    resize = tfkl.Resizing(resolution, resolution)

    # NOTE(review): allow_pickle=True lets NumPy unpickle objects stored in the
    # archive, which can execute arbitrary code -- only load trusted files.
    # The context manager closes the archive even if reading it raises.
    with np.load(folder, allow_pickle=True) as loaded:
        data = loaded['data']
        labels = loaded['labels']

    if head_only:
        data = data[:head_size]
        labels = labels[:head_size]

    images = []
    for img in data:
        # Centre-crop to a square using the shorter of the two spatial sides.
        dim = min(img.shape[:-1])
        top = (img.shape[0] - dim) // 2
        left = (img.shape[1] - dim) // 2
        img = img[top:top + dim, left:left + dim, :]

        # Bring the square crop to the requested resolution.
        images.append(resize(img))

    y = LabelEncoder().fit_transform(labels)
    y = tfk.utils.to_categorical(y, 2)

    return np.array(images), y




def display_random_images(X, y, num_img=10):
  """Show ``num_img`` randomly drawn samples of ``X`` in a two-row grid.

  Each panel displays one image, captioned with its index in ``X`` and the
  corresponding entry of ``y``. Indices are drawn with ``random.randint``, so
  the same sample may appear more than once.

  Args:
      X: Array of images, indexed along its first axis.
      y: Labels aligned with ``X``.
      num_img: Number of images to display (at least 1). Odd values leave the
          last grid cell empty.

  Raises:
      ValueError: If ``num_img`` is smaller than 1.
  """
  if num_img < 1:
    raise ValueError("num_img must be at least 1")

  # Round the column count up so an odd num_img still fits in two rows.
  n_cols = (num_img + 1) // 2

  # squeeze=False keeps `axes` two-dimensional even with a single column.
  fig, axes = plt.subplots(2, n_cols, figsize=(20, 9), squeeze=False)

  # axes.T.flat walks the grid column by column: position i -> axes[i % 2, i // 2].
  for i, ax in enumerate(axes.T.flat):
    if i < num_img:
      image = randint(0, X.shape[0] - 1)

      # NOTE(review): imshow reads float RGB data on a 0-1 scale and integer
      # data on a 0-255 scale; if X holds floats in 0-255 the picture will be
      # saturated -- confirm the intended value range.
      ax.imshow(np.clip(X[image], 0, 255))  # Display clipped item images
      ax.text(0.5, -0.1, str(image) + ' ' + str(y[image]), size=12, ha="center", transform=ax.transAxes)

    # Hide the frame of every cell, including the spare one for odd num_img.
    ax.axis('off')
  plt.tight_layout()
  plt.show()




def delete_outliers(X, y, outlier_indices=(137, 5143)):
  """Drop every sample that is identical to one of the reference outliers.

  The samples found at ``outlier_indices`` are taken as reference images;
  every sample of ``X`` that is element-wise equal to any of them (the
  references themselves and all their duplicates) is removed together with
  its label.

  Args:
      X: Array of samples, indexed along its first axis.
      y: Labels aligned with ``X``.
      outlier_indices: Positions in ``X`` of the reference outlier samples.
          The defaults are the two positions this function used to hard-code
          (presumably the two meme images of the public dataset, labelled
          'shrek' and 'trololo' in earlier code -- verify if the data changes).

  Returns:
      A tuple ``(new_X, new_y, num_outliers)``: the retained samples and
      labels as NumPy arrays, and the number of samples that were removed.

  Raises:
      IndexError: If an entry of ``outlier_indices`` is out of range for ``X``.
  """
  # Capture the reference samples up front; X itself is never modified.
  references = [X[index] for index in outlier_indices]

  new_X = []
  new_y = []
  num_outliers = 0

  for i, sample in enumerate(X):
    if any(np.array_equal(sample, reference) for reference in references):
      num_outliers += 1
    else:
      new_X.append(sample)
      new_y.append(y[i])

  return np.array(new_X), np.array(new_y), num_outliers

In [None]:
# Load the public dataset and immediately strip the known outlier images;
# num_outliers records how many samples were discarded.
X, y, num_outliers = delete_outliers(*load_data('public_data.npz'))

In [None]:
# Hold out 600 samples as the test set. The labels are one-hot, so the split is
# stratified on the class index recovered with argmax.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=600,
    stratify=y.argmax(axis=1),
)

# Carve a 600-sample validation set out of the remainder, again stratified.
# No random_state is passed, so both splits draw from NumPy's global generator
# and depend on the seed set earlier and on the order in which they are run.
X_train, X_val, y_train_0, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=600,
    stratify=y_train_val.argmax(axis=1),
)

# Report the (features, labels) shapes of the train, validation and test sets.
print(
    f"{X_train.shape} {y_train_0.shape}\n"
    f"{X_val.shape} {y_val.shape}\n"
    f"{X_test.shape} {y_test.shape}"
)

(3804, 96, 96, 3) (3804, 2)
(600, 96, 96, 3) (600, 2)
(600, 96, 96, 3) (600, 2)


In [None]:
# Members of the ensemble: five previously trained models, loaded by name from
# the working directory in a fixed order.
model_1, model_2, model_3, model_4, model_5 = (
    tfk.models.load_model(saved_name)
    for saved_name in (
        'Efficientnet_finetuned_NO_INVERSION',
        'Xception_NO_INVERSION_finetuned',
        'mobilenetv2_finetuned_NO_INVERSION',
        'convnext_finetuned_nobatchnorm_NO_INVERSION',
        'convnext_nobatchnorm_NO_INVERSION',
    )
)

In [None]:
# The members are about to be nested inside one model and must carry distinct
# names, so models 2-5 get a numeric suffix (model_1 keeps its original name).
model_2._name = f'{model_2.name}_2'
model_3._name = f'{model_3.name}_3'
model_4._name = f'{model_4.name}_4'
model_5._name = f'{model_5.name}_5'

In [None]:
# Single input layer connected to every member of the ensemble.
input_layer = tfkl.Input(shape=(96, 96, 3), name='input_layer')

# Some pretrained networks (Xception, MobileNetV2, ...) require their own input
# preprocessing. It is attached separately to each branch through a Lambda
# layer wrapping the matching `preprocess_input` function.

branch_1 = model_1(input_layer)    # efficientnet has incorporated preprocessing

branch_2_preprocess = tfkl.Lambda(tfk.applications.xception.preprocess_input)(input_layer)
branch_2 = model_2(branch_2_preprocess)

branch_3_preprocess = tfkl.Lambda(tfk.applications.mobilenet_v2.preprocess_input)(input_layer)
branch_3 = model_3(branch_3_preprocess)

# Each ConvNeXt model gets its own branch and receives the raw input.
# branch_4 must use model_4: wiring model_5 into both branches would leave
# model_4 out of the ensemble and count model_5 twice in the average.
branch_4 = model_4(input_layer)

branch_5 = model_5(input_layer)

# Average the five outputs: sum them element-wise, then divide by the number of
# branches so the result stays on the same scale as a single model's output.
concat = tfkl.Add()([branch_1, branch_2, branch_3, branch_4, branch_5])
divide = tfkl.Lambda(lambda x: x/5)(concat)
final_model = tfk.Model(inputs=input_layer, outputs=divide)

In [None]:
# Compiling is only needed so that evaluate() can report loss and accuracy; the
# ensemble itself is not trained in this notebook.
# `metrics` is passed as a list, the documented form, instead of a bare string.
final_model.compile(
    optimizer=tfk.optimizers.AdamW(),
    loss=tfk.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Score the ensemble on the held-out test split. The model was compiled with a
# loss and an accuracy metric, so the last element of the result is presumably
# the test accuracy (evaluate() lists the loss first, then the metrics).
final_model.evaluate(X_test,y_test)[-1]



0.9883333444595337

In [None]:
# Persist the assembled ensemble under the name 'ensemble5', relative to the
# current working directory.
final_model.save('ensemble5')