<a href="https://colab.research.google.com/github/mcjauregui/CNN_MedImageClassification/blob/main/EnsembleChain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Two Functional Models (Pretrained and Custom): Trained and Evaluated Individually, as an Ensemble, and Chained



In [None]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf
import os
import cv2
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout, Rescaling
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, Average

In [None]:
# Colab-only step: attach the user's Google Drive to the runtime filesystem so
# the image folders and saved models can be reached through ordinary paths.
from google.colab import drive

_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)

# Optional sanity check of the Drive contents (IPython shell escape):
#!ls /content/drive/MyDrive/BOOTCAMP

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load every PNG found under base_dir (recursively) into memory as an RGB NumPy array.
#
# PIL's Image module (opening, resizing, cropping, saving images) is imported here;
# the actual loading below is done with OpenCV (cv2).

from PIL import Image
import matplotlib.pyplot as plt

# Define base directory
# Directory structure: base_dir must point to the directory in Google Drive where the images are stored
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# Dictionary mapping file name (key) -> image data as an RGB NumPy array (value)
# NOTE: keys are bare file names, so two files with the same name in different
# sub-directories share one key and the later one overwrites the earlier one.
images = {}

# os.walk(base_dir) walks the directory tree rooted at base_dir and yields one
# (root, dirs, files) tuple per directory visited:
#   root  -> path of the current directory
#   _     -> names of its sub-directories (unused here, hence the underscore)
#   files -> names of the non-directory files in the current directory
for root, _, files in os.walk(base_dir):
    for file in files:
        # Keep only PNG files; compare case-insensitively so '.PNG' is not silently skipped
        # (adjust the suffix to '.jpg', '.jpeg', ... for other image types)
        if not file.lower().endswith('.png'):
            continue

        # Full path of the image: current directory joined with the file name
        file_path = os.path.join(root, file)

        # (1) Read the file with OpenCV. The result is a NumPy array in BGR channel
        #     order, or None when the file cannot be read/decoded.
        img = cv2.imread(file_path)
        if img is None:
            # Passing None to cv2.cvtColor would raise; report and move on instead
            print(f"Skipping unreadable image: {file_path}")
            continue

        # (2) Convert BGR -> RGB so the image displays correctly with matplotlib
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # (3) Store the RGB array under its file name (adds or updates the entry)
        images[file] = img_rgb

# images.keys() returns a dict_keys view; printing it shows 'dict_keys([...])'
# followed by the list of file names that were loaded
print(images.keys())

# Display example image (optional preview, left disabled)
#if images:  # an empty dictionary evaluates to False
#    example_key = next(iter(images))  # first key in the dictionary
#    plt.imshow(images[example_key])   # show the RGB array stored under that key
#    plt.axis('off')                   # hide axis labels and ticks
#    plt.title(example_key)            # use the file name as the plot title
#    plt.show()
#else:
#    print("No images found in the specified directory structure.")

dict_keys(['000115 (5).png', '000115.png', '000116 (5).png', '000109 (3).png', '000116 (9).png', '000114.png', '000115 (2).png', '000112 (2).png', '000116 (3).png', '000113 (3).png', '000115 (9).png', '000117.png', '000112 (9).png', '000109 (8).png', '000108 (8).png', '000110 (7).png', '000109 (4).png', '000108 (7).png', '000113.png', '000114 (10).png', '000116 (8).png', '000117 (6).png', '000111 (2).png', '004162_01_01_150.png', '4 (2).png', '004007_01_01_519.png', '003828_02_01_174.png', '8 - Copy (3).png', '4 - Copy (2).png', '7.png', '6 - Copy.png', '6 - Copy (2) - Copy.png', '7 - Copy (3).png', '5.png', '6 - Copy (3).png', '7 - Copy (2).png', '000110.png', '000128.png', '000120.png', '000130.png', '000118 (2).png', '000112.png', '000108 (2).png', '000109.png', '000113 (2).png', '000110 (2).png', '000108.png', '000116.png', '000131.png', '000126.png', '000115 (3).png', '000122.png', '000111.png', '000119 (5).png', '000119.png', '000118 (5).png', '000116 (2).png', '000114 (4).png', 

In [None]:
# Define base directory
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# Build the path of every directory level from MyDrive down to Data, one
# os.path.join at a time, so each level can be inspected individually.
_levels = ['/content/drive/MyDrive']
for _folder in ('BOOTCAMP', 'ColabNotebooks', 'ProjectWithGreg', 'Data'):
    _levels.append(os.path.join(_levels[-1], _folder))

drive_dir, bootcamp_dir, colab_notebooks_dir, project_dir, data_dir = _levels

# Uncomment to print the contents at each level and confirm the paths are correct
#print("Contents of MyDrive:", os.listdir(drive_dir))
#print("Contents of BOOTCAMP:", os.listdir(bootcamp_dir))
#print("Contents of ColabNotebooks:", os.listdir(colab_notebooks_dir))
#print("Contents of ProjectWithGreg:", os.listdir(project_dir))
#print("Contents of Data:", os.listdir(data_dir))

In [None]:
# Derive the train / test / valid directory paths from base_dir

# Define base directory
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# os.path.join appends each split folder name to base_dir with the OS path
# separator, giving '<base_dir>/train', '<base_dir>/test' and '<base_dir>/valid'
train_dir, test_dir, valid_dir = (
    os.path.join(base_dir, split_name) for split_name in ('train', 'test', 'valid')
)


### List the folders to see their arrangement

In [None]:
# List the names of the entries (files and sub-directories) directly inside base_dir.
# As the last expression of the cell, the returned list is shown as the cell output.

os.listdir(base_dir)

['valid',
 'test',
 'train',
 'best_chained_model_resnet.keras',
 'best_model_base_sparse.keras',
 'best_model_resnet_manual.keras',
 'new_best_resnet_manual_sparse.keras',
 'best_chained_resnet_customcnn_manual_sparse.keras',
 'best_resnet_manual_sparse_X2.keras',
 'best_resnet_manual_sparse_X2_B.keras',
 'best_resnet_manual_sparse.keras',
 'best_model_manual_sparse.keras',
 'first_model_manual_sparse.keras',
 'best_second_manual_sparse.keras',
 'first_model.keras',
 'second_model.keras',
 'best_chained_model_manual_resnet_customcnn.keras',
 'ensemble_model.keras',
 'chained_model.keras']

# Create train, test and validation datasets

In [None]:
# Generate training_set, testing_set and validation_set with
# tf.keras.utils.image_dataset_from_directory (the documented home of this
# function; tf.keras.preprocessing.image_dataset_from_directory is the older alias).

# image_dataset_from_directory labels images automatically from sub-directory names:
# each sub-directory is treated as a class and labels are assigned as integers starting from 0


def _load_image_split(directory, shuffle):
    """Return a batched image dataset read from *directory*.

    Args:
        directory: path whose sub-directories each hold the images of one class;
            the sub-directory name becomes the class label.
        shuffle: whether the dataset order is shuffled. Shuffled datasets yield
            a new order on each pass, so labels gathered in one pass do not line
            up with predictions gathered in another.

    Returns:
        A tf.data.Dataset of (images, labels) batches with 224x224 images,
        batches of 32 and integer (sparse) labels.
    """
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        seed=101,
        image_size=(224, 224),
        batch_size=32,
        label_mode='int',   # 'int' gives sparse labels for sparse_categorical_crossentropy
        shuffle=shuffle,
    )


# Training data is shuffled for training
training_set = _load_image_split(train_dir, shuffle=True)

# Evaluation splits keep a fixed order so that labels extracted from them stay
# aligned with predictions computed in a separate pass
testing_set = _load_image_split(test_dir, shuffle=False)
validation_set = _load_image_split(valid_dir, shuffle=False)

Found 613 files belonging to 4 classes.
Found 315 files belonging to 4 classes.
Found 72 files belonging to 4 classes.


## Use tf.keras.Input and tf.keras.layers to build first_model with the Functional API


In [None]:
# Build first_model with the Functional API: a frozen ImageNet-pretrained ResNet50
# backbone followed by a small trainable classification head

from tensorflow.keras.layers import BatchNormalization

# Shape constants and class count, fixed before the model and data pipeline are defined
img_size = (224, 224)     # same size the datasets are resized to
channels = 3
img_shape = (*img_size, channels)
class_count = len(training_set.class_names)   # class_names is set by image_dataset_from_directory

# Input tensor: shape of the raw image data entering the model
inputs = Input(shape=img_shape)

# Data augmentation pipeline built from tf.keras.layers
data_augmentation = tf.keras.Sequential(
    [RandomFlip("horizontal"), RandomRotation(0.2), RandomZoom(0.2)]
)

# In the Functional API the data flow is explicit: each layer is called on the
# previous layer's output
augmented_inputs = data_augmentation(inputs)

# Normalize pixel values to [0, 1] before they reach the ResNet50 layers
# NOTE(review): the ImageNet ResNet50 weights are documented to expect
# keras.applications.resnet50.preprocess_input rather than 1/255 scaling - confirm
# whether this rescaling is intentional.
scaled_inputs = Rescaling(1./255)(augmented_inputs)

# ResNet50 backbone fed by scaled_inputs
# pooling='max' makes the backbone output (batch_size, channels), which the Dense
# layers below can consume without a Flatten layer
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='max',
    input_tensor=scaled_inputs,
)

# Freeze every backbone layer so its weights are not retrained
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Custom head on top of the backbone output
x = base_model.output
for head_layer in (
    BatchNormalization(axis=-1),
    Dense(256, activation='relu'),
    Dropout(0.25),
):
    x = head_layer(x)

# Softmax output with one unit per class: a vector of class probabilities.
# Both sub-models end this way so their outputs can be ensembled directly.
outputs = Dense(class_count, activation='softmax')(x)

# Assemble first_model from its input tensor and final output tensor
first_model = Model(inputs=inputs, outputs=outputs)

# Print the architecture for inspection
first_model.summary()

Optimize, Prepare to Save, Define File Path



In [None]:
# Define optimizer (Adam with its default settings)
optimizer = Adam()

# Prepare to save model
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# Create base directory if it doesn't exist; exist_ok=True replaces the separate
# os.path.exists() check and so avoids the check-then-create race
os.makedirs(base_dir, exist_ok=True)

# Full file path (inside base_dir) where first_model is saved
first_filepath = os.path.join(base_dir, 'first_model.keras')

Define EarlyStopping and ModelCheckpoint callbacks


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# EarlyStopping callback
#   monitor='val_accuracy'     : the metric being watched
#   patience=20                : stop after 20 epochs without improvement in val_accuracy
#   restore_best_weights=True  : put back the weights from the best monitored epoch
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# ModelCheckpoint callback: write the model to first_filepath only when
# val_accuracy reaches a new maximum
checkpoint = ModelCheckpoint(
    first_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

Compile, Train, Save First Model

In [None]:
# Compile first_model; the sparse loss matches the integer labels produced by the datasets
first_model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

# Train first_model for up to 100 epochs with checkpointing and early stopping
_first_fit_options = {
    'epochs': 100,
    'verbose': 1,
    'validation_data': validation_set,
    'callbacks': [checkpoint, early_stopping],
}
history = first_model.fit(training_set, **_first_fit_options)

# Save model in specified directory
# NOTE(review): first_filepath is also the ModelCheckpoint target, so this save
# replaces the checkpointed file with the model's state at the end of fit().
first_model.save(first_filepath)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step - accuracy: 0.4862 - loss: 1.2595
Epoch 1: val_accuracy improved from -inf to 0.37500, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/first_model.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 429ms/step - accuracy: 0.4886 - loss: 1.2572 - val_accuracy: 0.3750 - val_loss: 1.2283
Epoch 2/100
[1m19/20[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 266ms/step - accuracy: 0.5338 - loss: 1.0640
Epoch 2: val_accuracy did not improve from 0.37500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 278ms/step - accuracy: 0.5364 - loss: 1.0596 - val_accuracy: 0.2778 - val_loss: 1.5157
Epoch 3/100
[1m19/20[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 178ms/step - accuracy: 0.5634 - loss: 0.9972
Epoch 3: val_accuracy did not improve from 0.37500
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 198ms/step - 

Define second_model

In [None]:
# Build second_model, the custom CNN, with the Functional API

from tensorflow.keras.layers import Input, RandomFlip, RandomRotation, RandomZoom, Dense
from tensorflow.keras.layers import Rescaling, Conv2D, MaxPooling2D, Dropout, Flatten
from tensorflow.keras.models import Model

# Input shape and class count
img_size = (224, 224)       # same 224x224 size the datasets are resized to
channels = 3
img_shape = (*img_size, channels)
class_count = len(training_set.class_names)

# Input layer
input_tensor = Input(shape=img_shape)

# Data augmentation followed by rescaling of pixel values to [0, 1]
x = input_tensor
for preprocessing_layer in (
    RandomFlip("horizontal"),
    RandomRotation(0.2),
    RandomZoom(0.2),
    Rescaling(1./255),
):
    x = preprocessing_layer(x)

# Three Conv2D + MaxPooling2D stages (32, 32 and 64 filters); the second and
# third stages are each followed by Dropout(0.25)
for conv_filters, add_dropout in ((32, False), (32, True), (64, True)):
    x = Conv2D(filters=conv_filters, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    if add_dropout:
        x = Dropout(0.25)(x)

# Flatten the feature maps and classify with a dense head
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.25)(x)

# Softmax output with one unit per class
outputs = Dense(class_count, activation='softmax')(x)

# Assemble the model
second_model = Model(inputs=input_tensor, outputs=outputs)

# Print the architecture for inspection
second_model.summary()


Optimize, Prepare to Save Model, Define Path

In [None]:
# Define optimizer (a fresh Adam instance with default settings for second_model)
optimizer = Adam()

# Prepare to save model
base_dir = '/content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data'

# Create base directory if it doesn't exist; exist_ok=True replaces the separate
# os.path.exists() check and so avoids the check-then-create race
os.makedirs(base_dir, exist_ok=True)

# Full file path (inside base_dir) where second_model is saved
second_filepath = os.path.join(base_dir, 'second_model.keras')

Define EarlyStopping and ModelCheckpoint callbacks

In [None]:
# EarlyStopping and ModelCheckpoint callbacks for second_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop after 20 epochs without improvement in val_accuracy and put back the
# weights from the best monitored epoch
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to second_filepath only when val_accuracy reaches a new maximum
checkpoint = ModelCheckpoint(
    second_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

Compile, Train, Save Second Model

In [None]:
# Compile second_model; the sparse loss matches the integer labels produced by the datasets
second_model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

# Train second_model for up to 100 epochs with early stopping and checkpointing
_second_fit_options = {
    'validation_data': validation_set,
    'epochs': 100,
    'callbacks': [early_stopping, checkpoint],
    'verbose': 1,
}
history = second_model.fit(training_set, **_second_fit_options)

# Save model to specified filepath
# NOTE(review): second_filepath is also the ModelCheckpoint target, so this save
# replaces the checkpointed file with the model's state at the end of fit().
second_model.save(second_filepath)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - accuracy: 0.2504 - loss: 1.6982
Epoch 1: val_accuracy improved from -inf to 0.27778, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/second_model.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 226ms/step - accuracy: 0.2508 - loss: 1.6886 - val_accuracy: 0.2778 - val_loss: 1.3722
Epoch 2/100
[1m19/20[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 285ms/step - accuracy: 0.3650 - loss: 1.3349
Epoch 2: val_accuracy did not improve from 0.27778
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 294ms/step - accuracy: 0.3634 - loss: 1.3346 - val_accuracy: 0.2361 - val_loss: 1.3098
Epoch 3/100
[1m19/20[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 173ms/step - accuracy: 0.3799 - loss: 1.2683
Epoch 3: val_accuracy improved from 0.27778 to 0.44444, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWi

# Ensembling Two Models

In [None]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Input, Average
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization

In [None]:
# Reload first_model and second_model from their saved .keras files

# Full paths of the saved models inside base_dir
first_filepath = os.path.join(base_dir, 'first_model.keras')
second_filepath = os.path.join(base_dir, 'second_model.keras')

# Load both models, first_model first
first_model, second_model = (
    load_model(saved_path) for saved_path in (first_filepath, second_filepath)
)

In [None]:
# Extract labels from TensorFlow datasets (training_set, testing_set, validation_set) created using tf.keras.preprocessing.image_dataset_from_directory
# ensemble_model needs labels to compute loss (compare predictions to true labels) and update model during training

#training_set and validation_set are tf.data.Dataset objects that return batches of (images, labels)
#code below loops through datasets to extract images and labels
#tf.concat used to combine batch-wise labels into single tensor

import numpy as np

# function get_labels extracts labels from given dataset
# TensorFlow datasets often yield batches of data and labels together
# function iterates through each batch, extracts labels, concatenates labels into single array

def get_labels(dataset):
    """Return all labels of *dataset* as a single NumPy array.

    The dataset is iterated once. Each item must be an (images, labels) pair;
    the images are ignored and each labels object is converted with .numpy().
    The per-batch arrays are then joined along axis 0, in iteration order.
    """
    per_batch_labels = [batch_labels.numpy() for _, batch_labels in dataset]
    return np.concatenate(per_batch_labels, axis=0)

# Extract labels: y_train, y_test and y_val hold all labels of training_set,
# testing_set and validation_set respectively (one full pass over each dataset)
# NOTE(review): these labels line up with model predictions only if the dataset
# yields its samples in the same order on every pass - confirm for shuffled datasets.
y_train, y_test, y_val = (
    get_labels(split) for split in (training_set, testing_set, validation_set)
)

### Prepare data and build ensemble model to average outputs

In [None]:
#STEP 1: Generate Predictions from Submodels
# Each prediction array has shape (num_samples, num_classes)
#
# Labels and both models' predictions are gathered in ONE pass per dataset.
# A shuffled dataset yields a different order on every pass, so calling
# model.predict(dataset) once per model and reading labels in yet another pass
# would pair each prediction row with the wrong sample and the wrong label.

import numpy as np


def _predict_with_labels(dataset, *models):
    """Collect labels and per-model predictions from a single pass over *dataset*.

    Args:
        dataset: iterable of (images, labels) batches, e.g. a tf.data.Dataset.
        *models: Keras models to run on every batch.

    Returns:
        A (labels, predictions) tuple. labels is one NumPy array with all batch
        labels; predictions is a list holding one NumPy array per model, with
        rows in the same order as labels.
    """
    label_batches = []
    prediction_batches = [[] for _ in models]
    for batch_images, batch_labels in dataset:
        label_batches.append(batch_labels.numpy())
        # Every model sees exactly the same batch, so rows stay aligned across models
        for collected, model in zip(prediction_batches, models):
            collected.append(model.predict_on_batch(batch_images))
    labels = np.concatenate(label_batches, axis=0)
    predictions = [np.concatenate(batches, axis=0) for batches in prediction_batches]
    return labels, predictions


# Generate predictions for training set (y_train is refreshed so it matches the prediction order)
y_train, (preds_first_model_train, preds_second_model_train) = _predict_with_labels(
    training_set, first_model, second_model
)

# Generate predictions for validation set (y_val is refreshed the same way)
y_val, (preds_first_model_val, preds_second_model_val) = _predict_with_labels(
    validation_set, first_model, second_model
)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 273ms/step
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 175ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step


In [None]:
#STEP 2: Define EarlyStopping and ModelCheckpoint callbacks for ensemble_model

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop after 20 epochs without improvement in val_accuracy and put back the
# weights from the best monitored epoch
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# File path (inside base_dir) where ensemble_model is saved
ensemble_filepath = os.path.join(base_dir, 'ensemble_model.keras')

# Write the model to ensemble_filepath only when val_accuracy reaches a new maximum
checkpoint = ModelCheckpoint(
    ensemble_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
#Step 3: Build And Train ensemble_model To Process Combined Predictions

from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Average, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Averaging the two sub-models' outputs keeps the per-sample shape: one value per class

# 1. Average predictions
#   a. Average predictions from both models for 'training set'
ensemble_input_train = (preds_first_model_train + preds_second_model_train) / 2

#   b. Average predictions from both models for 'validation set'
ensemble_input_val = (preds_first_model_val + preds_second_model_val) / 2

# 2. Build ensemble_model
#   The class count is read from the prediction arrays instead of being
#   hard-coded, so the cell keeps working if the number of classes changes
num_classes = ensemble_input_train.shape[-1]

#   a. Input layer: one averaged probability per class
ensemble_input = Input(shape=(num_classes,))

#   b. Dense softmax layer producing one probability per class
final_output = Dense(num_classes, activation='softmax')(ensemble_input)

#   c. Define ensemble model
ensemble_model = Model(inputs=ensemble_input, outputs=final_output)

# 3. Compile ensemble model (sparse loss because the labels are integers)
ensemble_model.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])

# 4. Train ensemble model on averaged predictions
#    fit() returns a history object with the loss and accuracy values of each epoch
history = ensemble_model.fit(
    x=ensemble_input_train,                       # averaged sub-model predictions for the training set
    y=y_train,                                    # true training labels; must be in the same sample
                                                  # order as ensemble_input_train
    validation_data=(ensemble_input_val, y_val),  # averaged validation predictions and their true labels,
                                                  # evaluated after each epoch
    epochs=100,                                   # upper bound on epochs; early stopping may end training sooner
    callbacks=[early_stopping, checkpoint],       # early_stopping halts training when val_accuracy stops improving;
                                                  # checkpoint saves the model whenever val_accuracy improves
    verbose=1                                     # show per-epoch progress, loss, accuracy and validation metrics
)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.2544 - loss: 1.4251
Epoch 1: val_accuracy improved from -inf to 0.25000, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/ensemble_model.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 186ms/step - accuracy: 0.2535 - loss: 1.4251 - val_accuracy: 0.2500 - val_loss: 1.4418
Epoch 2/100
[1m 1/20[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - accuracy: 0.2188 - loss: 1.4235
Epoch 2: val_accuracy did not improve from 0.25000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2364 - loss: 1.4283 - val_accuracy: 0.2361 - val_loss: 1.4379
Epoch 3/100
[1m 1/20[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - accuracy: 0.1250 - loss: 1.4656
Epoch 3: val_accuracy did not improve from 0.25000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accur

In [None]:
#STEP 4: Save model in specified directory and display summary

# Save model in specified directory
# NOTE(review): ensemble_filepath is also the ModelCheckpoint target used during
# training, so this save replaces the checkpointed file with the model's current state.
ensemble_model.save(ensemble_filepath)

# Display ensemble model summary (layers, output shapes, parameter counts)
ensemble_model.summary()


# Chaining first_model and second_model

In [None]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras.layers import Input, RandomFlip, RandomRotation, RandomZoom, Rescaling, Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.layers import BatchNormalization

When chaining two models, apply data augmentation and rescaling only in the first model, not in the second.

In [None]:
#DEFINE MOD_FIRST_MODEL, A MODIFIED VERSION of FIRST_MODEL WITH FUNCTIONAL API
#DO NOT TRAIN MODEL INDEPENDENTLY, JUST DEFINE IT
#mod_first_model maps an image batch to a 256-value feature vector (no softmax head),
#so that mod_second_model can be attached to its output.

from tensorflow.keras.models import Model

# Specify img_size, channels, img_shape, and class_count before defining model and data pipeline
img_size = (224, 224)                         # img_size 224x224 is what ResNet50 expects
channels = 3
img_shape = (img_size[0], img_size[1], channels)
class_count = len(training_set.class_names)   # class_names auto defined when image_dataset_from_directory creates dataset

# Input tensor. Built from img_shape (instead of a second hard-coded (224, 224, 3))
# so that editing img_size/channels above actually reaches the model.
inputs = Input(shape=img_shape)

# Data augmentation layers, grouped in one Sequential block
data_augmentation = tf.keras.Sequential([
    RandomFlip("horizontal"),
    RandomRotation(0.2),
    RandomZoom(0.2),
])

# Apply augmentation to the input tensor
augmented_inputs = data_augmentation(inputs)

# Rescale pixel values by 1/255 before the ResNet50 layers.
# NOTE(review): Keras documents keras.applications.resnet50.preprocess_input
# (RGB->BGR plus ImageNet mean subtraction, no scaling) as the preprocessing that
# matches ResNet50's ImageNet weights; 1/255 rescaling gives the frozen backbone a
# different input range. Left unchanged here -- confirm how first_model was defined
# before switching, so the two models stay comparable.
scaled_inputs = Rescaling(1./255)(augmented_inputs)

# ResNet50 backbone fed by scaled_inputs.
# pooling='max' makes the backbone output a (batch_size, channels) tensor, so the
# Dense layers below can be attached without a Flatten layer.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_tensor=scaled_inputs,
    pooling='max')

# Freeze every layer reachable through base_model so the pretrained weights are not retrained
for layer in base_model.layers:
    layer.trainable = False

# Custom layers on top of the backbone
x = base_model.output
x = BatchNormalization(axis=-1)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.25)(x)

# No softmax output layer: the Dropout output, shape (batch_size, 256), is the feature vector
mod_first_model_output = x

# Use 'inputs' (not base_model.input) so augmentation and rescaling are part of the model
mod_first_model = Model(inputs=inputs, outputs=mod_first_model_output)

#model summary
mod_first_model.summary()

In [None]:
#DEFINE BUT DON'T TRAIN MODIFIED SECOND_MODEL, MOD_SECOND_MODEL, TO CHAIN IT WITH MOD_FIRST_MODEL

from tensorflow.keras.layers import Input, RandomFlip, RandomRotation, RandomZoom, Dense
from tensorflow.keras.layers import Rescaling, Conv2D, MaxPooling2D, Dropout, Flatten
from tensorflow.keras.models import Model

# Re-derive the shared settings (img_shape is not used further in this cell)
img_shape = (img_size[0], img_size[1], channels)
class_count = len(training_set.class_names)

# The input is a 256-value feature vector, matching the Dense(256)/Dropout output of mod_first_model.
# Augmentation, rescaling, convolution, pooling and flatten stages are therefore omitted here.
input_tensor = Input(shape=(256,))

# Hidden part of the classification head: Dense(256, relu) followed by Dropout(0.25)
x = input_tensor
for head_layer in (Dense(256, activation='relu'), Dropout(0.25)):
    x = head_layer(x)

# Softmax output with one unit per class
mod_second_model_output = Dense(class_count, activation='softmax')(x)

# Wrap input and output tensors into a model
mod_second_model = Model(
    inputs=input_tensor,
    outputs=mod_second_model_output,
)

# Print the summary to verify the layer stack
mod_second_model.summary()

In [None]:
#CHAIN mod_first_model and mod_second_model

# Symbolic output tensor of mod_first_model (the feature vector)
mod_first_model_output = mod_first_model.output

# Call mod_second_model on that feature tensor; the result is its classification-probability output
mod_second_model_output = mod_second_model(mod_first_model_output)

# chained_model joins the two models into one:
#   inputs  -> the input of mod_first_model, so data passes through mod_first_model's layers first
#   outputs -> the output of mod_second_model when fed mod_first_model's feature vector
chained_model = Model(
    inputs=mod_first_model.input,
    outputs=mod_second_model_output,
)

chained_model.summary()

In [None]:
# Full path (under base_dir) of the file the chained model is saved to
chained_filepath = os.path.join(base_dir, 'chained_model.keras')

# Compile with an Adam optimizer created with its default arguments
optimizer = Adam()
chained_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
#Callbacks used when training chained_model

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once val_accuracy has gone 20 epochs without improving, and put the best weights back
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# Write the model to chained_filepath whenever val_accuracy reaches a new maximum
checkpoint = ModelCheckpoint(
    filepath=chained_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Fit chained_model for up to 100 epochs with the checkpoint and early-stopping callbacks
history = chained_model.fit(
    training_set,
    validation_data=validation_set,
    epochs=100,
    callbacks=[checkpoint, early_stopping],
    verbose=1,
)

# Save the model as it stands after fit(); this is the same path the checkpoint callback writes to
chained_model.save(filepath=chained_filepath)

Epoch 1/100
[1m19/20[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 229ms/step - accuracy: 0.4425 - loss: 1.2193
Epoch 1: val_accuracy improved from -inf to 0.47222, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/ProjectWithGreg/Data/chained_model.keras
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 690ms/step - accuracy: 0.4471 - loss: 1.2136 - val_accuracy: 0.4722 - val_loss: 1.3457
Epoch 2/100
[1m19/20[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 202ms/step - accuracy: 0.5607 - loss: 1.0077
Epoch 2: val_accuracy did not improve from 0.47222
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 220ms/step - accuracy: 0.5601 - loss: 1.0110 - val_accuracy: 0.4583 - val_loss: 1.8716
Epoch 3/100
[1m19/20[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 186ms/step - accuracy: 0.5285 - loss: 0.9910
Epoch 3: val_accuracy improved from 0.47222 to 0.50000, saving model to /content/drive/MyDrive/BOOTCAMP/ColabNotebooks/Projec

##Model Evaluation

In [None]:
#Evaluate all four models (first_model, second_model, ensemble_model, and chained_model) on same datasets

#1. Evaluate first_model and second_model directly on training_set, validation_set and testing_set.
#   Both models were fitted with training_set/validation_set, so testing_set gives the unbiased figure.

# (dataset, report line) pairs for first_model, evaluated in this order
_first_model_reports = (
    (training_set, "First model training - Loss: {first_model_loss}, First model training accuracy - Accuracy: {first_model_accuracy}"),
    (validation_set, "First model validation - Loss: {first_model_loss}, First model validation - Accuracy: {first_model_accuracy}"),
    (testing_set, "First model testing - Loss: {first_model_loss}, First model testing - Accuracy: {first_model_accuracy}"),
)
for _eval_set, _report in _first_model_reports:
    first_model_loss, first_model_accuracy = first_model.evaluate(_eval_set)
    print(_report.format(first_model_loss=first_model_loss, first_model_accuracy=first_model_accuracy))

# (dataset, report line) pairs for second_model, evaluated in this order
_second_model_reports = (
    (training_set, "Second model training - Loss: {second_model_loss}, Second model training Accuracy: {second_model_accuracy}"),
    (validation_set, "Second model validation - Loss: {second_model_loss}, Second model validation - Accuracy: {second_model_accuracy}"),
    (testing_set, "Second model testing - Loss: {second_model_loss}, Second model testing - Accuracy: {second_model_accuracy}"),
)
for _eval_set, _report in _second_model_reports:
    second_model_loss, second_model_accuracy = second_model.evaluate(_eval_set)
    print(_report.format(second_model_loss=second_model_loss, second_model_accuracy=second_model_accuracy))


#2. Generate predictions for ensemble_model, which is evaluated on the average of
#   first_model's and second_model's predictions

import numpy as np


def _aligned_predictions_and_labels(dataset):
    """Collect first_model predictions, second_model predictions and labels in ONE pass.

    Every batch is read from `dataset` once and given to both models, so row i of
    each returned array refers to the same image. Calling predict() once per model
    and extracting the labels separately iterates the dataset several times; the
    datasets here come from image_dataset_from_directory, which shuffles unless
    shuffle=False was passed (not visible from this cell), so separate passes may
    each see a different order and the rows would no longer correspond.

    Returns a tuple (first_predictions, second_predictions, labels) of NumPy arrays.
    """
    first_batches, second_batches, label_batches = [], [], []
    for images, labels in dataset:
        first_batches.append(first_model.predict_on_batch(images))
        second_batches.append(second_model.predict_on_batch(images))
        label_batches.append(np.asarray(labels))
    return (
        np.concatenate(first_batches),
        np.concatenate(second_batches),
        np.concatenate(label_batches),
    )


# Predictions from both models plus matching labels, per dataset
(first_model_predictions_train,
 second_model_predictions_train,
 ensemble_labels_train) = _aligned_predictions_and_labels(training_set)

(first_model_predictions_val,
 second_model_predictions_val,
 ensemble_labels_val) = _aligned_predictions_and_labels(validation_set)

(first_model_predictions_test,
 second_model_predictions_test,
 ensemble_labels_test) = _aligned_predictions_and_labels(testing_set)

# Average predictions from both models
ensemble_predictions_train = (first_model_predictions_train + second_model_predictions_train) / 2
ensemble_predictions_valid = (first_model_predictions_val + second_model_predictions_val) / 2
ensemble_predictions_test = (first_model_predictions_test + second_model_predictions_test) / 2

# The y_train / y_val / y_test arrays extracted in the previous step are deliberately
# not used below: they were gathered in a separate pass over each dataset and may be
# in a different order than the predictions above.
# NOTE(review): if ensemble_model itself was fitted on predictions and labels gathered
# in separate passes, it needs to be re-fitted on aligned arrays as well -- confirm
# against the training cell.


#3. Evaluate ensemble_model on the averaged predictions and their matching labels

ensemble_loss, ensemble_accuracy = ensemble_model.evaluate(ensemble_predictions_train, ensemble_labels_train)
print(f"Ensemble model training - Loss: {ensemble_loss}, Ensemble model training - Accuracy: {ensemble_accuracy}")

ensemble_loss, ensemble_accuracy = ensemble_model.evaluate(ensemble_predictions_valid, ensemble_labels_val)
print(f"Ensemble model validation - Loss: {ensemble_loss}, Ensemble model validation - Accuracy: {ensemble_accuracy}")

ensemble_loss, ensemble_accuracy = ensemble_model.evaluate(ensemble_predictions_test, ensemble_labels_test)
print(f"Ensemble model testing - Loss: {ensemble_loss}, Ensemble model testing - Accuracy: {ensemble_accuracy}")


#Evaluate chained_model on training_set, validation_set, and testing_set

# (dataset, report line) pairs, evaluated in this order
_chained_model_reports = (
    (training_set, "Chained model training - Loss: {chained_model_loss}, Chained model training - Accuracy: {chained_model_accuracy}"),
    (validation_set, "Chained model validation - Loss: {chained_model_loss}, Chained model validation - Accuracy: {chained_model_accuracy}"),
    (testing_set, "Chained model testing- Loss: {chained_model_loss}, Chained model testing - Accuracy: {chained_model_accuracy}"),
)
for _eval_set, _report in _chained_model_reports:
    chained_model_loss, chained_model_accuracy = chained_model.evaluate(_eval_set)
    print(_report.format(chained_model_loss=chained_model_loss, chained_model_accuracy=chained_model_accuracy))


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 199ms/step - accuracy: 0.7439 - loss: 0.6321
First model training - Loss: 0.6271082162857056, First model training accuracy - Accuracy: 0.7406198978424072
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 180ms/step - accuracy: 0.6094 - loss: 1.0408
First model validation - Loss: 1.0091373920440674, First model validation - Accuracy: 0.625
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 351ms/step - accuracy: 0.4986 - loss: 1.0803
First model testing - Loss: 1.0288690328598022, First model testing - Accuracy: 0.5301587581634521
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 149ms/step - accuracy: 0.8233 - loss: 0.4410
Second model training - Loss: 0.4488651752471924, Second model training Accuracy: 0.8189233541488647
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 0.8511 - loss: 0.6145
Second model validation - Loss: 0.681522786617279, Second

##Evaluation Results

first_model  
Training       Loss: 0.6271 | Accuracy: 0.7406  
Validation     Loss: 1.0091 | Accuracy: 0.625  
Testing        Loss: 1.0288 | Accuracy: 0.5301  

second_model  
Training      Loss: 0.4488 | Accuracy: 0.8189  
Validation    Loss: 0.6815 | Accuracy: 0.8194  
Testing       Loss: 2.9589 | Accuracy: 0.3746  

ensemble_model  
Training      Loss: 1.4364 | Accuracy: 0.2120  
Validation    Loss: 1.4026 | Accuracy: 0.2777  
Testing       Loss: 1.4087 | Accuracy: 0.2444  

chained_model  
Training      Loss: 0.8707 | Accuracy: 0.6215  
Validation    Loss: 0.9116 | Accuracy: 0.5833  
Testing       Loss: 1.0094 | Accuracy: 0.5142  
