# Advanced Modeling

Objectives:
- build multiples models and evaluate

---

## CNN using Keras, Tensorflow

### Import Libraries

In [1]:
## IMPORT LIBRARIES ##

import numpy as np #numpy

import pandas as pd # pandas

from matplotlib import pyplot as plt # matplot library
import seaborn as sns

In [2]:
## IMPORT LIBRARIES ##

import os  
from matplotlib import image as mpimg
from random import randint
from PIL import Image
from skimage import io, img_as_float, img_as_ubyte
from skimage.io import imread, imshow
import cv2
from glob import glob
from sklearn.decomposition import PCA
import warnings 
warnings.filterwarnings('ignore')

##########

In [5]:
# IMPORT LIBRARIES

import tensorflow as tf
import keras 

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

### Load the Dataset as a Tensor

The function we'll be using to load the dataset is `keras.utils.image_dataset_from_directory` and it generates a `tf.data.Dataset` object from an image files.  

These loading utilites can be combined with preprocessing layers to futher transform our input dataset before training. 

In [6]:
# Set the parameters for the split
validation_split = 0.2
seed = 123

# Load training data
train_images = tf.keras.utils.image_dataset_from_directory(
    'dataset/training',
    label_mode = "int",
    color_mode = "rgb",
    image_size = (600, 800),
    batch_size = 32,
    shuffle = False,
    seed = seed,
    validation_split = validation_split,
    subset = "training",  # Specify this is the training subset
    interpolation = "bilinear",
    follow_links = False,
    crop_to_aspect_ratio = False,
    pad_to_aspect_ratio = False,
    data_format = None,
    verbose = True
)

# Load validation data
validation_images = tf.keras.utils.image_dataset_from_directory(
    'dataset/training',
    label_mode = "int",
    color_mode = "rgb",
    image_size = (600, 800),
    batch_size = 32,
    shuffle = False,
    seed = seed,
    validation_split = validation_split,
    subset = "validation",  # Specify this is the validation subset
    interpolation = "bilinear",
    follow_links = False,
    crop_to_aspect_ratio = False,
    pad_to_aspect_ratio = False,
    data_format = None,
    verbose = False
)

# Load test data
test_images = keras.utils.image_dataset_from_directory(
    'dataset/testing',
    label_mode = "int",
    class_names = None,
    color_mode = "rgb",
    image_size = (600,800),
    batch_size = 32,
    shuffle = False,
    seed = seed,
    validation_split = None,
    subset = None,
    interpolation = "bilinear",
    data_format = None,
    verbose = True
)

Found 2720 files belonging to 3 classes.
Using 2176 files for training.
Using 544 files for validation.
Found 680 files belonging to 3 classes.


### Setting up a CNN Sequential Model

A Sequential model is appropriate for a plain stack of layers where each layer has exactly one input tensor and one output tensor. A Sequential model is not appropriate when:

- the model has multiple inputs or multiple outputs
- Any of the layers has multiple inputs or multiple outputs

We can create a Sequential model incrementally via the `add()` method:

In [7]:
# Create a Sequential Model

CNN_model = Sequential()

# Create simple CNN model architecture with Pooling for dimensionality reduction 
# and Dropout to reduce overfitting
CNN_model.add(Conv2D(32, kernel_size=(3, 3), activation = 'relu', input_shape = (600, 800, 3)))
CNN_model.add(MaxPooling2D(pool_size=(2, 2)))

CNN_model.add(Conv2D(64, (3, 3), activation='relu'))
CNN_model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the output of our convolutional layers
CNN_model.add(Flatten())

# Add dense layers
CNN_model.add(Dense(128, activation='relu'))
CNN_model.add(Dense(64, activation='relu'))
CNN_model.add(Dense(3, activation='softmax'))

# Print out a summary of the network
CNN_model.summary()

Layers 

The `tf.keras.layers.Layer` class is the fundamental abstraction in Keras. A Layer encapsulates a state (weights) and some computation (defined in the `tf.keras.layers.Layer.call` method).

Weights created by layers can be trainable or non-trainable. Layers are recursively composable: If you assign a layer instance as an attribute of another layer, the outer layer will start tracking the weights created by the inner layer.

You can also use layers to handle data preprocessing tasks like normalization and text vectorization. Preprocessing layers can be included directly into a model, either during or after training, which makes the model portable.

Models

A model is an object that groups layers together and that can be trained on data.

The simplest type of model is the Sequential model, which is a linear stack of layers.

#### Compile Settings

Compiling the model EXPLAIN WHY I CHOSE THE COMPILE SETTINGS

In [8]:
# Compile the model with the desired loss function, optimizer, and metric(s) to track
CNN_model.compile(loss = 'sparse_categorical_crossentropy',
                  optimizer = 'Adam',
                  metrics = ['accuracy'])

#### Train the Model

Fit the model talk about the training process

In [9]:
# Fit the model using the training and validation datasets
CNN_model.fit(
    train_images,
    epochs=5,
    verbose=1,
    validation_data=validation_images  # Specify the validation dataset here
)


Epoch 1/5
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 4s/step - accuracy: 0.4288 - loss: 3461.5281 - val_accuracy: 0.4320 - val_loss: 1.0446
Epoch 2/5
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 4s/step - accuracy: 0.4780 - loss: 1.0015 - val_accuracy: 0.4816 - val_loss: 1.0246
Epoch 3/5
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 4s/step - accuracy: 0.5942 - loss: 0.7999 - val_accuracy: 0.5846 - val_loss: 1.1771
Epoch 4/5
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 4s/step - accuracy: 0.7372 - loss: 0.6113 - val_accuracy: 0.5074 - val_loss: 1.5274
Epoch 5/5
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m299s[0m 4s/step - accuracy: 0.7845 - loss: 0.4968 - val_accuracy: 0.6342 - val_loss: 2.3302


<keras.src.callbacks.history.History at 0x140978110>

### Evaluate the Model

In [10]:
# Evaluate the model's performance on the test data
score = CNN_model.evaluate(test_images, verbose=1)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 631ms/step - accuracy: 0.6366 - loss: 3.4305
Test loss: 3.776289463043213
Test accuracy: 0.6088235378265381


### Key Observations

1. **Accuracy Improvement:**
- Training Accuracy has improved consistently from 42.88% to 78.45% over the 5 epochs. This indicates that the model is learning effectively from the training data.
- Validation Accuracy also improves but exhibits some fluctuation, starting at 43.20%, peaking at 63.42% in the last epoch, but dipping in the fourth epoch. This behavior may suggest some issues with generalization or response to model changes per epoch.

2. **Loss Analysis:**
- Training Loss starts unusually high at 3461.5281, which is atypical and might indicate a problem with how the loss is being calculated or an issue with the initial weights. However, it decreases to a more typical value of 0.4968 by the final epoch, showing proper learning progression.
Validation Loss starts at 1.0446 and increases to 2.3302 by the end of the training. The increasing validation loss as training progresses is concerning and might suggest that the model is starting to overfit the training data.

3. **Potential Issues & Solutions**

1. High Initial Loss:
The extraordinarily high initial loss could be due to a number of factors including incorrect initialization of weights, an anomaly in the first batch of training data, or improper scaling/normalization of input data. Check the initialization settings and ensure data is correctly preprocessed.
2. Validation Loss Increasing:
	- Overfitting: The model's increasing loss on the validation set suggests it may be overfitting the training data. Overfitting happens when a model learns the details and noise in the training data to an extent that it negatively impacts the performance of the model on new data.
	- Solutions:
		- Data Augmentation: To combat overfitting, increase the diversity of your training data through data augmentation (rotations, translations, flips).
		- Dropout Layers: Incorporate dropout in your network, which randomly drops units (along with their connections) during the training process to prevent units from co-adapting too much.
		- Regularization: Implement L1 or L2 regularization which can penalize large weights during training.
Early Stopping: Monitor the validation loss and stop training when it begins to increase despite improvements in training loss.


**Next Steps**
- Monitor Training More Closely: Use callbacks like ModelCheckpoint and EarlyStopping during training to save the best model and halt training when the validation score deteriorates.
- Experiment with Hyperparameters: Adjust learning rates, increase/decrease the number of layers and units in the layers, and try different optimizers.
- Evaluate Model with Different Metrics: Consider other metrics such as precision, recall, F1-score, especially if the classes are imbalanced.

Discuss the evaluation metrics results

## Image Classification with Pre-Trained CNN (or Transfer Learning)

The steps for transfer learning:

1.  Obtain a pre-trained CNN Model
2.  Remove the last few layers
3.  Add layers to the end of the pre-trained CNN Model to classify our data
4.  Hold the other layers static
5.  train the new model

### Getting Started

In [51]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define image dimensions and data generator with a validation split
height = 600
width = 800
channels = 3

train_datagen = ImageDataGenerator(
    rotation_range=30,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Setting 20% of the images for validation
)

test_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

In [52]:
# Training generator
train_generator = train_datagen.flow_from_directory(
    'dataset/training',
    target_size=(height, width),
    batch_size=32,
    color_mode='rgb',
    class_mode='categorical',
    subset='training',
    seed=123
)

# Validation generator
validation_generator = train_datagen.flow_from_directory(
    'dataset/training',
    target_size=(height, width),
    batch_size=32,
    color_mode='rgb',
    class_mode='categorical',
    subset='validation',  
    seed=123
)

# Testing generator
test_generator = test_datagen.flow_from_directory(
    'dataset/testing/',
    target_size=(height, width),
    batch_size=32,
    color_mode='rgb',
    class_mode='categorical',  
    seed=123
)

Found 2176 images belonging to 3 classes.
Found 544 images belonging to 3 classes.
Found 680 images belonging to 3 classes.


### Setting up a Pre-Trained CNN ResNet-50 Model

In [53]:
#### ResNet-50

import random

def set_random_seed(seed):
    
    '''Random seeds for reproducability'''
    
    random.seed(seed)
    tf.random.set_seed(seed)

In [54]:
from tensorflow.keras.applications import ResNet50V2

res_model = ResNet50V2(weights='imagenet',
                   include_top=False,
                   input_shape=(600,800,3))
res_model.summary()

In [55]:
# Freeze all the layers in the base model
for layer in res_model.layers:
    layer.trainable = False

In [56]:

set_random_seed(123)
x = res_model.output

x = Flatten()(x)
x = Dense(32, activation='relu')(x)
output = Dense(3, activation='softmax')(x)

In [57]:
model = Model(inputs=res_model.input, 
              outputs = output)

In [59]:
from tensorflow.keras.callbacks import EarlyStopping

set_random_seed(121)
model.compile(optimizer='Adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


# Use an early stopping callback to stop training
# once we no longer have improvements in our validation loss
early_stop = EarlyStopping(monitor='val_loss', 
                           patience=4, 
                           mode='min', 
                           verbose=1)
model.fit(train_generator,
          epochs=10,
          validation_data = validation_generator,
          callbacks=[early_stop])

Epoch 1/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m433s[0m 6s/step - accuracy: 0.4537 - loss: 4956.7969 - val_accuracy: 0.4301 - val_loss: 1191.3196
Epoch 2/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m413s[0m 6s/step - accuracy: 0.5700 - loss: 1352.3108 - val_accuracy: 0.4908 - val_loss: 2047.6560
Epoch 3/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m415s[0m 6s/step - accuracy: 0.6287 - loss: 1007.9902 - val_accuracy: 0.5000 - val_loss: 2048.4104
Epoch 4/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m415s[0m 6s/step - accuracy: 0.5742 - loss: 2025.0289 - val_accuracy: 0.4062 - val_loss: 4428.0405
Epoch 5/10
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m423s[0m 6s/step - accuracy: 0.6455 - loss: 1223.8345 - val_accuracy: 0.4706 - val_loss: 2727.5088
Epoch 5: early stopping


<keras.src.callbacks.history.History at 0x3093cfc50>

###  Evaluate the Model

### Key Observasions

## Conclusion

In [67]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

# Setting up the figure and axis
fig, ax = plt.subplots()
ax.set_xlim(0, 6)
ax.set_ylim(0, 6)
ax.set_xticks(np.arange(0, 7, 1))
ax.set_yticks(np.arange(0, 7, 1))
ax.grid(True)

# Create the image as a 6x6 grid
image = np.random.randint(0, 255, (6, 6), dtype=np.uint8)

# The 3x3 convolutional kernel
kernel = np.array([[1, 0, -1], [1, 0, -1], [1, 0, -1]])

# Initial placement of the kernel on the image
kernel_position = [0, 0]  # top-left corner of the kernel

# Create an image plot and a "kernel" plot
image_map = ax.imshow(image, cmap='gray', extent=(0, 6, 0, 6), origin='upper', interpolation='none')
kernel_box = plt.Rectangle(kernel_position, 3, 3, edgecolor='red', facecolor='none', lw=2)
ax.add_patch(kernel_box)

def update(frame):
    # Update the position of the kernel
    kernel_position[0] = (frame % 4) * 1  # Move right each frame
    kernel_position[1] = (frame // 4) * 1  # Move down every 4 frames

    # Move the kernel box
    kernel_box.set_xy(kernel_position)

    return kernel_box,

# Create the animation
ani = FuncAnimation(fig, update, frames=16, blit=True, interval=500)
plt.close(fig)  # Close the figure to prevent it from being displayed as a static image
HTML(ani.to_jshtml())
