<a href="https://colab.research.google.com/github/sujathasivaraman/mlai/blob/main/cnnclassifier_junk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title TEST for CNN :  Run this to load images and imports! {"display-mode":"form", "form-width":"25%"}
#@title {"display-mode":"form", "form-width":"25%"}
#@markdown ##**BEFORE RUNNING ANY CODE, please change your Hardware Accelerator to GPU to train faster!**</h2>
#@markdown 1. Click on the **Runtime** menu at the top of the screen.
#@markdown 2. Click **Change Runtime Type**.
#@markdown 3. Choose **T4 GPU** under **Hardware Accelerator**.

#@markdown Once you've done that, run this code cell to check you're correctly connected!

import tensorflow as tf
from IPython.display import Markdown

# Render exactly one status banner: gpu_device_name() returns an empty
# (falsy) string when TensorFlow cannot see a GPU.
display(Markdown(
    "###✅ GPU connected!"
    if tf.test.gpu_device_name()
    else """
###❌ No GPU found!
If you're running into GPU limits when you try to switch, here are some suggestions:
  - Wait 12-24 hours for the limits to reset.
  - Share your copy of the notebook with another Google account that hasn't met the limit, and work through the notebook with that account.
  - Look into a paid subscription or paying for compute units as you go.
  """
))
#@title **🏗 Setup Cell** {"display-mode":"form", "form-width":"25%"}
#@markdown **Run this to import libraries and download data!**

#-------------------------------------------------------------------------------
# IMPORTS
#-------------------------------------------------------------------------------
from collections import Counter

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import model_selection
from sklearn.metrics import accuracy_score

import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, InputLayer
from tensorflow.keras.layers import Activation, MaxPooling2D, Dropout, Flatten, Reshape
from tensorflow.keras.utils import to_categorical

# Inspirit's util file and discussion exercise answer handler
!wget -q "https://storage.googleapis.com/inspirit-ai-data-bucket-1/Modules/inspiritai_util.py"
from inspiritai_util import handle_discussion_response

#-------------------------------------------------------------------------------
# HELPER FUNCTIONS
#-------------------------------------------------------------------------------
def categorical_to_numpy(labels_in):
  """
  Convert string class labels into two-column one-hot rows.

  Parameters:
  labels_in (iterable): class labels; 'dog' is treated as the first class,
                        any other value as the second class.

  Returns:
  numpy.ndarray: one row per label, [1, 0] for 'dog' and [0, 1] otherwise.
  """
  dog_row, other_row = [1, 0], [0, 1]
  encoded = [np.array(dog_row if label == 'dog' else other_row) for label in labels_in]
  return np.array(encoded)

def one_hot_encoding(input):
  """
  One-hot encode a 1-D sequence of non-negative integer class indices.

  Parameters:
  input (array-like of int): one class index per sample, e.g.
                             np.array([0, 2, 1]) or a plain list.

  Returns:
  numpy.ndarray: float array of shape (n_samples, max_index + 1) with a
                 single 1 in each row. An empty input yields shape (0, 0).
  """
  # np.asarray lets callers pass lists/tuples as well as ndarrays.
  indices = np.asarray(input)

  if indices.size == 0:
    # max() of an empty array raises; with no samples there is nothing to encode.
    return np.zeros((0, 0))

  output = np.zeros((indices.size, indices.max() + 1))
  # Row i gets its 1 in the column named by indices[i].
  output[np.arange(indices.size), indices] = 1

  return output

def load_data():
  # Run this cell to download our data into a file called 'cifar_data'
  !wget -q --show-progress -O cifar_data https://storage.googleapis.com/inspirit-ai-data-bucket-1/Data/AI%20Scholars/Sessions%201%20-%205/Session%204%20_%205%20-%20Neural%20Networks%20_%20CNN/dogs_v_roads

  # now load the data from our cloud computer
  import pickle
  with open("cifar_data", "rb") as f:
      data_dict = pickle.load(f)

  data   = data_dict['data']
  labels = data_dict['labels']

  return data, labels

def plot_one_image(data, labels, img_idx):
  """
  Display a single image from `data` with its index and label in the title.

  Parameters:
  data: 2-D array; row `img_idx` is reshaped to 32x32x3 for display.
  labels: sequence indexed the same way as `data`.
  img_idx (int): which row to show.
  """
  import matplotlib.pyplot as plt

  pixels = data[img_idx, :].reshape([32, 32, 3]).copy()
  caption = f'Image: {img_idx} | Label: {labels[img_idx]}'

  fig, ax = plt.subplots(1, 1)
  ax.imshow(pixels.astype('uint8'), extent=[-1, 1, -1, 1])

  # The image is drawn on [-1, 1] x [-1, 1]; relabel the ticks as 0..32.
  tick_positions = [-1, -0.5, 0, 0.5, 1]
  tick_names = [0, 8, 16, 24, 32]

  ax.set_xticks(tick_positions)
  ax.set_xticklabels(tick_names)
  ax.set_yticks(tick_positions)
  ax.set_yticklabels(tick_names)

  ax.set_title(caption)

  # Show the figure explicitly, then close it so it is not rendered twice.
  display(fig)
  plt.close(fig)

def plot_acc(history, ax=None, xlabel='Epoch #'):
    """
    Plot per-epoch training accuracy and, when recorded, validation accuracy.

    Parameters:
    history: object whose `.history` attribute is a dict of per-epoch metric
             lists (as returned by Keras `fit`). It must contain 'accuracy';
             'val_accuracy' is optional and only present when validation data
             was passed to `fit`.
    ax (matplotlib Axes, optional): axes to draw on; a new figure is created
                                    when None.
    xlabel (str): label for the x axis.

    Returns:
    None. The figure is shown with `plt.show()`.
    """
    # Work on a copy so the caller's history dict does not gain an 'epoch' key.
    metrics = dict(history.history)
    has_validation = 'val_accuracy' in metrics
    metrics['epoch'] = list(range(len(metrics['accuracy'])))
    frame = pd.DataFrame.from_dict(metrics)

    # `is None` rather than truthiness: an Axes object should never be
    # replaced just because it evaluates falsy.
    if ax is None:
        _, ax = plt.subplots(1, 1)

    if has_validation:
        sns.lineplot(x='epoch', y='val_accuracy', data=frame, label='Validation', ax=ax)
    sns.lineplot(x='epoch', y='accuracy', data=frame, label='Training', ax=ax)
    ax.axhline(0.5, linestyle='--', color='red', label='Chance')

    if has_validation:
        # idxmax picks the first epoch with the highest validation accuracy,
        # so ties are resolved deterministically.
        best_epoch = frame.loc[frame['val_accuracy'].idxmax(), 'epoch']
        ax.axvline(x=best_epoch, linestyle='--', color='green', label='Best Epoch')

    ax.legend(loc='best')
    ax.set_ylim([0.4, 1.005])

    ax.set_xlabel(xlabel)
    ax.set_ylabel('Accuracy (Fraction)')

    plt.show()

def logits_to_one_hot_encoding(input):
    """
    Turn per-class scores into one-hot predictions.

    Despite the name, the values are only compared within each row, so the
    function works equally for softmax probabilities and for raw scores: the
    largest entry of each row becomes 1 and every other entry becomes 0. When
    several entries tie for the maximum, the first one wins.

    Parameters:
    input (numpy.ndarray): 2D array of shape (n_samples, n_classes).

    Returns:
    numpy.ndarray: integer array of the same shape with exactly one 1 per row.
    """
    scores = np.asarray(input)
    winners = scores.argmax(axis=1)

    one_hot = np.zeros(scores.shape, dtype=int)
    one_hot[np.arange(scores.shape[0]), winners] = 1
    return one_hot

#-------------------------------------------------------------------------------
# CUSTOM CNN CLASS
#-------------------------------------------------------------------------------
class CNNClassifier:
    """
    A Convolutional Neural Network (CNN) classifier using Keras, customized for binary classification tasks.

    This class wraps a Keras Sequential model with a fixed architecture for 32x32x3 images and a two-unit
    softmax output. It includes a custom `predict` method that outputs one-hot encoded predictions; other
    Keras model attributes are reachable through attribute delegation. This was done to avoid the SciKeras
    wrappers, which are frequently incompatible with the Google Colab versions of Keras & Tensorflow.

    Attributes:
        num_epochs (int): Default number of training epochs used by `fit`.
        layers (int): Number of extra 32-filter 'same'-padded convolution layers placed at the start of the
                      network, before the fixed convolution/pooling blocks.
        dropout (float): The dropout rate used in every dropout layer.
        model (keras.models.Sequential): The underlying compiled Keras model.

    Methods:
        build_model(): Constructs the CNN model with the specified architecture and compiles it.

        fit(*args, **kwargs): Trains the model. Accepts arguments compatible with the Keras `fit` method.

        predict(*args, **kwargs): Predicts labels for the input data and converts the softmax output
                                  to one-hot encoded format using `logits_to_one_hot_encoding`, so the
                                  result can be compared with one-hot targets by `accuracy_score`.

        predict_proba(*args, **kwargs): Returns the raw softmax output of the model.

        score(X, y): Accuracy of the one-hot predictions for X against the one-hot targets y.

    Usage:
        cnn_classifier = CNNClassifier(num_epochs=30, layers=4, dropout=0.5)
        cnn_classifier.fit(X_train, y_train)
        predictions = cnn_classifier.predict(X_test)

    Note:
        `__getattr__` delegates any attribute not found on this object to the underlying Keras model.
    """
    def __init__(self, num_epochs=30, layers=4, dropout=0.5):
        self.num_epochs = num_epochs
        self.layers = layers
        self.dropout = dropout
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Sequential CNN; returns the compiled model."""
        model = Sequential()
        # Inputs are reshaped to 32x32 RGB images before the first convolution.
        model.add(Reshape((32, 32, 3)))

        # Configurable stack of size-preserving convolutions.
        for _ in range(self.layers):
            model.add(Conv2D(32, (3, 3), padding='same'))
            model.add(Activation('relu'))

        model.add(Conv2D(32, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(self.dropout))

        model.add(Conv2D(64, (3, 3), padding='same'))
        model.add(Activation('relu'))
        model.add(Conv2D(64, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(self.dropout))

        # Classifier head: 512 hidden units, then a two-class softmax.
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation('relu'))
        model.add(Dropout(self.dropout))
        model.add(Dense(2))
        model.add(Activation('softmax'))
        opt = keras.optimizers.RMSprop(learning_rate=0.0001)
        model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

        return model

    def fit(self, *args, **kwargs):
        """
        Train the underlying model and return whatever Keras `fit` returns.

        `epochs` (self.num_epochs), `batch_size` (10) and `verbose` (2) are only
        defaults: a caller may override any of them by keyword without
        triggering a "multiple values for keyword argument" TypeError.
        """
        kwargs.setdefault('epochs', self.num_epochs)
        kwargs.setdefault('batch_size', 10)
        kwargs.setdefault('verbose', 2)
        return self.model.fit(*args, **kwargs)

    # NOTE: WRITTEN TO RETURN ONE HOT ENCODINGS FOR ACCURACY
    def predict(self, *args, **kwargs):
        """Return one-hot encoded class predictions for the given inputs."""
        predictions = self.model.predict(*args, **kwargs)
        return logits_to_one_hot_encoding(predictions)

    def predict_proba(self, *args, **kwargs):
        """Return the raw softmax output of the underlying model."""
        return self.model.predict(*args, **kwargs)

    def score(self, X, y):
        """Return the accuracy of `predict(X)` against the one-hot targets `y`."""
        predictions = self.predict(X)
        return accuracy_score(y, predictions)

    def __getattr__(self, name):
        # __getattr__ runs only after normal lookup has failed. If `model`
        # itself is missing (instance created without __init__, unpickling, or
        # build_model raised), reading self.model below would re-enter this
        # method forever, so fail with a normal AttributeError instead.
        if name in ('model', 'predict', 'predict_proba'):
            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
        return getattr(self.model, name)
# Load our data
# Load the raw images/labels, convert the labels to one-hot rows and hold out 20% for testing.
data_raw, labels_raw = load_data()
data = data_raw.astype(float)
labels = categorical_to_numpy(labels_raw)
X_train, X_test, y_train, y_test = model_selection.train_test_split(data, labels, test_size=0.2, random_state=1)

### WRITE YOUR CODE BELOW: Print the image data and its shape using the shape attribute!
plot_one_image(data_raw, labels_raw, 300) # Play around with the number!

# Create and train our CNN model CNNClassifier.
# The model is trained once, with validation_data, so the returned history
# contains the 'val_accuracy' values that plot_acc draws. (The previous
# version called `model.fit` here, but no `model` exists yet at this point.)
cnn = CNNClassifier(num_epochs=5, layers=2, dropout=0.5)
history = cnn.fit(X_train, y_train, validation_data=(X_test, y_test))
plot_acc(history)

# Print the score on the testing data
print("CNN Testing Set Score:")
print(cnn.score(X_test, y_test))

# This network can be described as:

# Input Layer: 3 neurons
# Layer 1 (Hidden): 4 neurons that are activated by 'relu'
# Layer 2 (Output): 2 neurons that are activated by 'softmax'
# We also want to compile the model with loss = 'categorical_crossentropy'
## WRITE YOUR CODE BELOW: delete the #s and fill in the blanks!

# model_1 = Sequential()
# model_1.add(InputLayer(shape=(____,)))
# model_1.add(Dense(____, activation='____'))
# model_1.add(Dense(____, activation='____'))
# model_1.compile(loss='____', optimizer='adam', metrics=['accuracy'])
# model_1.predict(np.array([[14, 18, 5]])) # Try any input! This represents an animal of height 14, weight 18, and age 5

# ReLU (Rectified Linear Unit)
# Usage: ReLU is primarily used in the hidden layers of neural networks.
# Function: It outputs the input directly if it is positive; otherwise, it outputs zero. Mathematically, it's defined as $f(x) = \max(0, x)$.

# Advantages:
# Helps in speeding up the training process by overcoming the vanishing gradient problem common with other activation functions like sigmoid or tanh.
# It introduces non-linearity into the network, allowing it to learn more complex patterns.
# Softmax
# Usage: Softmax is typically used in the output layer of a classifier, where we need to handle multiple classes.
# Function: Softmax converts logits (the raw output scores of the final layer) into probabilities by taking the exponential of each output and then normalizing these values by dividing by the sum of all exponentials. This gives a probability distribution across the classes that sums to 1. Mathematically, for each output $z_i$
#  in the layer, it's defined as $\mathrm{softmax}(z_i) = \frac{e^{z_i}}{\sum_j e^{z_j}}$.


# Advantages:
# By outputting a probability distribution, it works well for classes that are mutually exclusive, making it ideal for multi-class classification problems.
# Hand-built CNN for the 32x32x3 images: one conv/pool block, an extension
# slot for extra layers, then a dense classifier head with a 2-class softmax.
model = Sequential()
model.add(Reshape((32, 32, 3))) # Try to figure out why this layer is necessary!

model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

###
### WRITE YOUR CODE BELOW: Add more layers!
###

# Extra convolutional layers belong here, before Flatten. They were previously
# added after training, on top of the softmax output, where Conv2D cannot work
# because that output is no longer an image-shaped tensor. After the pooling
# layer above the feature map is 16x16, so an 11x11 kernel with stride 3 fits.
model.add(Conv2D(96, 11, strides=3))
model.add(Activation('relu'))

###
### END CODE HERE
###

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(2))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(learning_rate=0.0001)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Train the CNN and plot accuracy.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=70)
plot_acc(history)


# 1. Number of Layers
# Impact: More layers allow a CNN to learn more complex features at different levels of abstraction. For example, deeper layers in models like VGG16 are adept at recognizing high-level features in images.
# Example: model.add(Conv2D(32, (3, 3))) followed by multiple similar layers.
# 2. Number of Filters
# Impact: Each filter detects different features, and having more filters increases the network's capacity to learn diverse features.
# Example: Starting with model.add(Conv2D(32, (3, 3))) and increasing to model.add(Conv2D(64, (3, 3))) in subsequent layers.
# 3. Filter Size
# Impact: Smaller filters (e.g., 3x3) are excellent for capturing small detail, while larger filters (e.g., 5x5) capture wider patterns.
# Example: model.add(Conv2D(64, (3, 3))) for fine detail versus model.add(Conv2D(64, (5, 5))) for broader features.
# 4. Stride
# Impact: Strides affect how the filter moves across the image; larger strides reduce the output size.
# Example: model.add(Conv2D(64, (3, 3), strides=(2, 2))), reducing spatial dimensions more rapidly.
# 5. Padding
# Impact: 'Same' padding zero-pads the input so that, with a stride of 1, the output has the same width and height as the input; 'valid' adds no padding, so the output shrinks.
# Example: model.add(Conv2D(64, (3, 3), padding='same')) keeps dimensions intact.
# 6. Activation Function
# Impact: Determines how neurons fire. Commonly used ReLU is effective for non-linear problems.
# Example: model.add(Activation('relu')) typically follows each convolutional layer.
# 7. Pooling Layer
# Impact: Reduces dimensionality, thus controlling overfitting and reducing computational load.
# Example: model.add(MaxPooling2D((2, 2))) often follows one or more convolutional layers.
# 8. Learning Rate
# Impact: A crucial factor in convergence speed. Too high can overshoot minimum; too low may result in a long training process.
# Example: optimizer = keras.optimizers.Adam(learning_rate=0.001)
# 9. Batch Size
# Impact: Affects the memory footprint and can influence the model's generalization.
# Example: model.fit(x_train, y_train, batch_size=64)
# 10. Epochs
# Impact: More epochs generally improve the fit to the training data, but too many lead to overfitting; early stopping on validation accuracy helps prevent this.
# Example: model.fit(x_train, y_train, epochs=20)
#@title Run this to load cat and dog data! {"display-mode":"form", "form-width":"25%"}

# Code here from https://colab.research.google.com/github/google/eng-edu/blob/master/ml/pc/exercises/image_classification_part1.ipynb#scrollTo=4PIP1rkmeAYS

import tensorflow as tf
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Keep a reference to the previously built `model` (if this cell runs after one
# was created) under `road_model`; `road_saved` records whether one was found.
try:
  road_model = model
  road_saved = True
except NameError:
  road_saved = False

IMG_SHAPE  = 150  # Our training data consists of images with width of 150 pixels and height of 150 pixels
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
# Download the archive (extract=True also unpacks it) and derive the dataset folders.
zip_dir = tf.keras.utils.get_file('cats_and_dogs_filtered.zip', origin=_URL, extract=True)
# NOTE(review): this assumes the archive is unpacked into a sibling folder named
# 'cats_and_dogs_filtered_extracted'; the extraction layout may differ between
# Keras versions - TODO confirm on the runtime in use.
base_dir = os.path.join(os.path.dirname(zip_dir), 'cats_and_dogs_filtered_extracted/cats_and_dogs_filtered')
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

train_cats_dir = os.path.join(train_dir, 'cats')  # directory with our training cat pictures
train_dogs_dir = os.path.join(train_dir, 'dogs')  # directory with our training dog pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')  # directory with our validation cat pictures
validation_dogs_dir = os.path.join(validation_dir, 'dogs')  # directory with our validation dog pictures
train_image_generator      = ImageDataGenerator()  # Generator for our training data
validation_image_generator = ImageDataGenerator()  # Generator for our validation data
# next(...) pulls a single batch from each directory iterator. The batch sizes
# (2000 / 1000) are presumably chosen to cover each whole split - verify against
# the dataset size. Images are resized to IMG_SHAPE x IMG_SHAPE.
train_data = next(train_image_generator.flow_from_directory(batch_size=2000,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_SHAPE,IMG_SHAPE), #(150,150)
                                                           class_mode='binary'))
val_data = next(validation_image_generator.flow_from_directory(batch_size=1000,
                                                              directory=validation_dir,
                                                              shuffle=False,
                                                              target_size=(IMG_SHAPE,IMG_SHAPE), #(150,150)

                                                              class_mode='binary'))
# Each batch is an (images, labels) pair.
cd_train_inputs, cd_train_labels = train_data
cd_test_inputs, cd_test_labels = val_data

# Show one randomly chosen training image with its label.
index = np.random.randint(len(cd_train_inputs))
# Dividing by 255 assumes pixel values are on a 0-255 scale, bringing them into
# the 0-1 range for display.
plt.imshow(cd_train_inputs[index] / 255)
plt.show()
print("Label:", cd_train_labels[index])

# From AlexNet to VGGNet: A Better Choice for Image Classification
# Print the array shapes of the loaded inputs and labels as a sanity check.
print(cd_train_inputs.shape)
print(cd_train_labels.shape)
print(cd_test_inputs.shape)
print(cd_test_labels.shape)
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from matplotlib import pyplot
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten

# Below, we explore these parameters through practical examples directly applied in Keras.

# Re-load one batch per split, this time resized to 224x224 so the images match
# the input_shape given to the VGG16 base model below.
train_data = next(train_image_generator.flow_from_directory(batch_size=2000,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(224,224),
                                                           class_mode='binary'))
val_data = next(validation_image_generator.flow_from_directory(batch_size=1000,
                                                              directory=validation_dir,
                                                              shuffle=False,
                                                              target_size=(224,224),
                                                              class_mode='binary'))
cd_train_inputs, cd_train_labels = train_data
cd_test_inputs, cd_test_labels = val_data

###
### WRITE YOUR CODE BELOW: Fill in the blanks!
###

# Load the VGG16 model without the top (classifier) layers.
# The input shape is height x width x RGB channels of the images loaded above.
# NOTE(review): the images are fed as loaded, without
# tensorflow.keras.applications.vgg16.preprocess_input - consider applying it,
# since the pretrained weights expect that preprocessing.
base_model = VGG16(include_top=False, input_shape=(224, 224, 3))

# Freeze the base model layers so only the new classifier head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add custom classifier on top: a ReLU hidden layer, then a softmax over the
# two classes (the labels are one-hot encoded with to_categorical below).
x = Flatten()(base_model.output)
x = Dense(128, activation='relu', kernel_initializer='he_uniform')(x)
output = Dense(2, activation='softmax')(x)

###
### END CODE HERE
###


# Define new model
model = Model(inputs=base_model.input, outputs=output)

# Compile model
# categorical_crossentropy matches the one-hot targets and 2-unit softmax,
# consistent with the other models in this notebook.
opt = SGD(learning_rate=0.001, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    cd_train_inputs, to_categorical(cd_train_labels),
    validation_data=(cd_test_inputs, to_categorical(cd_test_labels)),
    epochs=2
)

# Display training history and model structure
plot_acc(history)
model.summary()