## CNN Model

### Imports 

In [1]:
# imports
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import img_to_array, load_img

from sklearn.model_selection import train_test_split

In [2]:
# Setting random seeds for reproducibility.
# np.random.seed only controls NumPy's generator; Keras layers draw their initial weights
# from TensorFlow's own generator, so that one has to be seeded as well.
import tensorflow as tf

np.random.seed(42)
tf.random.set_seed(42)

### Reading in image data

In [None]:
# read in image data here. try a target size of 512x512 (that was what preprocessed images in one of the kaggle datasets had)
# also normalize the data 

### Setting X and y

In [None]:
# set X and y here

# X will be the array of images
# y will be the target. If y is multiclass instead of binary, one-hot-encode it in keras with y = to_categorical(y)
# (to_categorical is not imported above yet: from tensorflow.keras.utils import to_categorical)

# images should already be scaled (normalized) so we don't need to use StandardScaler

### Train-Test Split

In [None]:
# Hold out a test set. stratify=y keeps the class proportions the same in the
# training and test portions, and random_state fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

### Building the CNN Model

In [None]:
# Instantiate an empty Sequential model: a linear stack in which each layer added below
# receives the output of the layer added before it
model = Sequential()

In [None]:
# First convolutional layer: 16 filters of size 3x3 that scan the image for features.
#
# Filter counts start small here and grow in the later layers, which is the recommended pattern:
# the model picks out a small number of main features first and then combines them into
# a larger number of more complex features.
#
# input_shape describes a single image: 512x512 pixels with 3 channels (RGB). 512x512 is the size
# that one of the kaggle datasets with preprocessed images used.
#
# relu (rectified linear unit) activation passes positive values through unchanged and turns
# negative values into 0.
model.add(
    Conv2D(
        filters=16,
        kernel_size=(3, 3),
        activation='relu',
        input_shape=(512, 512, 3),
    )
)

In [None]:
# add a MaxPooling 2D layer that keeps only the maximum value in every 2x2 grid
# (the stride defaults to the pool_size, so the grids do not overlap)
# this effectively cuts the height and width of the data in half, and helps get rid of noise caused by small variations in the image

model.add(MaxPooling2D(pool_size=(2,2)))

In [3]:
# Two further convolution stages, each followed by 2x2 max pooling.
# The filter count grows from stage to stage: 32 first, then 64.
# input_shape is not repeated here; it is only needed for the first layer above.
for n_filters in (32, 64):
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

In [None]:
# add a flatten layer to bridge between the convolutional layers and the dense layers:
# it unrolls each image's stack of feature maps into a single 1D vector that the dense layers can take as input
model.add(Flatten())

In [None]:
# Fully connected layer with 256 units and relu activation; it analyzes the features
# that the convolutional layers identified.
model.add(Dense(units=256, activation='relu'))

In [None]:
# Output layer: a single sigmoid unit for binary classification.
# If there are multiple classes (not binary), set units to the number of classes
# and use softmax activation instead.
model.add(Dense(units=1, activation='sigmoid'))

In [None]:
# Compile the model with the adam optimizer, binary cross-entropy loss and accuracy as the reported metric.
# For a multiclass target, switch the loss to categorical_crossentropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model and keep the returned History object in h, so the per-epoch
# accuracy and loss scores can be plotted afterwards.
#   batch_size      - number of images processed per weight update
#   epochs          - number of full passes over the training data
#   validation_data - scored after every epoch; the model is not trained on it
h = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_test, y_test),
)

### Visualizing the accuracy and loss scores for each epoch

In [None]:
# Training vs. validation accuracy for each epoch.
# The fit results are stored in `h` (there is no variable called `history`), so read the scores from h.history.
plt.plot(h.history['accuracy'], label='accuracy')
plt.plot(h.history['val_accuracy'], label='val accuracy')
plt.legend();

In [None]:
# Training vs. validation loss for each epoch, one line per curve.
for curve_key, curve_label in (('loss', 'training loss'), ('val_loss', 'validation loss')):
    plt.plot(h.history[curve_key], label=curve_label)
plt.legend();

### Feature Maps

In [None]:
# we don't need to do this but I found a blog about how to write code to see the feature maps generated by the model--
# https://www.analyticsvidhya.com/blog/2020/11/tutorial-how-to-visualize-feature-maps-directly-from-cnn-layers/

# if we have time it would be cool to try this