# CNNs From Scratch
In class, we made versions of LeNet and a custom model called LeNet2 where we duplicated all the convolutional layers. We'll try those, in addition to building a new network and trying to train an existing network without its pretrained weights.

## Imports

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from tensorflow.keras.utils import to_categorical
from keras.applications import EfficientNetB0
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.preprocessing import MinMaxScaler
import os

In [None]:
# Turn on memory growth so TensorFlow allocates GPU memory on demand instead of
# reserving the whole device up front; this leaves the rest for other users.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

## Load the data

In [None]:
# Open the CIFAKE train and test archives (paths relative to the working directory).
train_data, test_data = (
    np.load(archive) for archive in ('CIFAKE_Train.npz', 'CIFAKE_Test.npz')
)

In [None]:
# Pull the image and label arrays out of each archive.
X_train, y_train = train_data['images'], train_data['labels']
X_test, y_test = test_data['images'], test_data['labels']

# Label names as stored in the training archive.
label_names = train_data['label_names']

## Scale the pixel data to [0, 1]

In [None]:
# Flatten every 32x32x3 image into a single 3072-value row.
m_train, m_test = len(X_train), len(X_test)
X_train = np.reshape(X_train, (m_train, 32 * 32 * 3))
X_test = np.reshape(X_test, (m_test, 32 * 32 * 3))

X_train.shape

(100000, 3072)

In [None]:
# Fit the min-max scaler on the training rows only, then apply that same
# fitted scaling to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

X_train.min(), X_train.max()

(0.0, 1.0)

In [None]:
# Restore the 4-D (samples, 32, 32, 3) image layout after scaling.
X_train = np.reshape(X_train, (m_train, 32, 32, 3))
X_test = np.reshape(X_test, (m_test, 32, 32, 3))
X_train.shape

(100000, 32, 32, 3)

## LeNet and LeNet2

In [None]:
def build_LeNet(input_shape=(32, 32, 3), outputs=2):
    """Build a LeNet-style CNN with the Keras functional API.

    Two 5x5 'same'-padded ReLU convolutions (20, then 50 filters), each
    followed by 2x2 max pooling, feed a 500-unit ReLU dense layer and a
    softmax output layer.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
        outputs: Number of units in the softmax output layer.

    Returns:
        An uncompiled `Model` mapping the input tensor to the softmax output.
    """
    X0 = Input(shape=input_shape)
    # The Input tensor already fixes the shape, so Conv2D takes no input_shape.
    X1 = Conv2D(20, kernel_size=(5, 5), padding='same', activation='relu')(X0)
    X1_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X1)
    X2 = Conv2D(50, kernel_size=(5, 5), padding='same', activation='relu')(X1_pool)
    X2_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X2)
    X2_flat = Flatten()(X2_pool)
    X3 = Dense(500, activation='relu')(X2_flat)
    Ph = Dense(outputs, activation='softmax')(X3)
    return Model(inputs=[X0], outputs=[Ph])

In [None]:
def build_LeNet2(input_shape=(32, 32, 3), outputs=2):
    """Build the LeNet2 variant, in which each LeNet convolution is doubled.

    Two 20-filter 5x5 convolutions, 2x2 max pooling, two 50-filter 5x5
    convolutions, 2x2 max pooling, then a 500-unit ReLU dense layer and a
    softmax output layer. All convolutions use 'same' padding and ReLU.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
        outputs: Number of units in the softmax output layer.

    Returns:
        An uncompiled `Model` mapping the input tensor to the softmax output.
    """
    X0 = Input(shape=input_shape)
    # The Input tensor already fixes the shape, so Conv2D takes no input_shape.
    X1 = Conv2D(20, kernel_size=(5, 5), padding='same', activation='relu')(X0)
    X2 = Conv2D(20, kernel_size=(5, 5), padding='same', activation='relu')(X1)
    X2_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X2)
    X3 = Conv2D(50, kernel_size=(5, 5), padding='same', activation='relu')(X2_pool)
    X4 = Conv2D(50, kernel_size=(5, 5), padding='same', activation='relu')(X3)
    X4_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(X4)
    X4_flat = Flatten()(X4_pool)
    X5 = Dense(500, activation='relu')(X4_flat)
    Ph = Dense(outputs, activation='softmax')(X5)
    return Model(inputs=[X0], outputs=[Ph])

In [None]:
# Instantiate LeNet with the default input shape and output count, then print
# its layer-by-layer summary.
LeNet = build_LeNet()
LeNet.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 20)        1520      
                                                                 
 max_pooling2d (MaxPooling2  (None, 16, 16, 20)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 16, 50)        25050     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 8, 8, 50)          0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 3200)              0     

In [None]:
# Instantiate LeNet2 with the default input shape and output count, then print
# its layer-by-layer summary.
LeNet2 = build_LeNet2()
LeNet2.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d_2 (Conv2D)           (None, 32, 32, 20)        1520      
                                                                 
 conv2d_3 (Conv2D)           (None, 32, 32, 20)        10020     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 16, 16, 20)        0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 16, 16, 50)        25050     
                                                                 
 conv2d_5 (Conv2D)           (None, 16, 16, 50)        62550     
                                                           

### We'll define some hyperparameters to use for all of our models

In [None]:
# Callbacks handed to the fit() calls below.
# Stop once the monitored metric has gone 3 epochs without improving.
e_stop = EarlyStopping(patience=3)
# Write TensorBoard event files under tb_logs/.
tb = TensorBoard(log_dir='tb_logs')
# Save to Small_CNN.h5 only when the monitored metric improves; no log output.
checkpt = ModelCheckpoint(filepath='Small_CNN.h5', verbose=0, save_best_only=True)

In [None]:
# Optimisation settings shared by every model in this notebook.
learning_rate = 2e-4
optimizer = Adam(learning_rate=learning_rate)
# Labels are passed to fit() as-is (not one-hot), hence the sparse loss.
loss = 'sparse_categorical_crossentropy'

# Training-loop settings.
batch_size = 100
epochs = 10

In [None]:
# Each model gets its own freshly constructed Adam optimizer.
LeNet.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss=loss,
    metrics=['accuracy'],
)

In [None]:
# Each model gets its own freshly constructed Adam optimizer.
LeNet2.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss=loss,
    metrics=['accuracy'],
)

In [None]:
# Give this run its own checkpoint file and TensorBoard directory. A
# ModelCheckpoint instance keeps its best-so-far score between fit() calls, so
# sharing one callback and one file across models mixes up which model is saved.
lenet_callbacks = [
    ModelCheckpoint('LeNet.h5', save_best_only=True, verbose=0),
    TensorBoard(log_dir='tb_logs/LeNet'),
    e_stop,
]
lenet_hist = LeNet.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,  # hold out 20% of the training data for validation
    callbacks=lenet_callbacks,
)

Epoch 1/10
Epoch 2/10
 23/800 [..............................] - ETA: 3s - loss: 0.2980 - accuracy: 0.8739

  saving_api.save_model(


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Give this run its own checkpoint file and TensorBoard directory. A
# ModelCheckpoint instance keeps its best-so-far score between fit() calls, so
# sharing one callback and one file across models mixes up which model is saved.
lenet2_callbacks = [
    ModelCheckpoint('LeNet2.h5', save_best_only=True, verbose=0),
    TensorBoard(log_dir='tb_logs/LeNet2'),
    e_stop,
]
lenet2_hist = LeNet2.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,  # hold out 20% of the training data for validation
    callbacks=lenet2_callbacks,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## New Model

LeNet and LeNet2 did a great job on CIFAKE. More modern CNNs have been moving toward using few pooling layers, if any, so it will be interesting to see whether we can do better without pooling.

In [None]:
def build_NoPool(input_shape=(32, 32, 3), outputs=2):
    """Build a small CNN that uses no pooling layers.

    Three 5x5 'same'-padded ReLU convolutions (20, 50, then 20 filters) are
    flattened directly into a 500-unit ReLU dense layer and a softmax output
    layer, so the spatial resolution is never reduced before flattening.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
        outputs: Number of units in the softmax output layer.

    Returns:
        An uncompiled `Model` mapping the input tensor to the softmax output.
    """
    X0 = Input(shape=input_shape)
    # The Input tensor already fixes the shape, so Conv2D takes no input_shape.
    X1 = Conv2D(20, kernel_size=(5, 5), padding='same', activation='relu')(X0)
    X2 = Conv2D(50, kernel_size=(5, 5), padding='same', activation='relu')(X1)
    X3 = Conv2D(20, kernel_size=(5, 5), padding='same', activation='relu')(X2)
    X4_flat = Flatten()(X3)
    X5 = Dense(500, activation='relu')(X4_flat)
    Ph = Dense(outputs, activation='softmax')(X5)
    return Model(inputs=[X0], outputs=[Ph])

In [None]:
# Instantiate the pooling-free model with its default arguments, then print
# its layer-by-layer summary.
no_pool = build_NoPool()
no_pool.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 32, 32, 20)        1520      
                                                                 
 conv2d_7 (Conv2D)           (None, 32, 32, 50)        25050     
                                                                 
 conv2d_8 (Conv2D)           (None, 32, 32, 20)        25020     
                                                                 
 flatten_2 (Flatten)         (None, 20480)             0         
                                                                 
 dense_4 (Dense)             (None, 500)               10240500  
                                                                 
 dense_5 (Dense)             (None, 2)                 1002

In [None]:
# Each model gets its own freshly constructed Adam optimizer.
no_pool.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss=loss,
    metrics=['accuracy'],
)

In [None]:
# Give this run its own checkpoint file and TensorBoard directory. A
# ModelCheckpoint instance keeps its best-so-far score between fit() calls, so
# sharing one callback and one file across models mixes up which model is saved.
no_pool_callbacks = [
    ModelCheckpoint('NoPool.h5', save_best_only=True, verbose=0),
    TensorBoard(log_dir='tb_logs/NoPool'),
    e_stop,
]
no_pool_hist = no_pool.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,  # hold out 20% of the training data for validation
    callbacks=no_pool_callbacks,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## EfficientNet from scratch

Our model with no pooling did a pretty good job, but LeNet2 still did better. Now we'll try training EfficientNet from scratch, since it is supposedly optimized to have the best number of parameters, and it did well with pretrained weights.

In [None]:
# EfficientNetB0 backbone with no pretrained weights and no classification head.
# NOTE(review): the Keras EfficientNet docs say the model rescales its inputs
# internally and expects raw [0, 255] pixels, whereas X_train was min-max scaled
# to [0, 1] earlier in this notebook — confirm whether that hurts training here.
efficient_net = EfficientNetB0(
    include_top=False,
    weights=None,
    input_shape=(32, 32, 3),
)

In [None]:
# With include_top=False and no pooling option, the backbone output is a 4-D
# feature map. Flatten it first; otherwise each Dense layer is applied per
# spatial location and the model emits a 4-D prediction instead of (batch, 2).
features = Flatten()(efficient_net.output)
x = Dense(512, activation='relu')(features)
x2 = Dense(256, activation='relu')(x)
Ph = Dense(2, activation='softmax')(x2)
model = Model(inputs=efficient_net.inputs, outputs=Ph)

In [None]:
# Same loss, metric and learning rate as the other models, with a fresh Adam.
model.compile(
    loss=loss,
    optimizer=Adam(learning_rate=learning_rate),
    metrics=['accuracy'],
)

In [None]:
# Give this run its own checkpoint file and TensorBoard directory. A
# ModelCheckpoint instance keeps its best-so-far score between fit() calls, so
# sharing one callback and one file across models mixes up which model is saved.
eff_net_callbacks = [
    ModelCheckpoint('EfficientNetB0_scratch.h5', save_best_only=True, verbose=0),
    TensorBoard(log_dir='tb_logs/EfficientNetB0_scratch'),
    e_stop,
]
eff_net_hist = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,  # hold out 20% of the training data for validation
    callbacks=eff_net_callbacks,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Interestingly, EfficientNet did not do well when trained from scratch. That is consistent with the general expectation that pretrained models perform better, but the other models we tried earlier still reached very high scores without any pretraining.