In [103]:
"""
NICOLE JOSEPH
Deep Learning HW 3
"""


'\nNICOLE JOSEPH\nDeep Learning HW 3\n'

In [104]:
# citation: To load the MNIST data 
# https://colab.research.google.com/github/AviatorMoser/keras-mnist-tutorial/blob/master/MNIST%20in%20Keras.ipynb#scrollTo=oYLlg52B_uCJ

# Function to download the MNIST dataset - avoid using built in keras MNIST dataset
from requests import get
def download_file(url, file_name, timeout=60):
    """Download `url` and save the raw response body to `file_name`.

    Args:
        url: address of the file to fetch.
        file_name: local path the response body is written to (binary mode).
        timeout: seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    # Fetch first: the target file is only created/overwritten once the
    # download has actually succeeded, so a failed request cannot leave an
    # empty or truncated file behind under the dataset's name.
    response = get(url, timeout=timeout)
    # Fail loudly instead of saving an HTML error page as if it were the data.
    response.raise_for_status()
    with open(file_name, "wb") as file:
        file.write(response.content)


In [105]:
# MNIST data set downloaded from http://yann.lecun.com/exdb/mnist/
# The four archives (train/test images and labels) live under the same base URL
# and are saved locally under their original names, in the order listed here.
for archive_name in ('train-images-idx3-ubyte.gz',
                     'train-labels-idx1-ubyte.gz',
                     't10k-images-idx3-ubyte.gz',
                     't10k-labels-idx1-ubyte.gz'):
    download_file('http://yann.lecun.com/exdb/mnist/' + archive_name, archive_name)

# print("files downloaded")

In [106]:
!pip install seaborn==0.9.0
!pip install --upgrade scikit-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [107]:
import gzip
import numpy as np
import pandas as pd
from time import time

import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, Dropout, Activation # Types of layers to be used in the model
from keras.models import Sequential  # Model type to be used
from tensorflow.python.keras import regularizers

from keras.utils import np_utils                       
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, Flatten
from keras.layers import BatchNormalization

from keras.utils.np_utils import to_categorical
from keras.callbacks import TensorBoard

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()


In [108]:
# Data Set Understanding:
# training set images: 60,000 28x28 pixel images for training
# training set labels: 60,000 corresponding labels for training
# test set images: 10,000 28x28 pixel images for testing
# test set labels: 10,000 corresponding labels for testing

def read_mnist(images_path: str, labels_path: str):
    """Load a gzip-compressed IDX image/label file pair (the MNIST format).

    Both files start with a header of big-endian 32-bit integers:
    labels -> (magic=2049, count); images -> (magic=2051, count, rows, cols).
    The header is validated and the image shape is taken from it instead of
    being hard-coded, so a wrong, swapped or corrupted file is reported
    instead of being silently misread.

    Args:
        images_path: path to the gzipped idx3-ubyte image file.
        labels_path: path to the gzipped idx1-ubyte label file.

    Returns:
        (features, labels): features is a uint8 array of shape
        (count, rows, cols, 1) - the trailing axis is the single grayscale
        channel - and labels is a uint8 array of shape (count,).

    Raises:
        ValueError: if a magic number is wrong, the two files disagree on the
            number of samples, or a file is shorter than its header claims.
    """
    with gzip.open(labels_path, 'rb') as labels_file:
        label_bytes = labels_file.read()
    with gzip.open(images_path, 'rb') as images_file:
        image_bytes = images_file.read()

    # '>i4' = big-endian int32, the integer format used by IDX headers
    label_magic, label_count = (
        int(v) for v in np.frombuffer(label_bytes, dtype='>i4', count=2))
    image_magic, image_count, rows, cols = (
        int(v) for v in np.frombuffer(image_bytes, dtype='>i4', count=4))

    if label_magic != 2049:
        raise ValueError(f"{labels_path}: not an IDX label file (magic number {label_magic})")
    if image_magic != 2051:
        raise ValueError(f"{images_path}: not an IDX image file (magic number {image_magic})")
    if label_count != image_count:
        raise ValueError(
            f"sample count mismatch: {image_count} images vs {label_count} labels")

    # labels are already one-dimensional; payload starts after the 8-byte header
    labels = np.frombuffer(label_bytes, dtype=np.uint8, count=label_count, offset=8)

    # pixels start after the 16-byte header; add a trailing channel axis
    features = np.frombuffer(
        image_bytes, dtype=np.uint8, count=image_count * rows * cols, offset=16
    ).reshape(image_count, rows, cols, 1)

    return features, labels

In [109]:
# citation: https://colab.research.google.com/github/AviatorMoser/keras-mnist-tutorial/blob/master/MNIST%20in%20Keras.ipynb#scrollTo=e1PU9ymwIaOj
# we don't need to flatten each image into a 784-length vector because we want to perform convolutions first
# add an additional dimension to represent the single-channel

# Expected shapes:
#X_train
# (60000, 28, 28, 1)
#y_train
# (60000,) ONE DIMENSIONAL ARRAY
# X_test
# (10000, 28, 28, 1)
# y_test
# (10000,)

# read_mnist builds and returns new arrays, so nothing is pre-allocated here
# (an object-dtype placeholder of shape (60000, 28, 28, 1) would only reserve
# memory for ~47 million pointers and be discarded by the assignment below).
X_train, y_train = read_mnist('train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz')
X_test, y_test = read_mnist('t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz')

# check the loaded data against the expected shapes
assert X_train.shape == (60000, 28, 28, 1), X_train.shape
assert y_train.shape == (60000,), y_train.shape
assert X_test.shape == (10000, 28, 28, 1), X_test.shape
assert y_test.shape == (10000,), y_test.shape

In [110]:
# verify that read_mnist function call worked
#print(X_train[5:8, 7:10 ])
#print (y_test[20:25])

In [111]:
# Preparing pixel data
# Pixel values for each image in the dataset are unsigned integers in the range between black and white, or 0 and 255
# Normalize the pixel values of grayscale images (rescale them to the range [0,1] )

# Cast the integer pixels to 32-bit floats and divide by the maximum pixel
# value (255) in one expression per split, which rescales them to [0, 1].
# NOTE: X_train / X_test are rebound to the scaled arrays, so running this
# cell a second time would divide by 255 again.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [112]:
# number of unique digits = length of each one-hot label vector
nb_classes = 10

# one-hot encode the integer class labels of both splits (train first, then test)
Y_train, Y_test = (to_categorical(digit_labels, nb_classes)
                   for digit_labels in (y_train, y_test))

In [113]:
# For CNN model architecture, this resource below was very helpful:
# citation: https://colab.research.google.com/github/slxu/CSE548-Course-Project/blob/master/mnist_keras.ipynb#scrollTo=4IXmdGQSlPz1 

# The network is described as five groups of layers, created top to bottom in
# the order they are stacked, and then handed to Sequential as one linear stack.

# For implementing L2 Regularization:
# citation: https://colab.research.google.com/github/dphi-official/Deep_Learning_Bootcamp/blob/master/Optimization_Techniques/Regularization_and_Dropout.ipynb#scrollTo=s4afU-2YsDQg
# kernel_regularizer is passed to every Conv2D and Dense layer below

# Convolution Layer 1
# 32 different 3x3 kernels --> 32 feature maps
conv_stage_1 = [
    Conv2D(32, (3, 3), input_shape=(28, 28, 1),
           kernel_regularizer=regularizers.l2(l2=0.01)),
    # normalize each feature map before activation
    BatchNormalization(axis=-1),
    Activation('relu'),
]
# handle on the stage's activation layer
convLayer01 = conv_stage_1[-1]

# Convolution Layer 2
# NOTE(review): l2=0.2 here versus 0.01 on every other layer - confirm intentional
conv_stage_2 = [
    Conv2D(32, (3, 3), kernel_regularizer=regularizers.l2(l2=0.2)),
    BatchNormalization(axis=-1),
    Activation('relu'),
    # Pool the max values over a 2x2 kernel
    MaxPooling2D(pool_size=(2, 2)),
]
# handle on the stage's pooling layer
convLayer02 = conv_stage_2[-1]

# Convolution Layer 3
# 64 different 3x3 kernels --> so 64 feature maps
conv_stage_3 = [
    Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(l2=0.01)),
    BatchNormalization(axis=-1),
    Activation('relu'),
]
convLayer03 = conv_stage_3[-1]

# Convolution Layer 4
conv_stage_4 = [
    Conv2D(64, (3, 3), kernel_regularizer=regularizers.l2(l2=0.01)),
    BatchNormalization(axis=-1),
    Activation('relu'),
    # Pool the max values over a 2x2 kernel
    MaxPooling2D(pool_size=(2, 2)),
]
convLayer04 = conv_stage_4[-1]

classifier_head = [
    # Flatten final 4x4x64 output matrix into a 1024-length vector
    Flatten(),

    # Fully Connected Layer 5
    # 512 FCN nodes
    Dense(512, kernel_regularizer=regularizers.l2(l2=0.01)),
    BatchNormalization(),
    Activation('relu'),

    # Fully Connected Layer 6
    # 20% dropout of randomly selected nodes
    Dropout(0.2),
    # final 10 FCN nodes
    Dense(10, kernel_regularizer=regularizers.l2(l2=0.01)),
    # softmax activation for output layer
    Activation('softmax'),
]

# Linear stacking of layers, in the order the groups were defined
model = Sequential(
    conv_stage_1 + conv_stage_2 + conv_stage_3 + conv_stage_4 + classifier_head
)

# compile model
model.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])


In [114]:
# print the layer-by-layer table: layer (type), output shape, parameter count
model.summary()

Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_107 (Conv2D)         (None, 26, 26, 32)        320       
                                                                 
 batch_normalization_120 (Ba  (None, 26, 26, 32)       128       
 tchNormalization)                                               
                                                                 
 activation_133 (Activation)  (None, 26, 26, 32)       0         
                                                                 
 conv2d_108 (Conv2D)         (None, 24, 24, 32)        9248      
                                                                 
 batch_normalization_121 (Ba  (None, 24, 24, 32)       128       
 tchNormalization)                                               
                                                                 
 activation_134 (Activation)  (None, 24, 24, 32)     

In [116]:
# validation during training stage
# citation: https://stackoverflow.com/questions/68428331/is-validation-split-0-2-in-keras-a-cross-validation
# citation: https://datascience.stackexchange.com/questions/38955/how-does-the-validation-split-parameter-of-keras-fit-function-work

# steps_per_epoch = number of training samples divided by the batch size.
# 48000 is the 80% of the 60,000 training images that remain for training
# once validation_split=0.2 holds out 20% of them for validation.
fit_options = dict(
    batch_size=128,
    epochs=5,
    verbose=1,
    validation_split=0.2,
    shuffle=True,
    steps_per_epoch=48000 // 128,
)
model.fit(X_train, Y_train, **fit_options)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0ef908dd90>

In [117]:
# citation: https://androidkt.com/what-does-model-evaluate-return-keras/
# evaluate on the held-out test images; the first entry of the result is
# reported as the score (loss) and the second as the accuracy
# (model.metrics_names can be printed to confirm the order)
score = model.evaluate(X_test, Y_test)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

Test score: 1.893710732460022
Test accuracy: 0.9918000102043152
