In [1]:
# Libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Input, AveragePooling2D, Flatten
from tensorflow.keras.activations import linear, relu, sigmoid
from tensorflow.keras.optimizers import Adam, SGD
from keras.metrics import categorical_crossentropy
import matplotlib.pyplot as plt
import gzip
import struct 

In [2]:
# Gzipped IDX archives of the MNIST database, one variable per file
train_images = 'train-images-idx3-ubyte.gz'
train_labels = 'train-labels-idx1-ubyte.gz'
test_images = 't10k-images-idx3-ubyte.gz'
test_labels = 't10k-labels-idx1-ubyte.gz'

# All four archives in one list: test split first, then the training split
file_arr = [
    test_images,
    test_labels,
    train_images,
    train_labels,
]

#### File Formats for the MNIST Database

Per the MNIST landing page, the files are in IDX format, a simple binary format for storing vectors and multidimensional matrices of various numerical types. I have not personally written any code up until this point to read an IDX file, so the code below is a product of online documentation.

##### Image File
The image file contains the pixel input for all of the training and test examples in the MNIST dataset.

- First 4 Bytes: Magic Number -> 3rd byte represents the data type; 4th byte represents the number of dimensions of the array (3 in this case: images × rows × columns)
- Second 4 Bytes: Number of Images
- Third 4 bytes: Number of Rows
- Fourth 4 bytes: Number of Columns
- Rest -> Pixel Data
    - Integer - 1 byte each
    - from 0-255  


![training_image_format.png](attachment:a59286a9-9807-484b-943e-e09e2533cdcf.png)


##### Label File
The label file holds a vector of shape (60000,) for the training set, containing the label of each image in the corresponding image file; labels and images are matched by index.

- First 4 Bytes: Magic Number -> 3rd byte represents the data type; 4th byte represents the number of dimensions of the array (1 in this case: a flat vector of labels)
- Second 4 Bytes: Number of Items
- Rest -> Label Data
    - Integer - 1 byte each
    - from 0-9 

![training_label_format.png](attachment:15effb0d-09eb-4ac1-a17b-57163e53deaf.png)


#### Useful Libraries and Functions

##### Struct
Since it is noted in the MNIST dataset landing page that the data is stored in a C array, we needed to use a Python library to convert the contents of the data files into a python interpretable format. 

Using information of the number of bytes per data parameter as well as the data type for each, we are able to extract the necessary information from the file using struct's format strings. 

###### Struct Format Strings
- Magic Number, Rows, Columns, Number of Items are all specified as 32 bit integers -> ">I"
- Pixels, Labels are specified as a single byte integer -> ">B"

">" indicates that the data is stored MSB first (big-endian), as mentioned on the landing page.

In [23]:
# Function to parse MNIST data downloaded from http://yann.lecun.com/exdb/mnist/
def load_data(file, type):
    """Read a gzip-compressed MNIST IDX file into a NumPy array.

    Parameters
    ----------
    file : str or path-like
        Path to the gzipped IDX file.
    type : str
        'images' parses an image file (header: magic number, image count,
        rows, columns). Any other value parses the file as a label file
        (header: magic number, item count).

    Returns
    -------
    numpy.ndarray
        Array of NumPy's default integer dtype, shaped
        (nimages, nrows, ncols) for images or (nitems,) for labels.

    Raises
    ------
    ValueError
        If the file's magic number does not match the requested type.
    struct.error
        If the file ends before the header or the payload is complete.
    """
    # IDX magic number layout: two zero bytes, a data-type code (0x08 = unsigned
    # byte) and the number of dimensions. Image files are 3-D, label files 1-D.
    if type == 'images':
        expected_magic, ndims = 0x00000803, 3
    else:
        expected_magic, ndims = 0x00000801, 1

    with gzip.open(file, 'rb') as f:
        magic_num, = struct.unpack(">I", f.read(4))
        if magic_num != expected_magic:
            raise ValueError(
                f"{file!r}: magic number {magic_num} does not match the "
                f"expected {expected_magic} for type {type!r}"
            )

        # One big-endian 32-bit size per dimension follows the magic number
        shape = struct.unpack(">" + "I" * ndims, f.read(4 * ndims))
        total_bytes = 1
        for dim in shape:
            total_bytes *= dim

        payload = f.read(total_bytes)

    if len(payload) != total_bytes:
        raise struct.error(
            f"{file!r}: expected {total_bytes} data bytes, found {len(payload)}"
        )

    # Interpret the raw bytes directly instead of unpacking them one Python int
    # at a time, then widen to the default integer dtype so later arithmetic on
    # the values cannot wrap around at 255.
    data = np.frombuffer(payload, dtype=np.uint8).astype(np.int_)
    return data.reshape(shape)
    

In [24]:
# Load the training split of MNIST with load_data:
#   X_train <- pixel arrays from the image archive
#   Y_train <- labels from the label archive, aligned with X_train by index
X_train = load_data(file = train_images, type = 'images')
Y_train = load_data(file = train_labels, type = 'labels')
print(np.shape(X_train), np.shape(Y_train))

(60000, 28, 28) (60000,)


### Neural Network Architecture 
*referencing LeCun '98

In [16]:
# Build a Sequential Tensorflow Model modelling LeNet
#
# Feature-map sizes through the network for a 28x28x1 input:
#   C1 (5x5, 'same') -> 28x28x6    S2 (2x2 avg pool) -> 14x14x6
#   C3 (5x5)         -> 10x10x16   S4 (2x2 avg pool) -> 5x5x16
#   C5 (5x5)         -> 1x1x120    Flatten -> 120    F6 -> 84    Output -> 10

model = Sequential([
    # Declare the input explicitly rather than via `input_shape=` on C1
    Input(shape = (28,28,1)),
    Conv2D(filters = 6, kernel_size = (5,5), padding = 'same', activation = 'tanh', name = "C1"),
    AveragePooling2D(pool_size = (2,2), strides = 2, name = "S2"),
    Conv2D(filters = 16, kernel_size = (5,5), activation = 'tanh', name = "C3"),
    AveragePooling2D(pool_size = (2,2), strides = 2, name = "S4"),
    Conv2D(filters = 120, kernel_size = (5,5), activation = 'tanh', name = "C5"),
    # C5 yields a (1, 1, 120) feature map. Dense only acts on the last axis, so
    # without flattening the network would output (batch, 1, 1, 10) rather than
    # (batch, 10).
    Flatten(name = "Flatten"),
    Dense(units = 84, activation = 'tanh', name = 'F6'),
    Dense(units = 10, activation = 'softmax', name = "Output")
])

opt = Adam(learning_rate = .001)
# NOTE(review): categorical_crossentropy expects one-hot targets of shape
# (batch, 10), while Y_train as loaded has shape (60000,). Confirm the labels
# are one-hot encoded before fitting, or switch to
# sparse_categorical_crossentropy for integer class ids.
model.compile(optimizer = opt,
              loss = categorical_crossentropy ,
              metrics = ['accuracy'])

# The input shape is declared above, so the model is already built and no
# explicit build() call is needed. summary() prints the table itself and
# returns None, so it is not wrapped in print().
model.summary()


None
