<a href="https://colab.research.google.com/github/snickerphudle/FutureMakers2021/blob/main/CNN_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#import a bunch of stuff

import numpy as np
import pandas as pd

#visualization
import seaborn as sns
import matplotlib.pyplot as plt

#data processing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

#model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Flatten, Dropout, Dense
from keras.optimizers import Adam, SGD

#dataset
from tensorflow.keras.datasets import mnist
import tensorflow as tf

In [2]:
# Load the MNIST dataset via the Keras dataset helper.
# load_data() returns two (images, labels) pairs, unpacked here into the
# training split (X_train, y_train) and the test split (X_test, y_test).
# The first call downloads mnist.npz (see the cell output below).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Images are arrays of pixel intensities in [0, 255]; dividing by 255 rescales
# every pixel into the range [0, 1].
# Cast to float32 first: dividing the raw integer arrays by 255.0 would produce
# float64, doubling memory use for no benefit because Keras computes in float32
# by default.
# NOTE: this cell overwrites its own inputs, so re-running it without reloading
# the data would scale the pixels a second time.
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

In [4]:
# One-hot encode the integer class labels: each label becomes a length-10
# vector with a single 1 at the index of its class.
from tensorflow.keras.utils import to_categorical

y_train, y_test = (
    to_categorical(labels, num_classes=10) for labels in (y_train, y_test)
)

In [5]:
# Conv2D layers expect 4-D input: (batch_size, height, width, color_channels).
# The images are 28x28 with a single channel, so a trailing axis of size 1 is added.
# -1 asks NumPy to infer that axis from the array's total size, so the batch
# dimension (60000 for the training set) never has to be hard-coded.
X_train = np.reshape(X_train, (-1, 28, 28, 1))
X_test = np.reshape(X_test, (-1, 28, 28, 1))

In [6]:
# Model architecture:
# conv -> pool -> conv -> pool -> flatten -> dense -> dropout -> dense (output)
model = Sequential([
    # Feature extraction: convolution + pooling, twice.
    # Each Conv2D sets filters, kernel size and activation; stride and padding
    # are left at their defaults. input_shape on the first layer declares
    # 28x28 single-channel images.
    Conv2D(filters=20, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(filters=10, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    # Flatten the feature maps into one vector per image.
    Flatten(),

    # Fully connected hidden layer.
    Dense(units=64, activation='relu'),

    # Dropout helps prevent overfitting by randomly zeroing a fraction of the
    # activations during training, so the model cannot lean too heavily on any
    # single feature. A good rate is usually between 0.2 and 0.5.
    Dropout(rate=0.2),

    # Output layer: softmax over the 10 classes.
    Dense(units=10, activation='softmax'),
])

In [7]:
# Print a layer-by-layer overview of the model (output shapes and parameter
# counts). Usually worth running as a sanity check on the architecture
# before training.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 20)        200       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 20)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 10)        1810      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 10)          0         
_________________________________________________________________
flatten (Flatten)            (None, 250)               0         
_________________________________________________________________
dense (Dense)                (None, 64)                16064     
_________________________________________________________________
dropout (Dropout)            (None, 64)                0

In [14]:
# Optimizer
# The learning rate is one of the most important hyperparameters to tune.
# SGD with momentum is an alternative, e.g. SGD(learning_rate=0.01, momentum=0.99);
# momentum smooths successive updates, which dampens the effect of noisy batches.
a = Adam(learning_rate=0.001)

# categorical_crossentropy matches the one-hot encoded labels; accuracy is
# tracked as an additional metric during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=a,
    metrics=['accuracy'],
)

In [9]:
# How many samples are used for each weight update:
#   stochastic gradient descent (SGD): 1 sample      -> update
#   mini-batch:                        1-60000 samples -> update
#   (full) batch:                      60000 samples  -> update
#
# Small batches: outliers will throw off the model a lot.
# Large batches: it takes longer to reach a minimum.
# Mini-batches are the practical middle ground, and powers of 2 are
# generally good batch sizes.
BATCH_SIZE, EPOCHS = 32, 10

In [15]:
# Train the model; `history` records the per-epoch loss and metrics.
# Validation uses a slice held out from the training data (Keras takes the last
# 10% of the samples, before shuffling) rather than the test set, so the test
# set stays untouched for a final, unbiased evaluation.
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
