In [1]:
# Render matplotlib figures inline, in the output area of the cell that draws them.
%matplotlib inline

# Assignment 01 - Exploring CNNs for MNIST

In this assignment, we want you to modify the CNN architecture that we used in the last MNIST exercise, adding new layers and altering their hyperparameters. We have already loaded and preprocessed the data for you, so you can focus on the architecture and training of your network.

----------------------
***You do not need to alter the blocks `Imports`, `Load training data` and `Preprocessing`.***
## Imports

In [2]:
import numpy as np
from random import shuffle, seed
# Seeds Python's built-in `random` module, which is the generator `shuffle` draws from
# when the train/validation indexes are shuffled.
# Keep this seed, in order to compare the results with your classmates.
# NOTE(review): no NumPy or backend (TensorFlow) seed is set anywhere in this notebook,
# so weight initialisation and dropout are presumably not reproducible — confirm.
seed(42)

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (15,15) # Make the figures a bit bigger

# Dataset loader, model container, layers, label utilities and optimizers from Keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import optimizers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Load training data

In [3]:
# Number of digit classes in MNIST
nb_classes = 10

# Keras delivers the data already divided into a train+val part and a test part
(trainVal_data, trainVal_label), (X_test, y_test) = mnist.load_data()

# Keep 80% of the train+val samples for training and hold out 20% for validation
nData = len(trainVal_data)
nTrain = int(nData * 0.8)

# Shuffle the sample indexes so that both subsets are drawn at random
randomIdx = list(range(nData))
shuffle(randomIdx)
trainIdx, valIdx = randomIdx[:nTrain], randomIdx[nTrain:]

# Pick the images and their labels with the two disjoint index lists
X_train, X_val = trainVal_data[trainIdx], trainVal_data[valIdx]
y_train, y_val = trainVal_label[trainIdx], trainVal_label[valIdx]

## Obtain class weights and samples per class

In [4]:
from sklearn.utils.class_weight import compute_class_weight

# Count how many training samples each class has
unique, counts = np.unique(y_train, return_counts=True)
print("Train ---> ", dict(zip(unique, counts)), "\n")

# 'balanced' weights are inversely proportional to the class frequencies in y_train.
# `classes` and `y` are passed by keyword: recent scikit-learn releases accept them
# as keyword-only arguments, and older releases accept the same names as well.
class_weights = compute_class_weight('balanced', classes=unique, y=y_train)

# Key every weight by the label it was computed for (class_weights[i] belongs to
# unique[i]) instead of by its position, so the mapping stays correct even if the
# labels were not exactly 0..n-1.
train_class_weights = dict(zip(unique.tolist(), class_weights))

# Number of test samples per class, ordered by class label
unique, counts = np.unique(y_test, return_counts=True)
test_sample_per_class = counts

Train --->  {0: 4724, 1: 5393, 2: 4723, 3: 4881, 4: 4704, 5: 4313, 6: 4769, 7: 5001, 8: 4730, 9: 4762} 



## Preprocessing

In [5]:
# Every image is img_rows x img_cols pixels with a single channel
img_rows, img_cols = 28, 28

# Add the trailing channel axis (the first axis stays the number of images),
# cast to float32 and rescale the pixel values by 1/255.
# Note: the inputs are overwritten, so running this cell twice rescales them twice.
X_train = X_train.reshape((len(X_train), img_rows, img_cols, 1)).astype('float32') / 255
X_val = X_val.reshape((len(X_val), img_rows, img_cols, 1)).astype('float32') / 255
X_test = X_test.reshape((len(X_test), img_rows, img_cols, 1)).astype('float32') / 255

# One-hot encode the integer labels for the categorical cross-entropy loss
Y_train, Y_val, Y_test = (np_utils.to_categorical(labels, nb_classes)
                          for labels in (y_train, y_val, y_test))

------------------------------------------
------------------------------------------
------------------------------------------
------------------------------------------

***Your assignment starts here!!!***

# Task 1 [0.25 pts] - Add a fully-connected layer
Let's investigate whether the network gets better as we add more layers to it. We want you to add another fully-connected layer to the network from the last exercise. Your network will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- An operation to flatten the feature maps into an array of size 10x12x12 = 1440
- Dropout operation with probability 0.25, applied to flattened array
- **Fully connected layer with 100 units/neurons and ReLU activation;**
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task1_cnn.png" />

In [6]:
# Task 1 network: one conv/pool stage followed by a two-layer classifier
model = Sequential([
    # 10 filters of size 5x5 with ReLU on the 28x28x1 input
    Conv2D(10, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)),
    # 2x2 max pooling halves each spatial dimension (24x24 to 12x12)
    MaxPooling2D(pool_size=(2, 2)),
    # Unroll the feature maps into a flat array of 10x12x12 = 1440 values
    Flatten(),
    # Dropout with probability 0.25 on the flattened array
    Dropout(0.25),
    # Hidden fully connected layer: 100 units with ReLU
    Dense(100, activation='relu'),
    # Output layer: one unit per class with softmax
    Dense(nb_classes, activation='softmax'),
])

## Compile and train your model

In [7]:
# Plain stochastic gradient descent with a learning rate (lr) of 0.01
sgd = optimizers.SGD(lr=0.01)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train for 4 epochs in mini-batches of 128 samples, using the per-class training
# weights and reporting the metrics on the validation split
model.fit(x=X_train, y=Y_train,
          validation_data=(X_val, Y_val),
          class_weight=train_class_weights,
          epochs=4, batch_size=128, verbose=1)

Train on 48000 samples, validate on 12000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x22557376710>

## Evaluate on the test data

In [8]:
# Overall loss and accuracy on the test set
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


# Predicted label = index of the largest softmax output.
# `Sequential.predict_classes` was deprecated and later removed from Keras/TensorFlow;
# taking the argmax of `predict` yields the same labels on old and new versions.
predicted_classes = np.argmax(model.predict(X_test, verbose=0), axis=-1)
Y_test_classes = np.argmax(Y_test, axis=-1)

# Per-class accuracy: correct predictions of a class divided by the number of test
# samples of that class. The count is taken from the class mask itself, so it always
# refers to the right label and does not depend on a variable from another cell.
accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)

    correctPred = np.sum(predicted_classes[idx] == classIdx)
    accPerClass.append(correctPred / float(np.count_nonzero(idx)))

# The normalized accuracy is the unweighted mean of the per-class accuracies
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 0.2525385090440512
Test accuracy (NOT NORMALIZED): 0.9281
{0: 0.9795918367346939, 1: 0.9806167400881057, 2: 0.8798449612403101, 3: 0.9207920792079208, 4: 0.9358452138492872, 5: 0.905829596412556, 6: 0.9467640918580376, 7: 0.9134241245136187, 8: 0.9014373716632443, 9: 0.910802775024777} 

Normalized Acc -->  0.9274948790592552


----------------
----------------
# Task 2 [0.25 pts] - Add another pair of (conv + max pooling) layers
We want you to add another convolutional layer, followed by a max pooling layer, to the network. Your network will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- **Convolutional layer with 20 filters of size 5x5 with ReLU activation;**
- **Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (8x8 to 4x4);**
- An operation to flatten the feature maps into an array of size 20x4x4 = 320
- Dropout operation with probability 0.25, applied to flattened array
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task2_cnn.png" />

In [9]:
# define your model here
model = Sequential()

# Add the Convolution Layer with 5x5 Kernel and 10 filters
model.add(Conv2D(10, 
                 kernel_size=(5,5),
                 activation='relu',
                 input_shape=(28,28,1)))

# Add pooling layer with kernel 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))

# Add another Convolutional Layer 20 filters and 5x5 kernel
model.add(Conv2D(20, 
                 kernel_size=(5,5),
                 activation='relu'))

# Add pooling layer with kernel 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten
model.add(Flatten())

# Dropout with probability 0.25
model.add(Dropout(0.25))

# FC layer with 10 units and Softmax
model.add(Dense(nb_classes, activation='softmax'))

## Compile and train your model

In [10]:
# compile and train your model
sgd = optimizers.SGD(lr=0.01) #lr = learning rate
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(X_train, Y_train,
          batch_size=128, epochs=4, verbose=1,
          class_weight = train_class_weights,
          validation_data=(X_val, Y_val))

Train on 48000 samples, validate on 12000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x22554435320>

## Evaluate on the test data

In [11]:
# Overall loss and accuracy on the test set
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


# Predicted label = index of the largest softmax output.
# `Sequential.predict_classes` was deprecated and later removed from Keras/TensorFlow;
# taking the argmax of `predict` yields the same labels on old and new versions.
predicted_classes = np.argmax(model.predict(X_test, verbose=0), axis=-1)
Y_test_classes = np.argmax(Y_test, axis=-1)

# Per-class accuracy: correct predictions of a class divided by the number of test
# samples of that class. The count is taken from the class mask itself, so it always
# refers to the right label and does not depend on a variable from another cell.
accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)

    correctPred = np.sum(predicted_classes[idx] == classIdx)
    accPerClass.append(correctPred / float(np.count_nonzero(idx)))

# The normalized accuracy is the unweighted mean of the per-class accuracies
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 0.1902854533225298
Test accuracy (NOT NORMALIZED): 0.9469
{0: 0.9816326530612245, 1: 0.9806167400881057, 2: 0.9321705426356589, 3: 0.9386138613861386, 4: 0.9429735234215886, 5: 0.952914798206278, 6: 0.9603340292275574, 7: 0.914396887159533, 8: 0.9271047227926078, 9: 0.9365708622398414} 

Normalized Acc -->  0.9467328620218535


----------------
----------------
# Task 3 [0.25 pts] - Add Conv + MaxPool + FC
Now combine Tasks 1 and 2 by adding the convolutional, max pooling and fully connected (FC) layers to your network. Your CNN will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- **Convolutional layer with 20 filters of size 5x5 with ReLU activation;**
- **Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (8x8 to 4x4);**
- An operation to flatten the feature maps into an array of size 20x4x4 = 320
- Dropout operation with probability 0.25, applied to flattened array
- **Fully connected layer with 100 units/neurons and ReLU activation;**
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task3_cnn.png" />

In [12]:
# define your model here
model = Sequential()

# Add the Convolution Layer with 5x5 Kernel and 10 filters
model.add(Conv2D(10, 
                 kernel_size=(5,5),
                 activation='relu',
                 input_shape=(28,28,1)))

# Add pooling layer with kernel 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))

# Add another Convolutional Layer 20 filters and 5x5 kernel
model.add(Conv2D(20, 
                 kernel_size=(5,5),
                 activation='relu'))

# Add pooling layer with kernel 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten
model.add(Flatten())

# Dropout with probability 0.25
model.add(Dropout(0.25))

# Fully Connected layer with 100 units and ReLU activation
model.add(Dense(100, activation='relu'))

# FC layer with 10 units and Softmax
model.add(Dense(nb_classes, activation='softmax'))

## Compile and train your model

In [13]:
# compile and train your model
sgd = optimizers.SGD(lr=0.01) #lr = learning rate
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(X_train, Y_train,
          batch_size=128, epochs=4, verbose=1,
          class_weight = train_class_weights,
          validation_data=(X_val, Y_val))

Train on 48000 samples, validate on 12000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x22559c5a6d8>

## Evaluate on the test data

In [14]:
# Overall loss and accuracy on the test set
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


# Predicted label = index of the largest softmax output.
# `Sequential.predict_classes` was deprecated and later removed from Keras/TensorFlow;
# taking the argmax of `predict` yields the same labels on old and new versions.
predicted_classes = np.argmax(model.predict(X_test, verbose=0), axis=-1)
Y_test_classes = np.argmax(Y_test, axis=-1)

# Per-class accuracy: correct predictions of a class divided by the number of test
# samples of that class. The count is taken from the class mask itself, so it always
# refers to the right label and does not depend on a variable from another cell.
accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)

    correctPred = np.sum(predicted_classes[idx] == classIdx)
    accPerClass.append(correctPred / float(np.count_nonzero(idx)))

# The normalized accuracy is the unweighted mean of the per-class accuracies
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 0.16640454709231853
Test accuracy (NOT NORMALIZED): 0.9501
{0: 0.9877551020408163, 1: 0.9850220264317181, 2: 0.9418604651162791, 3: 0.9455445544554455, 4: 0.929735234215886, 5: 0.9316143497757847, 6: 0.9624217118997912, 7: 0.9289883268482491, 8: 0.9373716632443532, 9: 0.9454905847373637} 

Normalized Acc -->  0.9495804018765686


----------------
----------------
# Task 4 [0.25 pts] - Implement a new modification
Implement **one** modification to your network and evaluate it. Some possible alterations are:
- Add more convolutional and/or max pooling layers;
- Alter the kernel size and number of filters of the conv layers;
- Try training with different batch sizes and higher number of epochs;
- Try with different activations, besides ReLU and Softmax;
- Try optimizing the CNN with a different loss;
- Try different learning rates;

## Define the model

In [15]:
# define your model here
model = Sequential()

# Add the Convolution Layer with 5x5 Kernel and 10 filters
model.add(Conv2D(10, 
                 kernel_size=(5,5),
                 activation='relu',
                 input_shape=(28,28,1)))

# Add pooling layer with kernel 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))

# Add another Convolutional Layer 30 filters and 5x5 kernel
model.add(Conv2D(30, 
                 kernel_size=(5,5),
                 activation='relu'))

# Add another Convolutional Layer 20 filters and 3x3 kernel
model.add(Conv2D(20, 
                 kernel_size=(3,3),
                 activation='relu'))

# Add pooling layer with kernel 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))


# Flatten
model.add(Flatten())

# Dropout with probability 0.25
model.add(Dropout(0.25))

# Fully Connected layer with 50 units and ReLU activation
model.add(Dense(50, activation='relu'))

# FC layer with 10 units and Softmax
model.add(Dense(nb_classes, activation='softmax'))

## Compile and train your model

In [16]:
# compile and train your model
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(X_train, Y_train,
          batch_size=128, epochs=4, verbose=1,
          class_weight = train_class_weights,
          validation_data=(X_val, Y_val))

Train on 48000 samples, validate on 12000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x2255a092588>

## Evaluate on the test data

In [17]:
# Overall loss and accuracy on the test set
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


# Predicted label = index of the largest softmax output.
# `Sequential.predict_classes` was deprecated and later removed from Keras/TensorFlow;
# taking the argmax of `predict` yields the same labels on old and new versions.
predicted_classes = np.argmax(model.predict(X_test, verbose=0), axis=-1)
Y_test_classes = np.argmax(Y_test, axis=-1)

# Per-class accuracy: correct predictions of a class divided by the number of test
# samples of that class. The count is taken from the class mask itself, so it always
# refers to the right label and does not depend on a variable from another cell.
accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)

    correctPred = np.sum(predicted_classes[idx] == classIdx)
    accPerClass.append(correctPred / float(np.count_nonzero(idx)))

# The normalized accuracy is the unweighted mean of the per-class accuracies
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 0.05020933179943822
Test accuracy (NOT NORMALIZED): 0.985
{0: 0.9948979591836735, 1: 0.9920704845814978, 2: 0.9786821705426356, 3: 0.9841584158415841, 4: 0.9826883910386965, 5: 0.9876681614349776, 6: 0.988517745302714, 7: 0.9854085603112841, 8: 0.9856262833675564, 9: 0.9702675916749257} 

Normalized Acc -->  0.9849985763279546
