In [1]:
%matplotlib inline

# Assignment 01 - Exploring CNNs for MNIST

In this assignment, we want you to modify the CNN architecture that we used in the last MNIST exercise, adding new layers and altering their hyperparameters. We have already loaded and preprocessed the data for you, so you can focus on the architecture and training of your network.

----------------------
*** You do not need to alter the blocks `Imports`, `Load training data` and `Preprocessing` ***
## Imports

In [2]:
import numpy as np
from random import shuffle, seed
seed(42)  #keep this seed, in order to compare the results with your classmates

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (15,15) # Make the figures a bit bigger

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import optimizers

Using TensorFlow backend.


## Load training data

In [3]:
nb_classes = 10

# the data, shuffled and split between trainVal and test sets
(trainVal_data, trainVal_label), (X_test, y_test) = mnist.load_data()

# We want now to split the trainVal data into train and validation sets
nData = trainVal_data.shape[0]  #find the size of trainVal
nTrain = int(nData * 0.8)  #80% to train, 20% to val

randomIdx = list(range(nData))   #randomly select indexes
shuffle(randomIdx)
trainIdx = randomIdx[:nTrain] 
valIdx = randomIdx[nTrain:]

# Split the data
X_val, y_val = trainVal_data[valIdx], trainVal_label[valIdx]
X_train, y_train = trainVal_data[trainIdx], trainVal_label[trainIdx]

## Obtain class weights and samples per class

In [23]:
unique, counts = np.unique(y_train, return_counts=True)
print("Train ---> ", dict(zip(unique, counts)), "\n")


from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight('balanced', np.unique(y_train), y_train)
train_class_weights = dict(enumerate(class_weights))

print(train_class_weights, "\n")

unique, counts = np.unique(y_test, return_counts=True)
test_sample_per_class = counts

Train --->  {0: 4724, 1: 5393, 2: 4723, 3: 4881, 4: 4704, 5: 4313, 6: 4769, 7: 5001, 8: 4730, 9: 4762} 

{0: 1.0160880609652836, 1: 0.8900426478768775, 2: 1.0163031971204743, 3: 0.9834050399508297, 4: 1.0204081632653061, 5: 1.1129144447020636, 6: 1.0065003145313483, 7: 0.9598080383923215, 8: 1.014799154334038, 9: 1.0079798404031919} 



## Preprocessing

In [13]:
img_rows, img_cols = 28, 28

#The first dimension refers to the number of images
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)

X_train = X_train.astype('float32')
X_val = X_val.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_val /= 255
X_test /= 255

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_val = np_utils.to_categorical(y_val, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

print("Training matrix shape", X_train.shape)
print("Validation matrix shape", X_val.shape)
print("Testing matrix shape", X_test.shape)

Training matrix shape (48000, 28, 28, 1)
Validation matrix shape (12000, 28, 28, 1)
Testing matrix shape (10000, 28, 28, 1)


------------------------------------------
------------------------------------------
------------------------------------------
------------------------------------------

***Your assignment starts here!!!***

# Task 1 [0.25 pts] - Add a fully-connected layer
Let's investigate if the network gets better as we add more layers to it. We want you to add another fully-connected layer to the network from last exercise. Your network will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- An operation to flatten the feature maps into an array of size 10x12x12 = 1440
- Dropout operation with probability 0.25, applied to flattened array
- **Fully connected layer with 100 units/neurons and ReLU activation;**
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task1_cnn.png" />

In [20]:
# define your model here
model = Sequential()

#Conv layer with 10 filters of size 5x5 and ReLU activation
model.add(Conv2D(10, kernel_size=(5,5), activation='relu', input_shape=(28,28,1)))

#Max pooling of size 2x2
model.add(MaxPooling2D(pool_size=(2,2)))

#Flatten operation
model.add(Flatten())

#Dropout with probability 0.25
model.add(Dropout(0,25))

#FC layer with 100 neurons and softmax activation
model.add(Dense(100, activation='relu'))

#FC layer with 10 neurons and softmax activation
model.add(Dense(10, activation='softmax'))

## Compile and train your model

In [21]:
# compile and train your model
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])

## Evaluate on the test data

In [24]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


predicted_classes = model.predict_classes(X_test)
Y_test_classes = np.argmax(Y_test, axis=-1)

accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)
    
    correctPred = np.sum(predicted_classes[idx] == Y_test_classes[idx])
    accPerClass.append( correctPred / float(test_sample_per_class[classIdx]))
    
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 2.3027055946350097
Test accuracy (NOT NORMALIZED): 0.0857
{0: 0.0020408163265306124, 1: 0.48105726872246696, 2: 0.003875968992248062, 3: 0.0, 4: 0.0814663951120163, 5: 0.09080717488789238, 6: 0.011482254697286013, 7: 0.11964980544747082, 8: 0.002053388090349076, 9: 0.007928642220019821} 

Normalized Acc -->  0.080036171449628


----------------
----------------
# Task 2 [0.25 pts] - Add another (conv + max pooling) layers
We want you to add another convolutional layer, followed by a max pooling to the network. Your network will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- **Convolutional layer with 20 filters of size 5x5 with ReLU activation;**
- **Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (8x8 to 4x4);**
- An operation to flatten the feature maps into an array of size 20x4x4 = 320
- Dropout operation with probability 0.25, applied to flattened array
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task2_cnn.png" />

In [25]:
# define your model here
model = Sequential()

#Conv layer with 10 filters of size 5x5 and ReLU activation
model.add(Conv2D(10, kernel_size=(5, 5),
activation='relu', #ReLU activation
input_shape=(28, 28, 1))) #no need to include the batch size

#Max pooling of size 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))

#Conv layer with 20 filters of size 5x5 and ReLU activation
model.add(Conv2D(20, kernel_size=(5, 5),
activation='relu', #ReLU activation
input_shape=(12, 12, 1))) #no need to include the batch size

#Max pooling of size 2x2
model.add(MaxPooling2D(pool_size=(2, 2))) 

#Flatten operation
model.add(Flatten())

#Dropout with probability 0.25
model.add(Dropout(0.25))

#FC layer with 10 neurons and softmax activation
model.add(Dense(10, activation='softmax'))

## Compile and train your model

In [26]:
# compile and train your model
sgd = optimizers.SGD(lr=0.01) #lr = learning rate
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

## Evaluate on the test data

In [27]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


predicted_classes = model.predict_classes(X_test)
Y_test_classes = np.argmax(Y_test, axis=-1)

accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)
    
    correctPred = np.sum(predicted_classes[idx] == Y_test_classes[idx])
    accPerClass.append( correctPred / float(test_sample_per_class[classIdx]))
    
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 2.3025586582183837
Test accuracy (NOT NORMALIZED): 0.126
{0: 0.2979591836734694, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.04378818737270876, 5: 0.03139013452914798, 6: 0.8100208768267223, 7: 0.11770428015564202, 8: 0.0, 9: 0.0} 

Normalized Acc -->  0.13008626625576905


----------------
----------------
# Task 3 [0.25 pts] - Add Conv + MaxPool + FC
Now combine tasks 1 and 2; adding the convolutional, max pooling and fc layer to your network. Your CNN will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- **Convolutional layer with 20 filters of size 5x5 with ReLU activation;**
- **Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (8x8 to 4x4);**
- An operation to flatten the feature maps into an array of size 20x4x4 = 320
- Dropout operation with probability 0.25, applied to flattened array
- **Fully connected layer with 100 units/neurons and ReLU activation;**
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task3_cnn.png" />

In [28]:
# define your model here
model = Sequential()

#Conv layer with 10 filters of size 5x5 and ReLU activation
model.add(Conv2D(10, kernel_size=(5, 5),
activation='relu', #ReLU activation
input_shape=(28, 28, 1))) #no need to include the batch size

#Max pooling of size 2x2
model.add(MaxPooling2D(pool_size=(2, 2)))

#Conv layer with 20 filters of size 5x5 and ReLU activation
model.add(Conv2D(20, kernel_size=(5, 5),
activation='relu', #ReLU activation
input_shape=(12, 12, 1))) #no need to include the batch size

#Max pooling of size 2x2
model.add(MaxPooling2D(pool_size=(2, 2))) 

#Flatten operation
model.add(Flatten())

#Dropout with probability 0.25
model.add(Dropout(0.25))

#FC layer with 100 neurons and softmax activation
model.add(Dense(100, activation='relu'))

#FC layer with 10 neurons and softmax activation
model.add(Dense(10, activation='softmax'))

## Compile and train your model

In [29]:
# compile and train your model
sgd = optimizers.SGD(lr=0.01) #lr = learning rate
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

## Evaluate on the test data

In [30]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


predicted_classes = model.predict_classes(X_test)
Y_test_classes = np.argmax(Y_test, axis=-1)

accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)
    
    correctPred = np.sum(predicted_classes[idx] == Y_test_classes[idx])
    accPerClass.append( correctPred / float(test_sample_per_class[classIdx]))
    
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 2.302569728088379
Test accuracy (NOT NORMALIZED): 0.1033
{0: 0.0, 1: 0.0, 2: 0.0, 3: 0.009900990099009901, 4: 0.0, 5: 0.0, 6: 0.0, 7: 0.995136186770428, 8: 0.0, 9: 0.0} 

Normalized Acc -->  0.1005037176869438


----------------
----------------
# Task 4 [0.25 pts] - Implement a new modification
Implement **one** modification to your network and evaluate it. Some possible alterations are:
- Add more convolutional and/or max pooling layers;
- Alter the kernel size and number of filters of the conv layers;
- Try training with different batch sizes and higher number of epochs;
- Try with different activations, besides ReLU and Softmax;
- Try optimizing the CNN with a different loss;
- Try different learning rates;