Name: Nikhil Arora

Student ID: 20848206

## Loading Packages

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras.optimizers import SGD

Using TensorFlow backend.


## Importing and Reshaping the Dataset

In [2]:
from keras.datasets import fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# reshape dataset to have a single channel
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1)) 

# Normalizing the pixel values to be in the range [0, 1] 
# by dividing them by 255
x_train = x_train/255.0
x_test = x_test/255.0

# Converting the labels into one-hot vectors
y_train = to_categorical(y_train, num_classes = 10)
y_test = to_categorical(y_test, num_classes = 10)

# Task 1: Build a neural network without convolutional layers to do the classification task

### Step 1: Defining a Model Architecture

In [3]:
model_T1 = Sequential()

#Input Layer
model_T1.add(Flatten(input_shape=(28, 28, 1)))

#Hidden Layers
model_T1.add(Dense(128, activation='relu'))
model_T1.add(Dense(64, activation='relu'))

#Output Layer
model_T1.add(Dense(10, activation='softmax'))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


### Step 2: Compiling the model

In [4]:
# Defining Optimizer, loss function and evaluation metrics
model_T1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [5]:
# Model Structure
model_T1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
dense_2 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                650       
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
_________________________________________________________________


### Step 3: Training the Model

In [6]:
# Setting a random state so that comparing the evaluation metrics is possible
tf.set_random_seed(1)
np.random.seed(1)

model_T1.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))



Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x17b3c105188>

### Step 4: Evaluating the model with Testing dataset

In [7]:
scores = model_T1.evaluate(x_test, y_test, verbose=1)
print("%s: %.2f%%" % (model_T1.metrics_names[1], scores[1]*100))

accuracy: 88.54%


### Step 5: Changing the Model Structure to get better evaluation results

Changing the number of neurons: 

In [8]:
model_T1 = Sequential()
model_T1.add(Flatten(input_shape=(28, 28, 1)))
model_T1.add(Dense(256, activation='relu'))
model_T1.add(Dense(128, activation='relu'))
model_T1.add(Dense(10, activation='softmax'))

# Defining Optimizer, loss function and evaluation metrics
model_T1.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Setting a random state so that comparing the evaluation metrics is possible
tf.set_random_seed(1)
np.random.seed(1)
model_T1.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test), verbose = 0)

scores = model_T1.evaluate(x_test, y_test, verbose=1)
print("%s: %.2f%%" % (model_T1.metrics_names[1], scores[1]*100))

accuracy: 88.70%


# Task 2: Build a neural network with the use of convolutional layers

### Step 1: Defining Model Architecture

In [9]:
model_T2 = Sequential()
#Convolutional Layer 1
model_T2.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1), 
                   padding='same'))
#Pooling Layer 1
model_T2.add(MaxPooling2D((2, 2)))
#Convolutional Layer 2
model_T2.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
#Pooling Layer 1
model_T2.add(MaxPooling2D((2, 2)))
#Flatten Layer
model_T2.add(Flatten())
#Fully-Connected Layer 1
model_T2.add(Dense(128, activation='relu'))
#Fully-Connected Layer 2 (Output Layer)
model_T2.add(Dense(10, activation='softmax'))




### Step 2: Compiling the Model

In [10]:
# Defining Optimizer, loss function and evaluation metrics
from keras.optimizers import Adam
custom_adam = Adam(lr=0.002)

model_T2.compile(loss='categorical_crossentropy', optimizer=custom_adam, metrics=['accuracy'])

In [11]:
# Model Structure
model_T2.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_7 (Dense)              (None, 128)               401536    
_________________________________________________________________
dense_8 (Dense)              (None, 10)               

### Step 3: Training the Model

In [12]:
# Setting a random state so that comparing the evaluation metrics is possible
tf.set_random_seed(1)
np.random.seed(1)

model_T2.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x17b3c5a31c8>

### Step 4: Evaluating the model with Testing dataset

In [13]:
scores = model_T2.evaluate(x_test, y_test, verbose=1)
print("%s: %.2f%%" % (model_T1.metrics_names[1], scores[1]*100))

accuracy: 91.14%


### Step 5: Changing the Model Structure to get better evaluation results

Changing the model structure by increasing the number of filters in the convolutional layer to 64 and 128 respectively

In [14]:
model_T2 = Sequential()
model_T2.add(Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1),
                   padding='same'))
model_T2.add(MaxPooling2D((2, 2)))
model_T2.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model_T2.add(MaxPooling2D((2, 2)))
model_T2.add(Flatten())
model_T2.add(Dense(128, activation='relu'))
model_T2.add(Dense(10, activation='softmax'))

from keras.optimizers import Adam
custom_adam = Adam(lr=0.002)

model_T2.compile(loss='categorical_crossentropy', optimizer=custom_adam, metrics=['accuracy'])

# Setting a random state so that comparing the evaluation metrics is possible
tf.set_random_seed(1)
np.random.seed(1)
model_T2.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))

scores = model_T2.evaluate(x_test, y_test, verbose=1)
print("%s: %.2f%%" % (model_T1.metrics_names[1], scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
accuracy: 91.58%


## Task 3: Change the type of optimizer or learning rate that you applied in the previous tasks, and see how these changes can influence model performance

In [15]:
#Testing custom sgd with different learning rates
lrate = (0.002, 0.004, 0.006, 0.008)
for lr in lrate:
    epochs = 10
    decay = lr/epochs
    sgd = SGD(lr=lr, momentum=0.7, decay=decay, nesterov=False) #Stochastic gradient descent optimizer
    # Compile model
    model_T2.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model_T2.fit(x_train, y_train, epochs=epochs, batch_size=32, validation_data=(x_test, y_test), verbose = 0)
    scores = model_T2.evaluate(x_test, y_test, verbose=1)
    print("learning rate: ", lr)
    print("%s: %.2f%%" % (model_T2.metrics_names[1], scores[1]*100))

learning rate:  0.002
accuracy: 92.65%
learning rate:  0.004
accuracy: 92.74%
learning rate:  0.006
accuracy: 92.73%
learning rate:  0.008
accuracy: 92.80%
