In [1]:
# loding necessary packages
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.utils import to_categorical

In [2]:
# loading dataset
from keras.datasets import fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Convert labels to one-hot encoding
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [8]:
# base model with dense layers
model = Sequential()
model.add(Flatten(input_shape=(28, 28, 1)))
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

# generating model architecture
model.summary()

  super().__init__(**kwargs)


In [9]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # categorical cross entropy for multi-class classification

In [10]:
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test)) # validation has been set from testing data

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.6789 - loss: 9.6132 - val_accuracy: 0.7451 - val_loss: 0.7304
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.7639 - loss: 0.6539 - val_accuracy: 0.7900 - val_loss: 0.6114
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.8020 - loss: 0.5557 - val_accuracy: 0.8005 - val_loss: 0.5596
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8151 - loss: 0.5270 - val_accuracy: 0.7881 - val_loss: 0.6409
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.8267 - loss: 0.4959 - val_accuracy: 0.8253 - val_loss: 0.5256
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8324 - loss: 0.4773 - val_accuracy: 0.8165 - val_loss: 0.5747
Epoch 7/10
[1m

In [11]:
# test accuracy and loss
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f'Test accuracy: {test_accuracy:.4f}')

313/313 - 0s - 1ms/step - accuracy: 0.8327 - loss: 0.5063
Test accuracy: 0.8327


In [6]:
# history = model.fit(x_train, y_train, epochs=100, batch_size=10, validation_split=0.1, verbose=0)

In [7]:
# test accuracy and loss with epoch=100
# test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
# print(f'Test accuracy: {test_accuracy:.4f}')

313/313 - 1s - 2ms/step - accuracy: 0.7105 - loss: 2.0475
Test accuracy: 0.7105


A larger epochs=100 resulted in only 71% accuracy and a loss of 2.04. Therefore, a baseline epochs=10 has been established to ensure optimal model performance with this dataset.
Training for fewer epochs not only conserves time and computational resources but also helps mitigate the risk of overfitting. The substantial decrease in accuracy and the increase in loss obseved at 100 epochs suggest that the model is likely learning noise in the training data rather than generalising effectively. Thus, epochs=10 configuration appears to provide a more balanced and reliable performance.

## Experimenting with the model to improve network performance

**Increasing the number of layers and neurons**

In [None]:
model = Sequential()
model.add(Flatten(input_shape=(28, 28, 1)))
model.add(Dense(256, activation='relu'))  # Increased neurons
model.add(Dense(128, activation='relu'))  # Additional hidden layer
model.add(Dense(10, activation='softmax'))

model.summary()

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.7244 - loss: 7.2099 - val_accuracy: 0.7855 - val_loss: 0.6903
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8119 - loss: 0.5862 - val_accuracy: 0.8005 - val_loss: 0.5907
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8259 - loss: 0.5035 - val_accuracy: 0.8236 - val_loss: 0.5559
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 6ms/step - accuracy: 0.8345 - loss: 0.4743 - val_accuracy: 0.8170 - val_loss: 0.5719
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - accuracy: 0.8442 - loss: 0.4382 - val_accuracy: 0.8368 - val_loss: 0.4826
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8500 - loss: 0.4243 - val_accuracy: 0.8485 - val_loss: 0.4295
Epoch 7/10

In [None]:
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f'Test accuracy: {test_accuracy:.4f}')

313/313 - 1s - 2ms/step - accuracy: 0.8479 - loss: 0.4491
Test accuracy: 0.8479


Accuracy increased to 84.79%

**Using dropout regularisation**

In [None]:
from keras.layers import Dropout

model = Sequential()
model.add(Flatten(input_shape=(28, 28, 1)))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))  # 50% dropout
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))

model.summary()

  super().__init__(**kwargs)


In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.4827 - loss: 7.9867 - val_accuracy: 0.6490 - val_loss: 1.0851
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.4357 - loss: 1.6326 - val_accuracy: 0.4603 - val_loss: 1.5255
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.3861 - loss: 1.6959 - val_accuracy: 0.5117 - val_loss: 1.3805
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.4024 - loss: 1.6223 - val_accuracy: 0.5440 - val_loss: 1.1288
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - accuracy: 0.4157 - loss: 1.5063 - val_accuracy: 0.5348 - val_loss: 1.1249
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 5ms/step - accuracy: 0.4348 - loss: 1.3928 - val_accuracy: 0.5570 - val_loss: 1.0617
Epoch 7/10


In [None]:
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f'Test accuracy: {test_accuracy:.4f}')

313/313 - 1s - 2ms/step - accuracy: 0.6217 - loss: 0.9393
Test accuracy: 0.6217


Accuracy decreased dramatically and loss increased to 0.9.

**Different activation functions**

In [None]:
from keras.layers import LeakyReLU

model = Sequential()
model.add(Flatten(input_shape=(28, 28, 1)))
model.add(Dense(256))
model.add(LeakyReLU(alpha=0.1))  # Leaky ReLU
model.add(Dense(128))
model.add(LeakyReLU(alpha=0.1))  # Leaky ReLU
model.add(Dense(10, activation='softmax'))

model.summary()

  super().__init__(**kwargs)


In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.7296 - loss: 6.6477 - val_accuracy: 0.7795 - val_loss: 0.8272
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.8036 - loss: 0.6502 - val_accuracy: 0.7946 - val_loss: 0.7519
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8146 - loss: 0.6701 - val_accuracy: 0.8035 - val_loss: 0.7455
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8134 - loss: 0.7055 - val_accuracy: 0.8332 - val_loss: 0.6779
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.8260 - loss: 0.6033 - val_accuracy: 0.8276 - val_loss: 0.5786
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 5ms/step - accuracy: 0.8364 - loss: 0.5568 - val_accuracy: 0.8213 - val_loss: 0.6123
Epoch 7/10


In [None]:
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
print(f'Test accuracy: {test_accuracy:.4f}')

313/313 - 1s - 2ms/step - accuracy: 0.8445 - loss: 0.5253
Test accuracy: 0.8445


Accuracy didn't increased, however, loss increased from what we attained with the first experiment.