In [None]:
import tensorflow.keras as keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation
from tensorflow.keras import backend as K

# setting learning parameters
batch_size = 128 # minibatch size
num_classes = 10
epochs = 24 # how many full passes through the full training set

# each minibatch process results in a gradient descent step and model parameter updates

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()




In [4]:
if K.image_data_format() == 'channels_first': #K.image_data_format() returns a string telling us how keras wants the output for the tensorflow backend
    # we need to reformat the data accordingly 
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255 #scaling of the data 
x_test /= 255

y_train = keras.utils.to_categorical(y_train, num_classes) #one hot encoding mapping of the label values
y_test = keras.utils.to_categorical(y_test, num_classes)

In [7]:
# building model with keras, sequentially adding layers 
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), # first argument is number of filters and then kernel size
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25)) #25% probability of dropping out the output
model.add(Flatten())
model.add(Dense(128, activation='relu')) #128 nodes
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

#compile the model, adding loss function, optimiser and metrics to report during training
model.compile(loss=keras.losses.categorical_crossentropy, #multiclass version of binary cross-entropy
              optimizer=keras.optimizers.Adam(), #Adam changes learning rate during training
              metrics=['accuracy'])

print()
print("Model parameters = %d" % model.count_params())
print()
print(model.summary())
print()


Model parameters = 1199882

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 9216)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               1179776   
_________________________________________________________________
dropout_1 (Dropout)        

2026-02-08 18:54:34.583493: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2026-02-08 18:54:34.589434: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-02-08 18:54:34.602074: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [8]:
#training the model
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test)) #here we use all test data as validation, normally we should keep some as final tes data
score = model.evaluate(x_test, y_test, verbose=0) #here normally we would use test data that was not used in fit 
print('Test loss:', score[0])
print('Test accuracy:', score[1])

model.save("/home/kasia/Deep_learning/Models/mnist_cnn_base_model.keras") #keras score

2026-02-08 18:55:57.612133: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2026-02-08 18:55:57.619091: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2188805000 Hz


Epoch 1/24
Epoch 2/24
Epoch 3/24
Epoch 4/24
Epoch 5/24
Epoch 6/24
Epoch 7/24
Epoch 8/24
Epoch 9/24
Epoch 10/24
Epoch 11/24
Epoch 12/24
Epoch 13/24
Epoch 14/24
Epoch 15/24
Epoch 16/24
Epoch 17/24
Epoch 18/24
Epoch 19/24
Epoch 20/24
Epoch 21/24
Epoch 22/24
Epoch 23/24
Epoch 24/24
Test loss: 0.031112566590309143
Test accuracy: 0.993399977684021


In [None]:
#some notes from experiments
# more convolutional layers -more parameters, but sometimes just minor performance increase, this can be mitigated by adding extra pooling layer
# kernel size also matters
# dense layers are most expensive in terms of number of parameters


In [13]:
# optimised model 
model = Sequential()
model.add(Conv2D(32, kernel_size=(5, 5),
                 activation='relu',
                 input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu')) # adding extra conv layers followed by maxpooling
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          validation_data=(x_test[:1000], y_test[:1000])) # here also a split to real test and validation data
score = model.evaluate(x_test[1000:], y_test[1000:], verbose=0)
print('Exp  2: test loss:', score[0], 'test accuracy:', score[1])
model.save("/home/kasia/Deep_learning/Models/mnist_cnn/mnist_cnn_exp2_3_all_model.keras")

Exp  2: test loss: 0.020032132044434547 test accuracy: 0.9950000047683716
