In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Dense, Input, Add, BatchNormalization, Dropout,
                                     Activation, GaussianNoise, LeakyReLU)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the Fashion-MNIST train/test splits bundled with Keras.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Cast images to float32 and divide by 255 (assuming 8-bit pixel
# intensities, this puts every value in [0, 1]).
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Flatten every image into one 784-element row for the dense network.
x_train_flat = np.reshape(x_train, (-1, 784))
x_test_flat = np.reshape(x_test, (-1, 784))

# One-hot encode the labels over 10 classes.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [None]:
def _dense_res_block(x, units, dropout_rate=0.4):
    """Apply one fully connected residual block to tensor ``x``.

    Main path: Dense -> BatchNormalization -> LeakyReLU -> Dense ->
    BatchNormalization. The shortcut is its own Dense projection to
    ``units`` so both branches have the same width and can be summed
    regardless of the width of ``x``. The sum is passed through LeakyReLU
    and then Dropout.

    Args:
        x: Input Keras tensor.
        units: Width of every Dense layer in the block (and of the output).
        dropout_rate: Rate passed to the trailing Dropout layer.

    Returns:
        The block's output Keras tensor.
    """
    # The shortcut layer is created first, matching the original layer order
    # so automatically generated layer names stay the same.
    shortcut = Dense(units)(x)

    branch = Dense(units)(x)
    branch = BatchNormalization()(branch)
    branch = LeakyReLU()(branch)
    branch = Dense(units)(branch)
    branch = BatchNormalization()(branch)

    merged = Add()([shortcut, branch])
    merged = LeakyReLU()(merged)
    return Dropout(dropout_rate)(merged)


def create_advanced_dense_resnet(input_dim=784, num_classes=10, stem_units=1024,
                                 block_units=(512, 512, 256, 128, 64, 64),
                                 head_units=64, noise_stddev=0.1, dropout_rate=0.4):
    """Build a dense (fully connected) residual classifier.

    Layout: GaussianNoise on the input, a Dense/BatchNormalization/LeakyReLU
    stem, one residual block per entry of ``block_units``, a Dense/LeakyReLU
    head and a softmax output layer. Calling the function with no arguments
    reproduces the original hard-coded architecture.

    Args:
        input_dim: Number of features per input sample.
        num_classes: Number of units in the softmax output layer.
        stem_units: Width of the first Dense layer.
        block_units: Iterable with the width of each residual block, in order.
        head_units: Width of the Dense layer placed before the output layer.
        noise_stddev: Standard deviation given to the input GaussianNoise layer.
        dropout_rate: Dropout rate used at the end of every residual block.

    Returns:
        An uncompiled ``Model`` mapping ``(input_dim,)`` inputs to
        ``num_classes`` softmax probabilities.
    """
    inputs = Input(shape=(input_dim,))

    # Input noise followed by the stem projection.
    x = GaussianNoise(noise_stddev)(inputs)
    x = Dense(stem_units)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)

    # Stack of residual blocks; widths follow ``block_units`` in order.
    for units in block_units:
        x = _dense_res_block(x, units, dropout_rate)

    # Classification head.
    x = Dense(head_units)(x)
    x = LeakyReLU()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs, outputs)


In [None]:

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout, input noise and batch shuffling
# are repeatable across Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic; seeding alone
# may not give bit-identical results on every device.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

model = create_advanced_dense_resnet()
optimizer = Adam(learning_rate=0.0008)

# categorical_crossentropy matches the one-hot labels produced by
# to_categorical in the data-preparation cell.
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

In [None]:
# Halve the learning rate when val_loss has not improved for 3 epochs.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    verbose=1,
)

# Stop once val_loss has not improved for 10 epochs and restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)


In [None]:
# Train for at most 70 epochs, holding out 20% of the training data for
# validation; the callbacks adjust the learning rate and may stop early.
history = model.fit(
    x_train_flat,
    y_train,
    batch_size=64,
    epochs=70,
    validation_split=0.2,
    callbacks=[lr_scheduler, early_stop],
    verbose=2,
)


Epoch 1/70
750/750 - 63s - 84ms/step - accuracy: 0.6522 - loss: 1.0092 - val_accuracy: 0.8147 - val_loss: 0.5386 - learning_rate: 8.0000e-04
Epoch 2/70
750/750 - 81s - 108ms/step - accuracy: 0.7790 - loss: 0.6477 - val_accuracy: 0.8447 - val_loss: 0.4481 - learning_rate: 8.0000e-04
Epoch 3/70
750/750 - 49s - 65ms/step - accuracy: 0.8079 - loss: 0.5826 - val_accuracy: 0.8238 - val_loss: 0.5356 - learning_rate: 8.0000e-04
Epoch 4/70
750/750 - 49s - 65ms/step - accuracy: 0.8195 - loss: 0.5485 - val_accuracy: 0.8049 - val_loss: 0.5037 - learning_rate: 8.0000e-04
Epoch 5/70
750/750 - 83s - 111ms/step - accuracy: 0.8255 - loss: 0.5271 - val_accuracy: 0.8572 - val_loss: 0.4187 - learning_rate: 8.0000e-04
Epoch 6/70
750/750 - 50s - 67ms/step - accuracy: 0.8362 - loss: 0.4985 - val_accuracy: 0.8658 - val_loss: 0.3882 - learning_rate: 8.0000e-04
Epoch 7/70
750/750 - 80s - 107ms/step - accuracy: 0.8427 - loss: 0.4771 - val_accuracy: 0.8556 - val_loss: 0.4178 - learning_rate: 8.0000e-04
Epoch 8/70

In [None]:
# Score the trained model on the test split without progress output;
# evaluate() yields the loss followed by the compiled accuracy metric.
test_metrics = model.evaluate(x_test_flat, y_test, verbose=0)
test_loss, test_acc = test_metrics
print(f"\nFinal Test Accuracy: {test_acc:.4f}")


Final Test Accuracy: 0.8961


**ACCURACY: 89.61%**

CHANGES:

The number of residual blocks was increased from 2 to 6 to deepen the network and improve feature learning.

The total number of hidden layers was expanded to more than 10, allowing the model to learn more complex patterns.

The number of neurons in each dense layer was increased from 256/128/64 to a hierarchy like 1024 → 512 → 256 → 128 → 64 for better representation.

The activation function was changed from ReLU to LeakyReLU to avoid the dying neuron problem and allow small gradient flow for negative inputs.

A GaussianNoise layer was added at the input to act as a regularizer and simulate data variability.

Dropout rate was increased from 0.3 to 0.4 within residual blocks to further prevent overfitting.

The optimizer's learning rate was manually tuned to 0.0008 for more stable convergence.

The batch size was reduced from 128 to 64 to increase update frequency and improve generalization.

The training duration was extended from 30 to a maximum of 70 epochs (`epochs=70`; early stopping may end training sooner), giving the model more time to learn deeper patterns.

EarlyStopping (monitoring validation loss with a patience of 10 epochs and restoring the best weights) was added alongside ReduceLROnPlateau to stop training automatically when no further validation improvement was seen.