In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input
from tensorflow.keras.callbacks import (
    EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
)
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import (
    Conv2D, BatchNormalization, MaxPooling2D,
    Dropout, GlobalAveragePooling2D, Dense
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Load MNIST: a labelled training set plus a separate held-out test set.
(x_train_full, y_train_full), (x_test, y_test) = mnist.load_data()

# Append a trailing channel axis (the first Conv2D layer expects one) and
# divide by 255 to rescale the pixel values as float32.
x_train_full = np.expand_dims(x_train_full, -1).astype('float32') / 255
x_test = np.expand_dims(x_test, -1).astype('float32') / 255

# One-hot encode the digit labels (10 classes) to match the
# categorical cross-entropy loss used at compile time.
y_train_full = to_categorical(y_train_full, 10)
y_test = to_categorical(y_test, 10)

# Hold out 20% of the training data for validation; the fixed random_state
# keeps the split reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full, y_train_full, test_size=0.2, random_state=42
)

# Light on-the-fly augmentation (ranges reduced from 10 / 0.1 for speed).
datagen = ImageDataGenerator(
    rotation_range=8,
    width_shift_range=0.08,
    height_shift_range=0.08,
    zoom_range=0.08,
)
# NOTE: datagen.fit(x_train) is intentionally not called. fit() only computes
# dataset statistics for featurewise_center, featurewise_std_normalization or
# zca_whitening; none of those is enabled here, so the call would just copy
# the whole training array without affecting the generated batches.

# Compact CNN for 28x28 single-channel images.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which Keras 3 deprecates for Sequential models.
model = Sequential([
    Input(shape=(28, 28, 1)),

    # Block 1: 32 filters kept for good low-level feature extraction.
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # Block 2: 48 filters (reduced from 64).
    Conv2D(48, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    # Block 3: 64 filters (reduced from 128, one Conv2D layer removed).
    # Global average pooling collapses each feature map to a single value,
    # so the classifier head needs no Flatten layer.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    GlobalAveragePooling2D(),
    Dropout(0.5),

    # Classifier head: one softmax probability per digit class.
    Dense(10, activation='softmax'),
])

# Adam with a learning rate of 3e-4 (raised from 1e-4 for faster convergence).
# The loss matches the one-hot encoded labels.
model.compile(
    optimizer=Adam(learning_rate=0.0003),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Training configuration. The batch size is defined once because the
# generator's batch size and steps_per_epoch must always agree.
BATCH_SIZE = 128  # increased from 64
EPOCHS = 50       # reduced from 100

callbacks = [
    # Stop after 8 epochs (reduced from 10) without val_loss improvement and
    # roll the model back to the best weights seen.
    EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
    # Keep only the best checkpoint on disk. 'val_loss' is ModelCheckpoint's
    # default monitor; it is spelled out to match the other callbacks.
    # NOTE(review): recent Keras versions treat .h5 as a legacy format and
    # recommend .keras; the file name is kept so existing loaders still work.
    ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True),
    # Multiply the learning rate by 0.3 (factor increased from 0.2) after
    # 3 stagnant epochs, never going below 1e-6.
    ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, min_lr=1e-6),
]

# Train on augmented batches; validation uses the un-augmented hold-out split.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

# Save the model as it stands once training has finished.
model.save('mnist_cnn_final.h5')

  self._warn_if_super_not_called()


Epoch 1/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step - accuracy: 0.4379 - loss: 1.6909



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 202ms/step - accuracy: 0.4383 - loss: 1.6900 - val_accuracy: 0.2264 - val_loss: 4.5073 - learning_rate: 3.0000e-04
Epoch 2/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 183ms/step - accuracy: 0.8434 - loss: 0.6968



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 193ms/step - accuracy: 0.8434 - loss: 0.6966 - val_accuracy: 0.6115 - val_loss: 1.0390 - learning_rate: 3.0000e-04
Epoch 3/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - accuracy: 0.9062 - loss: 0.4154



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 186ms/step - accuracy: 0.9063 - loss: 0.4153 - val_accuracy: 0.9413 - val_loss: 0.2236 - learning_rate: 3.0000e-04
Epoch 4/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step - accuracy: 0.9272 - loss: 0.3039



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 188ms/step - accuracy: 0.9272 - loss: 0.3038 - val_accuracy: 0.9689 - val_loss: 0.1309 - learning_rate: 3.0000e-04
Epoch 5/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step - accuracy: 0.9422 - loss: 0.2410



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 183ms/step - accuracy: 0.9422 - loss: 0.2410 - val_accuracy: 0.9729 - val_loss: 0.1137 - learning_rate: 3.0000e-04
Epoch 6/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - accuracy: 0.9474 - loss: 0.2030



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 184ms/step - accuracy: 0.9474 - loss: 0.2030 - val_accuracy: 0.9752 - val_loss: 0.0967 - learning_rate: 3.0000e-04
Epoch 7/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 0.9557 - loss: 0.1736



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 187ms/step - accuracy: 0.9557 - loss: 0.1736 - val_accuracy: 0.9811 - val_loss: 0.0722 - learning_rate: 3.0000e-04
Epoch 8/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step - accuracy: 0.9594 - loss: 0.1536



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 182ms/step - accuracy: 0.9594 - loss: 0.1536 - val_accuracy: 0.9822 - val_loss: 0.0642 - learning_rate: 3.0000e-04
Epoch 9/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 182ms/step - accuracy: 0.9604 - loss: 0.1438 - val_accuracy: 0.9783 - val_loss: 0.0772 - learning_rate: 3.0000e-04
Epoch 10/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 186ms/step - accuracy: 0.9621 - loss: 0.1337 - val_accuracy: 0.9811 - val_loss: 0.0664 - learning_rate: 3.0000e-04
Epoch 11/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step - accuracy: 0.9676 - loss: 0.1204



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 205ms/step - accuracy: 0.9676 - loss: 0.1204 - val_accuracy: 0.9816 - val_loss: 0.0630 - learning_rate: 3.0000e-04
Epoch 12/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - accuracy: 0.9668 - loss: 0.1177



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 186ms/step - accuracy: 0.9668 - loss: 0.1177 - val_accuracy: 0.9848 - val_loss: 0.0536 - learning_rate: 3.0000e-04
Epoch 13/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - accuracy: 0.9681 - loss: 0.1134



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 186ms/step - accuracy: 0.9681 - loss: 0.1134 - val_accuracy: 0.9864 - val_loss: 0.0478 - learning_rate: 3.0000e-04
Epoch 14/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 192ms/step - accuracy: 0.9699 - loss: 0.1084 - val_accuracy: 0.9852 - val_loss: 0.0518 - learning_rate: 3.0000e-04
Epoch 15/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - accuracy: 0.9706 - loss: 0.1042



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 185ms/step - accuracy: 0.9706 - loss: 0.1042 - val_accuracy: 0.9868 - val_loss: 0.0430 - learning_rate: 3.0000e-04
Epoch 16/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 184ms/step - accuracy: 0.9730 - loss: 0.0982 - val_accuracy: 0.9872 - val_loss: 0.0443 - learning_rate: 3.0000e-04
Epoch 17/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 183ms/step - accuracy: 0.9725 - loss: 0.0974 - val_accuracy: 0.9844 - val_loss: 0.0527 - learning_rate: 3.0000e-04
Epoch 18/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - accuracy: 0.9736 - loss: 0.0909



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 187ms/step - accuracy: 0.9736 - loss: 0.0909 - val_accuracy: 0.9887 - val_loss: 0.0392 - learning_rate: 3.0000e-04
Epoch 19/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 180ms/step - accuracy: 0.9743 - loss: 0.0897



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 193ms/step - accuracy: 0.9743 - loss: 0.0897 - val_accuracy: 0.9893 - val_loss: 0.0350 - learning_rate: 3.0000e-04
Epoch 20/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 187ms/step - accuracy: 0.9731 - loss: 0.0919 - val_accuracy: 0.9895 - val_loss: 0.0355 - learning_rate: 3.0000e-04
Epoch 21/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step - accuracy: 0.9760 - loss: 0.0850



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 123ms/step - accuracy: 0.9760 - loss: 0.0850 - val_accuracy: 0.9902 - val_loss: 0.0326 - learning_rate: 3.0000e-04
Epoch 22/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 179ms/step - accuracy: 0.9757 - loss: 0.0804 - val_accuracy: 0.9907 - val_loss: 0.0339 - learning_rate: 3.0000e-04
Epoch 23/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 179ms/step - accuracy: 0.9767 - loss: 0.0794 - val_accuracy: 0.9897 - val_loss: 0.0355 - learning_rate: 3.0000e-04
Epoch 24/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 180ms/step - accuracy: 0.9792 - loss: 0.0751 - val_accuracy: 0.9912 - val_loss: 0.0327 - learning_rate: 3.0000e-04
Epoch 25/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step - accuracy: 0.9783 - loss: 0.0754



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 180ms/step - accuracy: 0.9783 - loss: 0.0754 - val_accuracy: 0.9915 - val_loss: 0.0295 - learning_rate: 9.0000e-05
Epoch 26/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 177ms/step - accuracy: 0.9799 - loss: 0.0700 - val_accuracy: 0.9916 - val_loss: 0.0302 - learning_rate: 9.0000e-05
Epoch 27/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step - accuracy: 0.9787 - loss: 0.0720



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 178ms/step - accuracy: 0.9787 - loss: 0.0720 - val_accuracy: 0.9918 - val_loss: 0.0281 - learning_rate: 9.0000e-05
Epoch 28/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 177ms/step - accuracy: 0.9779 - loss: 0.0741 - val_accuracy: 0.9909 - val_loss: 0.0303 - learning_rate: 9.0000e-05
Epoch 29/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 182ms/step - accuracy: 0.9789 - loss: 0.0719 - val_accuracy: 0.9912 - val_loss: 0.0286 - learning_rate: 9.0000e-05
Epoch 30/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 187ms/step - accuracy: 0.9805 - loss: 0.0666 - val_accuracy: 0.9920 - val_loss: 0.0285 - learning_rate: 9.0000e-05
Epoch 31/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 187ms/step - accuracy: 0.9811 - loss: 0.0683 - val_accuracy: 0.9920 - val_loss: 0.0292 - learning_rate: 2.7000e-05
Epoch 32/50
[1m375/375[0m [32m━━━━━━━━━



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 188ms/step - accuracy: 0.9810 - loss: 0.0659 - val_accuracy: 0.9919 - val_loss: 0.0279 - learning_rate: 2.7000e-05
Epoch 34/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 188ms/step - accuracy: 0.9801 - loss: 0.0654 - val_accuracy: 0.9921 - val_loss: 0.0282 - learning_rate: 2.7000e-05
Epoch 35/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step - accuracy: 0.9808 - loss: 0.0677



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 189ms/step - accuracy: 0.9808 - loss: 0.0677 - val_accuracy: 0.9919 - val_loss: 0.0274 - learning_rate: 2.7000e-05
Epoch 36/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 189ms/step - accuracy: 0.9811 - loss: 0.0653 - val_accuracy: 0.9922 - val_loss: 0.0279 - learning_rate: 2.7000e-05
Epoch 37/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 188ms/step - accuracy: 0.9807 - loss: 0.0669 - val_accuracy: 0.9925 - val_loss: 0.0275 - learning_rate: 2.7000e-05
Epoch 38/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 175ms/step - accuracy: 0.9814 - loss: 0.0667



[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 185ms/step - accuracy: 0.9814 - loss: 0.0667 - val_accuracy: 0.9923 - val_loss: 0.0272 - learning_rate: 2.7000e-05
Epoch 39/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 189ms/step - accuracy: 0.9779 - loss: 0.0717 - val_accuracy: 0.9923 - val_loss: 0.0274 - learning_rate: 2.7000e-05
Epoch 40/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 188ms/step - accuracy: 0.9798 - loss: 0.0680 - val_accuracy: 0.9919 - val_loss: 0.0273 - learning_rate: 2.7000e-05
Epoch 41/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 183ms/step - accuracy: 0.9793 - loss: 0.0688 - val_accuracy: 0.9924 - val_loss: 0.0273 - learning_rate: 2.7000e-05
Epoch 42/50
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 183ms/step - accuracy: 0.9801 - loss: 0.0686 - val_accuracy: 0.9923 - val_loss: 0.0273 - learning_rate: 8.1000e-06
Epoch 43/50
[1m375/375[0m [32m━━━━━━━━━

