# CNN with Regularization( Fashion-mnist)
#### Design and Implementation of the a CNN model (with 4+ convolutional layers)
- While Recording the Training & Test accuracy of the following
  1. Base Model
  2. L1 Regularization
  3. L2 Regularization
  4. Dropout
  5. L2 (or L1) + Dropout

### Step 0: Setup and Imports

In [6]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

import warnings
warnings.filterwarnings('ignore')
plt.style.use('ggplot')

### Step 1: Load and Preprocess Fashion Mnist

In [7]:
(x_train_fm, y_train_fm),(x_test_fm, y_test_fm) = tf.keras.datasets.fashion_mnist.load_data()

# Normalize to [0,1]
x_train = np.expand_dims(x_train_fm, -1,).astype('float32')/255.0
x_test = np.expand_dims(x_test_fm, -1).astype('float32')/255.0


### Step 2: Define Models
* Base Model Function
* Variants with Regularization

In [24]:
def build_base_model(input_shape=(28,28,1), num_classes=10):
    model = tf.keras.Sequential([
        # Layer 1
        tf.keras.layers.Conv2D(32, (3,3), activation='relu',padding='same', input_shape=input_shape),
        tf.keras.layers.MaxPooling2D(2,2),

        # Layer 2
        tf.keras.layers.Conv2D(64, (3,3), activation='relu',padding='same'),
        tf.keras.layers.MaxPooling2D(2,2),

        # Layer 3
        tf.keras.layers.Conv2D(128, (3,3), activation='relu',padding='same'),
        tf.keras.layers.MaxPooling2D(2,2),

        # Layer 4
        tf.keras.layers.Conv2D(256, (3,3), activation='relu', padding='same'),
        tf.keras.layers.Flatten(),

        # Dense layer
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])
    return model

#### Step 2b: Variants with Regularization

In [31]:
from tensorflow.keras import regularizers

# a L1
def build_l1_model(input_shape=(28,28, 1), num_classes=10):
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', padding='same', input_shape=input_shape,kernel_regularizer=regularizers.l1(.001)),
        tf.keras.layers.MaxPooling2D(2,2),

        tf.keras.layers.Conv2D(64, 3, activation='relu',padding='same', kernel_regularizer=regularizers.l1(.001)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l1(.001)),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

# b L2
def build_l2_model(input_shape=(28,28, 1), num_classes=10):
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', padding='same', input_shape=input_shape,kernel_regularizer=regularizers.l2(.001)),
        tf.keras.layers.MaxPooling2D(2,2),

        tf.keras.layers.Conv2D(64, 3, activation='relu',padding='same', kernel_regularizer=regularizers.l2(.001)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(.001)),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

# C Dropout
def build_dropout_model(input_shape=(28,28,1), num_classes=10):
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation="relu", padding="same", input_shape=input_shape),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(64, 3, activation="relu", padding="same"),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

# (d) L2 + Dropout
def build_l2_dropout_model(input_shape=(28,28,1), num_classes=10):
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation="relu", padding="same", input_shape=input_shape,
                               kernel_regularizer=regularizers.l2(0.001)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(64, 3, activation="relu", padding="same", kernel_regularizer=regularizers.l2(0.001)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation="relu", kernel_regularizer=regularizers.l2(0.001)),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])

### Step 3: Training Utility

In [32]:
def train_and_evaluate(model, x_train, y_train, x_test, y_test, epochs=10):
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['accuracy'])
    history = model.fit(x_train, y_train, epochs=epochs, batch_size=128, validation_split=.1,verbose=2)
    test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
    print(f'Test Accuracy: {test_acc:.4}')
    return history, test_acc

### Step 4: Run Experiments

In [33]:
# a. Base
x_train, x_test, y_train, y_test = x_train_fm, x_test_fm, y_train_fm, y_test_fm
base_history, base_acc = train_and_evaluate(build_base_model(), x_train, y_train, x_test, y_test)

# b. L1
l1_history, l1_acc = train_and_evaluate(build_l1_model(), x_train, y_train, x_test, y_test)

# c. L2
l2_history, l2_acc = train_and_evaluate(build_l2_model(), x_train, y_train, x_test, y_test)

# d. Dropout
drop_history, drop_acc = train_and_evaluate(build_dropout_model(), x_train, y_train, x_test, y_test)

# e. L2 + Dropout
combo_history, combo_acc = train_and_evaluate(build_l2_dropout_model(), x_train, y_train, x_test, y_test)


Epoch 1/10
422/422 - 7s - 17ms/step - accuracy: 0.8240 - loss: 0.6873 - val_accuracy: 0.8837 - val_loss: 0.3181
Epoch 2/10
422/422 - 3s - 6ms/step - accuracy: 0.8939 - loss: 0.2895 - val_accuracy: 0.9038 - val_loss: 0.2738
Epoch 3/10
422/422 - 2s - 6ms/step - accuracy: 0.9089 - loss: 0.2447 - val_accuracy: 0.9048 - val_loss: 0.2722
Epoch 4/10
422/422 - 2s - 6ms/step - accuracy: 0.9197 - loss: 0.2172 - val_accuracy: 0.9073 - val_loss: 0.2500
Epoch 5/10
422/422 - 2s - 6ms/step - accuracy: 0.9284 - loss: 0.1909 - val_accuracy: 0.9058 - val_loss: 0.2550
Epoch 6/10
422/422 - 2s - 6ms/step - accuracy: 0.9353 - loss: 0.1721 - val_accuracy: 0.9138 - val_loss: 0.2565
Epoch 7/10
422/422 - 2s - 6ms/step - accuracy: 0.9384 - loss: 0.1639 - val_accuracy: 0.9142 - val_loss: 0.2528
Epoch 8/10
422/422 - 2s - 6ms/step - accuracy: 0.9461 - loss: 0.1440 - val_accuracy: 0.9128 - val_loss: 0.2539
Epoch 9/10
422/422 - 2s - 6ms/step - accuracy: 0.9511 - loss: 0.1315 - val_accuracy: 0.9123 - val_loss: 0.2706


### step 5: Notes 
* Regularization -> Discourages large weights (simpler model)
* Dropout -> randomly disables neurons during training -> prevents co-adaptation
* L1 vs L2:
  - L1 -> Promotes sparsity(some weights become zero)
  - L2 -> weight decay, more stable.
* Best practice -> L2 + Dropout usually performs storngest on Fashion-MNISt as per the results