**APPROACH 1: Elastic regularisation applied indirectly & Grid Searc for hyper parameters** ***only l1 used***

In [None]:
from keras.datasets import cifar10
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize and convert to float32
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Define the input shape
input_img = Input(shape=(32, 32, 3))

# Encoder part of the CAE
x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2))(x)

# Flatten the encoded output for MLP
flatten = Flatten()(encoded)

# Create a model for encoding features
encoder_model = Model(input_img, flatten)

# Obtain encoded features for training and validation sets
flatten_train_np = encoder_model.predict(x_train)
flatten_test_np = encoder_model.predict(x_test)

# Hyperparameters
batch_sizes = [8, 16, 32, 64]
learning_rates = [0.001, 0.01]
l1_ratio = 0.5  # Elastic Net mixing parameter

# Results storage
best_accuracy = 0
best_params = {}

# Grid search over hyperparameters
for batch_size in batch_sizes:
    for lr in learning_rates:
        # MLP for recognition
        mlp = Sequential([
            Dense(400, activation='relu', input_shape=(flatten_train_np.shape[1],)),
            BatchNormalization(),
            Dropout(0.5),
            Dense(64, activation='relu'),
            BatchNormalization(),
            Dropout(0.5),
            Dense(10, activation='softmax', kernel_regularizer=regularizers.l1_l2(l1=l1_ratio, l2=l1_ratio))
        ])

        # Compile the MLP
        optimizer = Adam(learning_rate=lr)
        mlp.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        # Train the MLP on the encoded features
        x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
            flatten_train_np, y_train, test_size=0.2, random_state=42
        )

        early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
        mlp_history = mlp.fit(x_train_split, y_train_split,
                              epochs=100,
                              batch_size=batch_size,
                              validation_data=(x_val_split, y_val_split),
                              callbacks=[early_stopping_monitor],
                              verbose=0)

        # Evaluate MLP on test set
        test_loss, test_accuracy = mlp.evaluate(flatten_test_np, y_test, verbose=0)

        # Store results
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_params = {'Batch Size': batch_size, 'Learning Rate': lr}

        print(f'Batch Size: {batch_size}, Learning Rate: {lr}, Test Accuracy: {test_accuracy:.4f}')

print(f'Best Accuracy: {best_accuracy:.4f} with Batch Size: {best_params["Batch Size"]}, Learning Rate: {best_params["Learning Rate"]}')



Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Batch Size: 8, Learning Rate: 0.001, Test Accuracy: 0.2390


KeyboardInterrupt: 



```
# Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 [==============================] - 2s 0us/step
1563/1563 [==============================] - 7s 3ms/step
313/313 [==============================] - 1s 4ms/step
Batch Size: 8, Learning Rate: 0.001, Test Accuracy: 0.2378
Batch Size: 8, Learning Rate: 0.01, Test Accuracy: 0.2418
Batch Size: 16, Learning Rate: 0.001, Test Accuracy: 0.2594
Batch Size: 16, Learning Rate: 0.01, Test Accuracy: 0.2388
Batch Size: 32, Learning Rate: 0.001, Test Accuracy: 0.2368
Batch Size: 32, Learning Rate: 0.01, Test Accuracy: 0.2548
Batch Size: 64, Learning Rate: 0.001, Test Accuracy: 0.0969
Batch Size: 64, Learning Rate: 0.01, Test Accuracy: 0.2800
Best Accuracy: 0.2800 with Batch Size: 64, Learning Rate: 0.01
```



**Approach 2 :  *l1 and l2 both used***

In [None]:
from keras.datasets import cifar10
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from keras.regularizers import l1_l2
import matplotlib.pyplot as plt

In [None]:
# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [None]:
# Normalize and convert to float32
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Define the input shape
input_img = Input(shape=(32, 32, 3))

# Encoder part of the CAE
x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2))(x)

# Flatten the encoded output for MLP
flatten = Flatten()(encoded)

# Create a model for encoding features
encoder_model = Model(input_img, flatten)

In [None]:
# Obtain encoded features for training and validation sets
flatten_train_np = encoder_model.predict(x_train)
flatten_test_np = encoder_model.predict(x_test)



In [None]:
# Hyperparameters for Elastic Net regularization
alpha = 0.5  # Mixing parameter (trade-off between L1 and L2)
rho = 0.5    # Ratio of L1 penalty to total penalty

# Results storage
best_accuracy = 0
best_params = {}

# Grid search over hyperparameters
batch_sizes = [8, 16, 32, 64]
learning_rates = [0.001, 0.01]

In [None]:
for batch_size in batch_sizes:
    for lr in learning_rates:
        # MLP for recognition with Elastic Net regularization
        mlp = Sequential([
            Dense(400, activation='relu', input_shape=(flatten_train_np.shape[1],)),
            BatchNormalization(),
            Dropout(0.5),
            Dense(64, activation='relu'),
            BatchNormalization(),
            Dropout(0.5),
            Dense(10, activation='softmax', kernel_regularizer=l1_l2(l1=rho * alpha, l2=(1 - rho) * alpha))
        ])

        # Compile the MLP
        optimizer = Adam(learning_rate=lr)
        mlp.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        # Train the MLP on the encoded features
        x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
            flatten_train_np, y_train, test_size=0.2, random_state=42
        )

        early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
        mlp_history = mlp.fit(x_train_split, y_train_split,
                              epochs=100,
                              batch_size=batch_size,
                              validation_data=(x_val_split, y_val_split),
                              callbacks=[early_stopping_monitor],
                              verbose=2)

        # Evaluate MLP on test set
        test_loss, test_accuracy = mlp.evaluate(flatten_test_np, y_test, verbose=0)

        # Store results
        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_params = {'Batch Size': batch_size, 'Learning Rate': lr}

        print(f'Batch Size: {batch_size}, Learning Rate: {lr}, Test Accuracy: {test_accuracy:.4f}')

print(f'Best Accuracy: {best_accuracy:.4f} with Batch Size: {best_params["Batch Size"]}, Learning Rate: {best_params["Learning Rate"]}')

Epoch 1/100
5000/5000 - 20s - loss: 2.9384 - accuracy: 0.1153 - val_loss: 2.3381 - val_accuracy: 0.1089 - 20s/epoch - 4ms/step
Epoch 2/100
5000/5000 - 19s - loss: 2.3515 - accuracy: 0.1428 - val_loss: 2.3755 - val_accuracy: 0.1731 - 19s/epoch - 4ms/step
Epoch 3/100
5000/5000 - 20s - loss: 2.4004 - accuracy: 0.1911 - val_loss: 2.3412 - val_accuracy: 0.2317 - 20s/epoch - 4ms/step
Epoch 4/100
5000/5000 - 19s - loss: 2.3941 - accuracy: 0.2159 - val_loss: 2.3410 - val_accuracy: 0.2414 - 19s/epoch - 4ms/step
Epoch 5/100
5000/5000 - 20s - loss: 2.3915 - accuracy: 0.2169 - val_loss: 2.3173 - val_accuracy: 0.2430 - 20s/epoch - 4ms/step
Epoch 6/100
5000/5000 - 18s - loss: 2.3943 - accuracy: 0.2193 - val_loss: 2.3461 - val_accuracy: 0.2356 - 18s/epoch - 4ms/step
Epoch 7/100
5000/5000 - 19s - loss: 2.4096 - accuracy: 0.2130 - val_loss: 2.3295 - val_accuracy: 0.2232 - 19s/epoch - 4ms/step
Epoch 8/100
5000/5000 - 19s - loss: 2.4014 - accuracy: 0.2165 - val_loss: 2.3066 - val_accuracy: 0.2443 - 19s/e



```
 Best Accuracy: 0.3917 with Batch Size: 64, Learning Rate: 0.001
```



## Grid search is a systematic method, but it can be computationally expensive, especially when dealing with a large number of hyperparameters and their possible values.

***Approach 3:Bayesian***

In [None]:
from keras.datasets import cifar10
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import l1_l2
from hyperopt import fmin, tpe, hp
from sklearn.model_selection import train_test_split

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize and convert to float32
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Define the input shape
input_img = Input(shape=(32, 32, 3))

# Encoder part of the CAE
x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2))(x)

# Flatten the encoded output for MLP
flatten = Flatten()(encoded)

# Create a model for encoding features
encoder_model = Model(input_img, flatten)

# Obtain encoded features for training and validation sets
flatten_train_np = encoder_model.predict(x_train)
flatten_test_np = encoder_model.predict(x_test)

# Define the objective function for Bayesian optimization
def objective(params, flatten_train_np=flatten_train_np, y_train=y_train, flatten_test_np=flatten_test_np, y_test=y_test):
    # Extract hyperparameters
    batch_size = int(params['batch_size'])
    lr = params['lr']

    # MLP for recognition with Elastic Net regularization
    mlp = Sequential([
        Dense(400, activation='relu', input_shape=(flatten_train_np.shape[1],)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax', kernel_regularizer=l1_l2(l1=params['rho'] * params['alpha'], l2=(1 - params['rho']) * params['alpha']))
    ])

    # Compile the MLP
    optimizer = Adam(learning_rate=lr)
    mlp.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the MLP on the encoded features
    x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
        flatten_train_np, y_train, test_size=0.2, random_state=42
    )

    early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
    mlp_history = mlp.fit(x_train_split, y_train_split,
                          epochs=100,
                          batch_size=batch_size,
                          validation_data=(x_val_split, y_val_split),
                          callbacks=[early_stopping_monitor],
                          verbose=0)

    # Evaluate MLP on test set
    test_loss, test_accuracy = mlp.evaluate(flatten_test_np, y_test, verbose=0)

    # Return the negative accuracy (as hyperopt minimizes the objective)
    return -test_accuracy

# Define the hyperparameter space for Bayesian optimization
space = {
    'batch_size': hp.choice('batch_size', [8, 16, 32, 64]),
    'lr': hp.loguniform('lr', -5, 0),  # Learning rate in log scale
    'alpha': hp.uniform('alpha', 0, 1),
    'rho': hp.uniform('rho', 0, 1)
}

# Perform Bayesian optimization
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=20)

# Extract the best hyperparameters
best_batch_size = [8, 16, 32, 64][best['batch_size']]
best_lr = best['lr']
best_alpha = best['alpha']
best_rho = best['rho']

print(f'Best hyperparameters: Batch Size: {best_batch_size}, Learning Rate: {best_lr}, Alpha: {best_alpha}, Rho: {best_rho}')


100%|██████████| 20/20 [38:45<00:00, 116.29s/trial, best loss: -0.26100000739097595]
Best hyperparameters: Batch Size: 32, Learning Rate: 0.009360527227641056, Alpha: 0.6919125492039278, Rho: 0.18166965893871023


with more polishing

In [None]:

from keras.datasets import cifar10
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import l1_l2
from hyperopt import fmin, tpe, hp
from sklearn.model_selection import train_test_split
import numpy as np

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize and convert to float32
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Define the input shape
input_img = Input(shape=(32, 32, 3))

# Encoder part of the CAE
x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2))(x)

# Flatten the encoded output for MLP
flatten = Flatten()(encoded)

# Create a model for encoding features
encoder_model = Model(input_img, flatten)

# Obtain encoded features for training and validation sets
flatten_train_np = encoder_model.predict(x_train)
flatten_test_np = encoder_model.predict(x_test)

# Define the objective function for Bayesian optimization
def objective(params, flatten_train_np=flatten_train_np, y_train=y_train, flatten_test_np=flatten_test_np, y_test=y_test):
    # Extract hyperparameters
    batch_size = int(params['batch_size'])
    lr = params['lr']

    # MLP for recognition with Elastic Net regularization
    mlp = Sequential([
        Dense(400, activation='relu', input_shape=(flatten_train_np.shape[1],)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax', kernel_regularizer=l1_l2(l1=params['rho'] * params['alpha'], l2=(1 - params['rho']) * params['alpha']))
    ])

    # Compile the MLP
    optimizer = Adam(learning_rate=lr)
    mlp.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the MLP on the encoded features
    x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
        flatten_train_np, y_train, test_size=0.2, random_state=42
    )

    early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
    mlp_history = mlp.fit(x_train_split, y_train_split,
                          epochs=100,
                          batch_size=batch_size,
                          validation_data=(x_val_split, y_val_split),
                          callbacks=[early_stopping_monitor],
                          verbose=0)

    # Evaluate MLP on test set
    test_loss, test_accuracy = mlp.evaluate(flatten_test_np, y_test, verbose=0)

    # Return a dictionary with 'loss' and 'status' keys
    return {'loss': -test_accuracy, 'status': 'ok', 'model': mlp}

# Define the hyperparameter space for Bayesian optimization
space = {
    'batch_size': hp.choice('batch_size', [8, 16, 32, 64]),
    'lr': hp.loguniform('lr', -5, 0),  # Learning rate in log scale
    'alpha': hp.uniform('alpha', 0, 1),
    'rho': hp.uniform('rho', 0, 1)
}

# Perform Bayesian optimization
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=20)

# Extract the best hyperparameters
best_batch_size = [8, 16, 32, 64][best['batch_size']]
best_lr = best['lr']
best_alpha = best['alpha']
best_rho = best['rho']

# Retrieve the best MLP model
best_model = objective(best)[1]

# Display sample predictions
sample_indices = np.random.choice(len(x_test), 5, replace=False)
sample_images = x_test[sample_indices]
sample_labels = y_test[sample_indices]

predictions = best_model.predict(encoder_model.predict(sample_images))
predicted_labels = np.argmax(predictions, axis=1)

print("\nSample Predictions:")
for i in range(len(sample_images)):
    print(f"Actual Label: {sample_labels[i]}, Predicted Label: {predicted_labels[i]}")

# Display the accuracy
print(f"\nBest Accuracy: {-best['loss']:.4f} with Batch Size: {best_batch_size}, Learning Rate: {best_lr}, Alpha: {best_alpha}, Rho: {best_rho}")


100%|██████████| 20/20 [31:04<00:00, 93.24s/trial, best loss: -0.38760000467300415]


KeyError: 1



```
# 100%|██████████| 20/20 [44:06<00:00, 132.30s/trial, best loss: -0.28060001134872437]
Best hyperparameters: Batch Size: 64, Learning Rate: 0.01763211249105561, Alpha: 0.8638905622827661, Rho: 0.11757792046491378
```



In [None]:
from keras.datasets import cifar10
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import l1_l2
from hyperopt import fmin, tpe, hp
from sklearn.model_selection import train_test_split
import numpy as np

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize and convert to float32
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Define the input shape
input_img = Input(shape=(32, 32, 3))

# Encoder part of the CAE
x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2))(x)

# Flatten the encoded output for MLP
flatten = Flatten()(encoded)

# Create a model for encoding features
encoder_model = Model(input_img, flatten)

# Obtain encoded features for training and validation sets
flatten_train_np = encoder_model.predict(x_train)
flatten_test_np = encoder_model.predict(x_test)

# Define the objective function for Bayesian optimization
def objective(params, flatten_train_np=flatten_train_np, y_train=y_train, flatten_test_np=flatten_test_np, y_test=y_test):
    # Extract hyperparameters
    batch_size = int(params['batch_size'])
    lr = params['lr']

    # MLP for recognition with Elastic Net regularization
    mlp = Sequential([
        Dense(400, activation='relu', input_shape=(flatten_train_np.shape[1],)),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax', kernel_regularizer=l1_l2(l1=params['rho'] * params['alpha'], l2=(1 - params['rho']) * params['alpha']))
    ])

    # Compile the MLP
    optimizer = Adam(learning_rate=lr)
    mlp.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the MLP on the encoded features
    x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
        flatten_train_np, y_train, test_size=0.2, random_state=42
    )

    early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
    mlp_history = mlp.fit(x_train_split, y_train_split,
                          epochs=100,
                          batch_size=batch_size,
                          validation_data=(x_val_split, y_val_split),
                          callbacks=[early_stopping_monitor],
                          verbose=0)

    # Evaluate MLP on test set
    test_loss, test_accuracy = mlp.evaluate(flatten_test_np, y_test, verbose=0)

    # Return a dictionary with 'loss' and 'status' keys
    return {'loss': -test_accuracy, 'status': 'ok', 'accuracy': test_accuracy}

# Define the hyperparameter space for Bayesian optimization
space = {
    'batch_size': hp.choice('batch_size', [8, 16, 32, 64]),
    'lr': hp.loguniform('lr', -5, 0),  # Learning rate in log scale
    'alpha': hp.uniform('alpha', 0, 1),
    'rho': hp.uniform('rho', 0, 1)
}

# Perform Bayesian optimization
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=20)

# Extract the best hyperparameters
best_batch_size = [8, 16, 32, 64][best['batch_size']]
best_lr = best['lr']
best_alpha = best['alpha']
best_rho = best['rho']
best_accuracy = best['accuracy']

print(f'Best hyperparameters: Batch Size: {best_batch_size}, Learning Rate: {best_lr}, Alpha: {best_alpha}, Rho: {best_rho}')
print(f'Best Accuracy: {best_accuracy}')

# Retrieve the best MLP model
best_model = objective(best)[1]

# Display sample predictions
sample_indices = np.random.choice(len(x_test), 5, replace=False)
sample_images = x_test[sample_indices]
sample_labels = y_test[sample_indices]

predictions = best_model.predict(encoder_model.predict(sample_images))
predicted_labels = np.argmax(predictions, axis=1)

print("\nSample Predictions:")
for i in range(len(sample_images)):
    print(f"Actual Label: {sample_labels[i]}, Predicted Label: {predicted_labels[i]}")




## Using with MStar dataset