**Deep Learning**

*Dataset: MNIST, CIFAR10*


**1.	Detect handwritten digits**



-	Train an MLP/FNN (multilayer perceptron/feedforward neural network) on the MNIST dataset. See if we can get over 98% accuracy.
-	Try different activation functions
-   Try different learning rates
-   Try different architectures



In [None]:
import tensorflow as tf
from tensorflow import keras
from functools import partial

# Task 1: Detect handwritten digits (MNIST)
# Because the task is simple, a small MLP is trained on only the first 1000 rows of the training set.

def train_mnist_mlp(activation='relu', learning_rate=0.001, hidden_layers=(300, 100),
                    epochs=30, n_samples=1000, n_valid=500):
    """Train a dense network on a subset of MNIST and report its test accuracy.

    The first ``n_samples`` rows of the MNIST training set are used; the first
    ``n_valid`` of those form the validation set and the remainder is the
    training set. The model is evaluated on the full MNIST test set.

    Args:
        activation: Activation used by every hidden layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        hidden_layers: Sequence with the number of units of each hidden layer,
            in order. An empty sequence yields a model with only the softmax
            output layer.
        epochs: Number of training epochs.
        n_samples: Number of leading training rows to keep.
        n_valid: Number of kept rows reserved for validation.

    Returns:
        A ``(history, test_acc)`` tuple: the Keras ``History`` object returned
        by ``fit`` and the accuracy measured on the test set.

    Raises:
        ValueError: If ``n_valid`` does not leave at least one validation row
            and one training row.
    """
    (X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()

    # Subsample first so only the rows that are actually used get converted.
    # reshape(-1, ...) avoids hard-coding the dataset's row counts.
    X_train_full = X_train_full[:n_samples].reshape((-1, 28 * 28)).astype('float32') / 255
    y_train_full = y_train_full[:n_samples]
    X_test = X_test.reshape((-1, 28 * 28)).astype('float32') / 255

    if not 0 < n_valid < len(X_train_full):
        raise ValueError(
            f"n_valid must be between 1 and {len(X_train_full) - 1}, got {n_valid}"
        )

    X_valid, X_train = X_train_full[:n_valid], X_train_full[n_valid:]
    y_valid, y_train = y_train_full[:n_valid], y_train_full[n_valid:]

    model = keras.models.Sequential()
    # Declare the input explicitly instead of passing input_shape to the first
    # Dense layer; this also lets hidden_layers be empty.
    model.add(keras.Input(shape=X_train.shape[1:]))
    for units in hidden_layers:
        model.add(keras.layers.Dense(units, activation=activation))
    model.add(keras.layers.Dense(10, activation='softmax'))

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    # Labels are integer class ids, hence the sparse variant of the loss.
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    history = model.fit(X_train, y_train, epochs=epochs, validation_data=(X_valid, y_valid))

    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f"MNIST Test accuracy: {test_acc}")

    return history, test_acc

# Compare a few hyperparameter configurations of train_mnist_mlp.
# Each call loads the data, trains a fresh model and prints its test accuracy.

# Baseline: every argument left at its default (ReLU, lr=0.001, hidden layers of 300 and 100 units).
print("MNIST Training with ReLU, lr=0.001:")
train_mnist_mlp()

# Sigmoid activation with a 10x larger learning rate; hidden layer sizes stay at the default.
print("\nMNIST Training with sigmoid, lr=0.01:")
train_mnist_mlp(activation='sigmoid', learning_rate=0.01)

# Wider hidden layers (500 and 200 units) with a smaller learning rate; activation stays at the default.
# NOTE: this call is the last expression of the cell, so in a notebook its
# (history, test_acc) return value is echoed as the cell output.
print("\nMNIST Training with hidden layers 500, 200, lr=0.0005:")
train_mnist_mlp(hidden_layers=[500,200], learning_rate=0.0005)

MNIST Training with ReLU, lr=0.001:
Epoch 1/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3246 - loss: 2.0231 - val_accuracy: 0.7360 - val_loss: 1.1378
Epoch 2/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8323 - loss: 0.8020 - val_accuracy: 0.8080 - val_loss: 0.6709
Epoch 3/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8997 - loss: 0.3950 - val_accuracy: 0.8460 - val_loss: 0.5428
Epoch 4/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9391 - loss: 0.2362 - val_accuracy: 0.8260 - val_loss: 0.5207
Epoch 5/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9758 - loss: 0.1648 - val_accuracy: 0.8580 - val_loss: 0.4832
Epoch 6/30
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9897 - loss: 0.0850 - val_accuracy: 0.8580 - val_loss: 0.5079
Epoc

(<keras.src.callbacks.history.History at 0x35e64fa00>, 0.8496999740600586)

**2.	Recognize objects**

-   Train a DNN (deep neural network) with 20 hidden layers and 100 neurons each on the CIFAR10 dataset.
-   Try ELU activation function
-   Try NADAM momentum optimization with early stopping
-   Try Batch Normalization optimization with SELU activation function
-   Try regularization with dropout

Hints: tensorflow, keras.datasets.mnist.load_data(), keras.datasets.cifar10.load_data(), keras.models, keras.layers, keras.initializers, keras.optimizers, functools


In [7]:
# Baseline MLP for 28x28 images: flatten, two ReLU hidden layers, 10-way softmax.
# The input shape is declared with keras.Input rather than as an input_shape
# argument on Flatten, which avoids the Keras warning about passing
# input_shape to a layer of a Sequential model.
model = keras.models.Sequential([
    keras.Input(shape=(28, 28)),
    keras.layers.Flatten(),
    keras.layers.Dense(300, activation="relu"),
    keras.layers.Dense(100, activation="relu"),
    keras.layers.Dense(10, activation="softmax"),
])

model.summary()

  super().__init__(**kwargs)


In [8]:
# Make sure the model produces a tensor with shape (None, 10).
# Only append an output head when the model does not already end in one:
# stacking a second softmax layer on top of an existing softmax output
# distorts the predicted probabilities, and an unconditional add would
# keep growing the model every time this cell is re-run.
if model.output_shape != (None, 10):
    model.add(keras.layers.Flatten())  # Flatten the output before the final Dense layer
    model.add(keras.layers.Dense(10, activation='softmax'))

In [None]:
import tensorflow as tf
from tensorflow import keras

# Fetch CIFAR10 as (train images, train labels), (test images, test labels).
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()

# Keep only the first 1000 training samples (local performance constraint)
# and rescale pixel values to floats by dividing by 255.
X_train_full = X_train_full[:1000].astype('float32') / 255.0
y_train_full = y_train_full[:1000]
X_test = X_test.astype('float32') / 255.0

# Split the kept samples: first 500 for validation, the remainder for training.
X_valid, y_valid = X_train_full[:500], y_train_full[:500]
X_train, y_train = X_train_full[500:], y_train_full[500:]

# Define the DNN model with ELU activation and NADAM optimizer
def create_dnn_model(activation='elu', optimizer='nadam', batch_norm=False, dropout_rate=0.0):
    """Build and compile a 20-hidden-layer dense classifier for 32x32x3 images.

    Every hidden layer has 100 units; the output layer is a 10-way softmax.

    Args:
        activation: Activation applied in every hidden layer.
        optimizer: Keras optimizer identifier (e.g. ``'nadam'``, ``'adam'``)
            or an optimizer instance. It is resolved with
            ``keras.optimizers.get``, so an unrecognised name raises an error
            instead of being silently replaced by Adam.
        batch_norm: When true, each hidden layer is built as
            Dense (no bias, ``lecun_normal`` init) -> BatchNormalization ->
            Activation instead of a single Dense layer with the activation.
        dropout_rate: When greater than 0, a Dropout layer with this rate
            follows every hidden layer.

    Returns:
        The compiled ``keras.models.Sequential`` model, using sparse
        categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.models.Sequential()
    # Declare the input explicitly rather than via input_shape on Flatten.
    model.add(keras.Input(shape=(32, 32, 3)))
    model.add(keras.layers.Flatten())
    for _ in range(20):
        if batch_norm:
            # The activation is applied after normalisation, so it is added as
            # its own layer; the Dense bias is dropped in this configuration.
            model.add(keras.layers.Dense(100, kernel_initializer='lecun_normal', use_bias=False))
            model.add(keras.layers.BatchNormalization())
            model.add(keras.layers.Activation(activation))
        else:
            model.add(keras.layers.Dense(100, activation=activation))
        if dropout_rate > 0:
            model.add(keras.layers.Dropout(dropout_rate))
    model.add(keras.layers.Dense(10, activation='softmax'))

    # Resolve the identifier to an optimizer object; a string yields a new
    # instance with default settings on every call.
    optimizer = keras.optimizers.get(optimizer)

    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Settings shared by every training run below: up to 100 epochs, validation on
# the held-out split, and early stopping (patience 10) that restores the best
# weights seen during training.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
fit_options = dict(
    epochs=100,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping_cb],
)

# Run 1: default configuration (ELU activation, NADAM optimizer).
dnn_model = create_dnn_model()
history_nadam = dnn_model.fit(X_train, y_train, **fit_options)

# Run 2: Batch Normalization with SELU activation.
dnn_model_bn = create_dnn_model(activation='selu', optimizer='adam', batch_norm=True)
history_bn = dnn_model_bn.fit(X_train, y_train, **fit_options)

# Run 3: dropout regularization on top of the default configuration.
dnn_model_dropout = create_dnn_model(dropout_rate=0.2)
history_dropout = dnn_model_dropout.fit(X_train, y_train, **fit_options)

# Score each trained model on the test set, then report the results.
loss_nadam, accuracy_nadam = dnn_model.evaluate(X_test, y_test)
loss_bn, accuracy_bn = dnn_model_bn.evaluate(X_test, y_test)
loss_dropout, accuracy_dropout = dnn_model_dropout.evaluate(X_test, y_test)

print(f'DNN with NADAM - Loss: {loss_nadam}, Accuracy: {accuracy_nadam}')
print(f'DNN with Batch Normalization and SELU - Loss: {loss_bn}, Accuracy: {accuracy_bn}')
print(f'DNN with Dropout - Loss: {loss_dropout}, Accuracy: {accuracy_dropout}')

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.1137 - loss: 2.3835 - val_accuracy: 0.1060 - val_loss: 2.3318
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1212 - loss: 2.3134 - val_accuracy: 0.1640 - val_loss: 2.1921
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.1607 - loss: 2.2178 - val_accuracy: 0.1720 - val_loss: 2.1431
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.1199 - loss: 2.1534 - val_accuracy: 0.1920 - val_loss: 2.0962
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2645 - loss: 2.0289 - val_accuracy: 0.1920 - val_loss: 2.1288
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2206 - loss: 2.0208 - val_accuracy: 0.2600 - val_loss: 2.0326
Epoch 7/100
[1m16/16[0m [32m━━