# Homework - DNN

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

## Preprocessing

In [2]:
# the data, shuffled and split between train and test sets
(X_train_pre, y_train_pre), (X_test_pre, y_test_pre) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
input_dim = 784 # 28*28
X_train = X_train_pre.reshape(X_train_pre.shape[0], input_dim)
X_test = X_test_pre.reshape(X_test_pre.shape[0], input_dim)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255.
X_test /= 255.

print('training data shape:', X_train.shape)
print('{} train samples, {} test samples'.format(X_train.shape[0], X_test.shape[0]))

training data shape: (60000, 784)
60000 train samples, 10000 test samples


In [4]:
num_classes = 10 # 0-9
y_train = keras.utils.to_categorical(y_train_pre, num_classes)
y_test = keras.utils.to_categorical(y_test_pre, num_classes)

In [5]:
validation_split = 0.166667
index = np.random.permutation(X_train.shape[0])

train_idx = int(X_train.shape[0]*validation_split)

X_val, X_train = X_train[index[:train_idx]], X_train[index[train_idx:]]
y_val, y_train = y_train[index[:train_idx]], y_train[index[train_idx:]]


## The models

### Build the model **1**

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
model1 = Sequential([
    Dense(128, input_shape=(784,), activation='relu'),  # First hidden layer with 128 neurons and ReLU activation
    Dense(64, activation='relu'),                       # Second hidden layer with 64 neurons and ReLU activation
    Dense(10, activation='softmax')                     # Output layer with 10 neurons (one for each digit) and softmax activation
])

# Compile the model
model1.compile(optimizer=Adam(learning_rate=0.001),
               loss='categorical_crossentropy',
               metrics=['accuracy'])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
model1.summary()

In [11]:
# Train the model
history1 = model1.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val), verbose=1)

Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8664 - loss: 0.4577 - val_accuracy: 0.9588 - val_loss: 0.1434
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9644 - loss: 0.1190 - val_accuracy: 0.9591 - val_loss: 0.1323
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9764 - loss: 0.0736 - val_accuracy: 0.9722 - val_loss: 0.0910
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9825 - loss: 0.0547 - val_accuracy: 0.9719 - val_loss: 0.0920
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.9874 - loss: 0.0396 - val_accuracy: 0.9751 - val_loss: 0.0821
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9903 - loss: 0.0312 - val_accuracy: 0.9754 - val_loss: 0.0875
Epoch 7/10
[

In [13]:
_, acc = model1.evaluate(X_test, y_test, verbose=0)

print("Testing set accuracy: {:.2f}%".format(acc*100))

Testing set accuracy: 97.95%


### Build the model **2**

In [29]:
from tensorflow.keras.layers import Dropout

# Define the model
model2 = Sequential([
    Dense(64, input_shape=(784,), activation='relu'),  # Changed to ReLU
    Dropout(0.2),                                     # Dropout for regularization
    Dense(32, activation='relu'),                     # Continued ReLU
    Dropout(0.2),                                     # Another dropout layer
    Dense(16, activation='relu'),                     # More ReLU
    Dense(10, activation='softmax')
])

# Compile the model
model2.compile(optimizer=Adam(learning_rate=0.001),  # Switched to Adam
               loss='categorical_crossentropy',
               metrics=['accuracy'])




  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [30]:
model2.summary()

In [31]:
# Train the model
history2 = model2.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_val, y_val), verbose=1)

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.6448 - loss: 1.0772 - val_accuracy: 0.9310 - val_loss: 0.2294
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9096 - loss: 0.3121 - val_accuracy: 0.9506 - val_loss: 0.1714
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9310 - loss: 0.2371 - val_accuracy: 0.9557 - val_loss: 0.1484
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9425 - loss: 0.1998 - val_accuracy: 0.9616 - val_loss: 0.1319
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9482 - loss: 0.1781 - val_accuracy: 0.9624 - val_loss: 0.1277
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.9492 - loss: 0.1669 - val_accuracy: 0.9644 - val_loss: 0.1191
Epoch 7/10
[1m782/782[0m 

In [32]:
_, acc = model2.evaluate(X_test, y_test, verbose=0)

print("Testing set accuracy: {:.2f}%".format(acc*100))

Testing set accuracy: 97.26%


### Build the model **3**

In [18]:
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import EarlyStopping

# Define the model
model3 = Sequential([
    Dense(256, input_shape=(784,), activation='tanh'),  # Larger single layer with tanh activation
    Dense(10, activation='softmax')
])

# Compile the model
model3.compile(optimizer=RMSprop(learning_rate=0.001),
               loss='categorical_crossentropy',
               metrics=['accuracy'])

# Early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
model3.summary()

In [22]:
# Train the model
history3 = model3.fit(X_train, y_train, epochs=15, batch_size=128, validation_data=(X_val, y_val),
                      callbacks=[early_stopping], verbose=1)

Epoch 1/15
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9982 - loss: 0.0111 - val_accuracy: 0.9790 - val_loss: 0.0773
Epoch 2/15
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.9989 - loss: 0.0083 - val_accuracy: 0.9784 - val_loss: 0.0778
Epoch 3/15
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.9991 - loss: 0.0072 - val_accuracy: 0.9790 - val_loss: 0.0781
Epoch 4/15
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.9994 - loss: 0.0058 - val_accuracy: 0.9786 - val_loss: 0.0773
Epoch 4: early stopping


In [24]:
_, acc = model3.evaluate(X_test, y_test, verbose=0)

print("Testing set accuracy: {:.2f}%".format(acc*100))

Testing set accuracy: 97.92%


In [33]:
# Evaluate Model 1
test_loss, test_acc = model1.evaluate(X_test, y_test, verbose=0)
print(f"Model 1 Accuracy: {test_acc*100:.2f}%")

# Evaluate Model 2
test_loss, test_acc = model2.evaluate(X_test, y_test, verbose=0)
print(f"Model 2 Accuracy: {test_acc*100:.2f}%")

# Evaluate Model 3
test_loss, test_acc = model3.evaluate(X_test, y_test, verbose=0)
print(f"Model 3 Accuracy: {test_acc*100:.2f}%")


Model 1 Accuracy: 97.95%
Model 2 Accuracy: 97.26%
Model 3 Accuracy: 97.92%
