In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#added for Net model MNIST dataset
import tensorflow as tf
from tensorflow import keras
#added for Net model Fashion-MNIST
from tensorflow.keras import layers
#added for Model model MNIST dataset
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import os
#added for Model Fashion-MNIST dataset
from sklearn.model_selection import train_test_split


In [None]:
class Net(nn.Module):
    def __init__(self, num_classes=10):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x), dim=1)
        return x

In [None]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500,10)

    def forward(self, x):
        x_1 = F.relu(self.conv1(x))
        x = F.max_pool2d(x_1, 2, 2)
        x_2 = F.relu(self.conv2(x))
        x = F.max_pool2d(x_2, 2, 2)
        x = x.view(-1, 4*4*50)
        x_3 = F.relu(self.fc1(x))
        h = F.softmax(self.fc2(x_3),dim=1)
        return h, x_3, x_2, x_1

## When training the models, I consider small batch size to help the models generalize better by reducing the effects of noise and providing more diverse examples in each update step. However, smaller batch size resulted in slower training times for my NET CNN model and so is less efficient use of hardware resources.

## The paper considers 256 batch size which can cause potential overfitting depending of images' complexity and size. 

# Train NET with MNIST keras


In [None]:
import tensorflow as tf
from tensorflow import keras

# Load MNIST dataset
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

# Reshape and normalize data
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32') / 255.0
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1).astype('float32') / 255.0

# Define the model
model = keras.Sequential([
    keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', padding='same', input_shape=(28, 28, 1)),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2301071d550>

In [None]:
import tensorflow as tf
from tensorflow import keras

def train_mnist(num_epochs):
    # Load MNIST dataset
    (train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

    # Reshape and normalize data
    train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32') / 255.0
    test_images = test_images.reshape(test_images.shape[0], 28, 28, 1).astype('float32') / 255.0

    # Define the model
    model = keras.Sequential([
        keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', padding='same', input_shape=(28, 28, 1)),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])

    best_model = None
    best_val_acc = 0.0

    # Try different number of epochs and keep track of the best model
    for epoch in num_epochs:
        print(f'Training model for {epoch} epochs...')
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        model.fit(train_images, train_labels, epochs=epoch, validation_data=(test_images, test_labels))
        _, val_acc = model.evaluate(test_images, test_labels)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model = model
        print(f'Validation accuracy for {epoch} epochs: {val_acc}\n')

    print(f'Best model validation accuracy: {best_val_acc}\n')

       # Save the best model to a file
    best_model_path = 'best_model.h5'
    if os.path.exists(best_model_path):
        os.remove(best_model_path)
    best_model.save(best_model_path)
    print(f'Best model saved to {best_model_path}\n')

    return best_model

train_mnist([5, 10, 15])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Training model for 5 epochs...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Validation accuracy for 5 epochs: 0.9904000163078308

Training model for 10 epochs...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation accuracy for 10 epochs: 0.992900013923645

Training model for 15 epochs...
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Validation accuracy for 15 epochs: 0.9929999709129333

Best model validation accuracy: 0.9929999709129333

Best model saved to best_model.h5



<keras.engine.sequential.Sequential at 0x7f9e4d8e6710>

# Train Model with MNIST

In [10]:
def train_and_evaluate_model(num_epochs_list):
    # Load the MNIST dataset
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Preprocess the data
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.
    x_train = np.expand_dims(x_train, axis=-1)
    x_test = np.expand_dims(x_test, axis=-1)
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)

    # Define the model
    model2 = Sequential()
    model2.add(Conv2D(20, (5, 5), activation='relu', input_shape=(28, 28, 1)))
    model2.add(MaxPooling2D(pool_size=(2, 2)))
    model2.add(Conv2D(50, (5, 5), activation='relu'))
    model2.add(MaxPooling2D(pool_size=(2, 2)))
    model2.add(Flatten())
    model2.add(Dense(500, activation='relu'))
    model2.add(Dense(10, activation='softmax'))

    # Compile the model
    model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    model2.fit(x_train, y_train, epochs=num_epochs_list, validation_data=(x_test, y_test))

    # Save the model
    model2.save('model2.h5')
    
    return model2
train_and_evaluate_model(5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.engine.sequential.Sequential at 0x7f8cd1c9f640>

# Train NET with Fashion-MNIST 

In [4]:
# Load the Fashion-MNIST dataset
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Preprocess the data
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Define the model
model1 = keras.Sequential(
    [
        layers.Conv2D(32, kernel_size=3, activation="relu", input_shape=(28, 28, 1)),
        layers.MaxPooling2D(),
        layers.Conv2D(64, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(),
        layers.Conv2D(128, kernel_size=3, activation="relu"),
        layers.Flatten(),
        layers.Dense(512, activation="relu"),
        layers.Dense(10, activation="softmax"),
    ]
)

# Compile the model
model1.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the model
history = model1.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_acc = model1.evaluate(x_test, y_test)
print("Test accuracy:", test_acc)

# Save the model
model1.save("my_model1.h5")


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy: 0.9107999801635742
