In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

class VGG11_Sigmoid(nn.Module):
    """VGG-11-style CNN that uses Sigmoid activations throughout.

    The feature extractor is eight 3x3 convolutions (each followed by
    BatchNorm2d and Sigmoid) interleaved with five 2x2 max-pools, so the
    spatial size is halved five times. The first classifier layer expects
    exactly 512 flattened features, i.e. a 512-channel 1x1 feature map after
    the last pool (which is what a 32x32 input produces).

    Args:
        in_channels: Number of channels in the input images (default 1).
        num_classes: Length of the output logit vector (default 10).
    """

    # Output width of each convolution, in order; "M" marks a 2x2 max-pool.
    _LAYOUT = (64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M")

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        # Layers are appended in the same order as a hand-written Sequential,
        # so the module indices (and therefore state_dict keys) are stable.
        layers = []
        channels = in_channels
        for width in self._LAYOUT:
            if width == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            layers.append(
                nn.Conv2d(channels, width, kernel_size=3, stride=1, padding=1)
            )
            layers.append(nn.BatchNorm2d(width))
            layers.append(nn.Sigmoid())
            channels = width
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Linear(512, 4096),
            nn.Sigmoid(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.Sigmoid(),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return unnormalised class scores for a batch of images.

        The feature maps are flattened per sample (all dimensions after the
        batch dimension) before being passed to the classifier; no softmax is
        applied here.
        """
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Data augmentation transformations
# Images are resized to 32x32 so that the five 2x2 max-pools in VGG11_Sigmoid
# reduce them to a 1x1 feature map, matching the classifier's 512 inputs.
data_augmentation = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomRotation(degrees=10),
    transforms.RandomAffine(0, translate=(0.1, 0.1)),
    # `scale` is expressed as a fraction of the source image area, so the
    # upper bound is capped at 1.0: a crop larger than the image cannot be
    # sampled and such draws are simply rejected by the transform.
    transforms.RandomResizedCrop(size=32, scale=(0.9, 1.0)),
    transforms.ToTensor(),
])

# Training split: augmented and reshuffled every epoch.
train_dataset = datasets.MNIST(root='./data', train=True, transform=data_augmentation, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

# Evaluation uses only the deterministic resize + tensor conversion.
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

test_dataset = datasets.MNIST(root='./data', train=False, transform=test_transform, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Use a GPU when one is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_sigmoid = VGG11_Sigmoid()
model_sigmoid.to(device)
# CrossEntropyLoss consumes the raw scores produced by the model's last Linear layer.
criterion = nn.CrossEntropyLoss()

# Function to train and evaluate model
def train_model(optimizer, num_epochs=20, model=None):
    """Train a model and record its train/test accuracy after every epoch.

    Each epoch runs one optimisation pass over the module-level
    ``train_loader`` followed by a gradient-free evaluation pass over the
    module-level ``test_loader``. Batches are moved to the module-level
    ``device`` and the loss is the module-level ``criterion``.

    Args:
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once per
            training batch. It must have been built from the parameters of
            the model being trained.
        num_epochs: Number of epochs to run (default 20).
        model: Network to train. When ``None`` the module-level
            ``model_sigmoid`` is looked up at call time, so rebinding that
            name between calls selects a different network.

    Returns:
        A ``(train_accuracies, test_accuracies)`` tuple of lists holding one
        percentage per epoch. Training accuracy is accumulated during the
        optimisation pass, i.e. with the model in ``train()`` mode and while
        its weights are still being updated.
    """
    if model is None:
        model = model_sigmoid

    train_accuracies, test_accuracies = [], []

    for epoch in range(num_epochs):
        model.train()
        correct_train, total_train = 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Predicted class = index of the largest score in each row.
            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_accuracy = 100 * correct_train / total_train
        train_accuracies.append(train_accuracy)

        model.eval()
        correct_test, total_test = 0, 0

        # No gradients are needed for evaluation.
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total_test += labels.size(0)
                correct_test += (predicted == labels).sum().item()

        test_accuracy = 100 * correct_test / total_test
        test_accuracies.append(test_accuracy)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%")

    return train_accuracies, test_accuracies

# Train with AdaDelta
optimizer_adadelta = optim.Adadelta(model_sigmoid.parameters(), lr=1.0)
train_accuracies_adadelta, test_accuracies_adadelta = train_model(optimizer_adadelta)

# Train with Adam
# train_model picks up the module-level `model_sigmoid`, so the name is
# rebound to a freshly initialised network before the second run.
model_sigmoid = VGG11_Sigmoid().to(device)  # reinitialize model
optimizer_adam = optim.Adam(model_sigmoid.parameters(), lr=0.001)
train_accuracies_adam, test_accuracies_adam = train_model(optimizer_adam)

# Plotting the results
# The x-axis is derived from the recorded history so it always matches the
# number of epochs that were actually run.
epochs = range(1, len(train_accuracies_adadelta) + 1)
plt.figure(figsize=(12, 6))
plt.plot(epochs, train_accuracies_adadelta, label="Train Accuracy (AdaDelta)")
plt.plot(epochs, test_accuracies_adadelta, label="Test Accuracy (AdaDelta)")
plt.plot(epochs, train_accuracies_adam, label="Train Accuracy (Adam)")
plt.plot(epochs, test_accuracies_adam, label="Test Accuracy (Adam)")
plt.xlabel("Epochs")
plt.ylabel("Accuracy (%)")
plt.title("Training and Test Accuracies vs Epochs (Sigmoid)")
plt.legend()
plt.show()


Epoch [1/20], Train Accuracy: 16.47%, Test Accuracy: 11.68%
Epoch [2/20], Train Accuracy: 33.72%, Test Accuracy: 29.39%
Epoch [3/20], Train Accuracy: 46.94%, Test Accuracy: 41.63%
