In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

In [None]:
# Run on a CUDA GPU when PyTorch reports one as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Define the CNN model
class CNN(nn.Module):
    """Six-layer convolutional classifier with a pluggable activation.

    Every convolution uses a 3x3 kernel with stride 1 and padding 1, so the
    spatial size of the input is kept through the whole stack. The final
    feature map is averaged over its two spatial dimensions and fed to a
    two-layer fully connected head that produces 10 logits per sample.

    Args:
        activation_fn: Callable applied after every convolution and after the
            first fully connected layer (for example ``nn.ReLU()``).
    """

    def __init__(self, activation_fn):
        super().__init__()
        self.activation_fn = activation_fn

        # Channel progression: 3 -> 32 -> 64 -> 128 -> 256 -> 512 -> 512.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(3, 32, **conv_kwargs)
        self.conv2 = nn.Conv2d(32, 64, **conv_kwargs)
        self.conv3 = nn.Conv2d(64, 128, **conv_kwargs)
        self.conv4 = nn.Conv2d(128, 256, **conv_kwargs)
        self.conv5 = nn.Conv2d(256, 512, **conv_kwargs)
        self.conv6 = nn.Conv2d(512, 512, **conv_kwargs)

        # Classification head: 512 pooled features -> 512 hidden units -> 10 logits.
        self.fc1 = nn.Linear(512, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 logits per sample for a batch of 3-channel images."""
        conv_stack = (
            self.conv1,
            self.conv2,
            self.conv3,
            self.conv4,
            self.conv5,
            self.conv6,
        )
        for conv in conv_stack:
            x = self.activation_fn(conv(x))

        # Global average pooling: collapse the two spatial axes into one value per channel.
        pooled = x.mean(dim=(2, 3))
        hidden = self.activation_fn(self.fc1(pooled))
        return self.fc2(hidden)

In [None]:
# Define the Mish activation function
class Mish(nn.Module):
    """Mish activation: ``x * tanh(softplus(x))``, computed element-wise."""

    def forward(self, x):
        """Return ``x`` scaled by the gate ``tanh(softplus(x))``."""
        gate = nn.functional.softplus(x).tanh()
        return x * gate

# Define the Swish activation function
class Swish(nn.Module):
    """Swish activation: ``x * sigmoid(x)``, computed element-wise."""

    def forward(self, x):
        """Return ``x`` scaled by the gate ``sigmoid(x)``."""
        gate = torch.sigmoid(x)
        return x * gate

In [None]:
 # Define the training function
def train(model, criterion, optimizer, trainloader, epochs, loss_by_epochs):
    """Run ``epochs`` passes over ``trainloader`` and record each epoch's mean batch loss.

    Args:
        model: Network to optimise; it is put in training mode once, before the first epoch.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        optimizer: Optimizer whose ``zero_grad`` and ``step`` are called once per batch.
        trainloader: Sized iterable yielding ``(inputs, labels)`` batches.
        epochs: Number of full passes over ``trainloader``.
        loss_by_epochs: List that receives one mean loss per epoch; it is
            appended to in place.

    Returns:
        The same ``loss_by_epochs`` list that was passed in.
    """
    model.train()
    for epoch_idx in range(epochs):
        epoch_loss_sum = 0.0
        for batch_inputs, batch_labels in tqdm(trainloader):
            # Batches are moved to the notebook-level `device` before the forward pass.
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()

            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()

            epoch_loss_sum += batch_loss.item()

        # Mean of the per-batch losses: every batch counts equally, whatever its size.
        mean_loss = epoch_loss_sum / len(trainloader)
        print(f"Epoch {epoch_idx + 1}/{epochs} - Loss: {mean_loss}")
        loss_by_epochs.append(mean_loss)
    return loss_by_epochs

In [None]:
# Set random seed for reproducibility
torch.manual_seed(42)

# CIFAR-10 preprocessing, assembled from two groups of steps:
#   1. random augmentation (horizontal flip, then a 32x32 crop taken after 4 pixels of padding);
#   2. conversion to a tensor followed by per-channel normalisation with mean 0.5 and std 0.5.
augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
]
tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(augmentation_steps + tensor_steps)

In [None]:
# Training split: goes through the augmenting `transform` pipeline and is shuffled.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

# Test split: it must NOT go through the random flip/crop augmentations, otherwise
# accuracy is measured on perturbed images and changes from one evaluation to the next.
# Only the deterministic steps are kept; the normalisation constants are the same
# ones used by the training pipeline so both splits share one input scale.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)


In [None]:
# Re-seed right before construction so the initial weights depend only on the seed,
# not on how many random numbers earlier cells consumed. This keeps the cell
# idempotent on re-run and keeps the activation comparison free of init noise.
torch.manual_seed(42)
model_mish = CNN(activation_fn=Mish()).to(device)

In [None]:
# Re-seed right before construction so the initial weights depend only on the seed,
# not on how many random numbers earlier cells consumed. This keeps the cell
# idempotent on re-run and keeps the activation comparison free of init noise.
torch.manual_seed(42)
model_relu = CNN(activation_fn=nn.ReLU()).to(device)

In [None]:
# Re-seed right before construction so the initial weights depend only on the seed,
# not on how many random numbers earlier cells consumed. This keeps the cell
# idempotent on re-run and keeps the activation comparison free of init noise.
torch.manual_seed(42)
model_swish = CNN(activation_fn=Swish()).to(device)

In [None]:
# Define the loss function and optimizer
num_epochs = 25
criterion = nn.CrossEntropyLoss()

# All three networks are optimised with Adam at the same learning rate;
# the optimizers are built in the order Mish, ReLU, Swish.
learning_rate = 0.001
optimizer_mish, optimizer_relu, optimizer_swish = (
    optim.Adam(net.parameters(), lr=learning_rate)
    for net in (model_mish, model_relu, model_swish)
)

In [None]:
# Train the model with ReLU activation function
print("Training the model with ReLU activation function...")
# A fresh list is handed in so the returned history holds this run's epochs only.
loss_by_epochs_relu = train(
    model=model_relu,
    criterion=criterion,
    optimizer=optimizer_relu,
    trainloader=trainloader,
    epochs=num_epochs,
    loss_by_epochs=[],
)


In [None]:
# Train the model with Mish activation function
print("Training the model with Mish activation function...")
# A fresh list is handed in so the returned history holds this run's epochs only.
loss_by_epochs_mish = train(
    model=model_mish,
    criterion=criterion,
    optimizer=optimizer_mish,
    trainloader=trainloader,
    epochs=num_epochs,
    loss_by_epochs=[],
)

In [None]:
# Train the model with Swish activation function
print("Training the model with Swish activation function...")
# A fresh list is handed in so the returned history holds this run's epochs only.
loss_by_epochs_swish = train(
    model=model_swish,
    criterion=criterion,
    optimizer=optimizer_swish,
    trainloader=trainloader,
    epochs=num_epochs,
    loss_by_epochs=[],
)


In [None]:
import matplotlib.pyplot as plt

# Draw on an explicit Axes rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(12, 6))

# Epochs are reported 1-based (see the x-axis label), so the x values run
# from 1 to num_epochs instead of the 0-based range(num_epochs).
xs = range(1, num_epochs + 1)

ax.plot(xs, loss_by_epochs_mish, label='Mish')
ax.plot(xs, loss_by_epochs_relu, label='ReLU')
ax.plot(xs, loss_by_epochs_swish, label='Swish')

# The epoch count in the texts follows num_epochs so they stay correct if it changes.
ax.set_title(f"Change in loss values as the models were trained over the {num_epochs} epochs.")
ax.set_xlabel(f"Epochs (1-{num_epochs})")
ax.set_ylabel("Corresponding loss values")
ax.legend()

# Render the figure; an argument-less plt.plot() would draw nothing and echo `[]`.
plt.show()




In [None]:
# Evaluate the models on the test set
def evaluate(model, dataloader):
    """Return the top-1 accuracy of ``model`` over ``dataloader`` as a percentage.

    The model is switched to evaluation mode (and left in it), and the whole
    pass runs with gradient tracking disabled.

    Args:
        model: Network that maps a batch of inputs to per-class scores.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        ``100 * correct / total`` over every sample yielded by ``dataloader``.
    """
    model.eval()
    num_correct = 0
    num_seen = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            # Batches are moved to the notebook-level `device` before inference.
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            scores = model(batch_inputs)
            # The predicted class is the index of the largest score along dim 1.
            predictions = scores.max(dim=1).indices

            num_seen += batch_labels.size(0)
            num_correct += (predictions == batch_labels).sum().item()
    return 100 * num_correct / num_seen

In [None]:
# Score each trained network on the test loader, in the order Mish, ReLU, Swish.
accuracy_mish = evaluate(model_mish, testloader)
accuracy_relu = evaluate(model_relu, testloader)
accuracy_swish = evaluate(model_swish, testloader)

# Report the three percentages through one shared message template.
for activation_name, test_accuracy in (
    ("Mish", accuracy_mish),
    ("ReLU", accuracy_relu),
    ("Swish", accuracy_swish),
):
    print(f"Accuracy of the model with {activation_name} activation function: {test_accuracy}%")

In [None]:
# Persist the ReLU network in two forms: the whole module object (pickled, so
# loading it needs the model's class to be available) and its state_dict only.
torch.save(obj=model_relu, f="modelrelu_cifar.pth")
torch.save(obj=model_relu.state_dict(), f="modelrelu_cifar_statedict.pth")

In [None]:
# Persist the Mish network in two forms: the whole module object (pickled, so
# loading it needs the model's class to be available) and its state_dict only.
torch.save(obj=model_mish, f="model_mish_cifar.pth")
torch.save(obj=model_mish.state_dict(), f="model_mish_cifar_statedict.pth")

In [None]:
# Persist the Swish network in two forms: the whole module object (pickled, so
# loading it needs the model's class to be available) and its state_dict only.
torch.save(obj=model_swish, f="model_swish_cifar.pth")
torch.save(obj=model_swish.state_dict(), f="model_swish_cifar_statedict.pth")