
<br>
Test Activation Functions on the MNIST Dataset<br>
<br>
In[1]<br>
Import the libraries

Use the following lines of code to import PyTorch and the torchvision library (install torchvision first if it is not already available)

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import torch.nn.functional as F

In [None]:
import matplotlib.pylab as plt

use_cuda = torch.cuda.is_available()<br>
device = torch.device("cuda:0" if use_cuda else "cpu")<br>
torch.backends.cudnn.benchmark = True

In[2]

Build the model with sigmoid function

In [None]:
class Net(nn.Module):
    """Network with one hidden layer and a sigmoid activation.

    D_in, H and D_out are the input, hidden and output feature sizes of the
    two linear layers.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        # Layers are created in forward order: input -> hidden -> output.
        self.linear1 = nn.Linear(in_features=D_in, out_features=H)
        self.linear2 = nn.Linear(in_features=H, out_features=D_out)

    def forward(self, x):
        """Return linear2(sigmoid(linear1(x)))."""
        hidden = torch.sigmoid(self.linear1(x))
        return self.linear2(hidden)

Build the model with Tanh function

In [None]:
class NetTanh(nn.Module):
    """Network with one hidden layer and a tanh activation.

    D_in, H and D_out are the input, hidden and output feature sizes of the
    two linear layers.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        # Layers are created in forward order: input -> hidden -> output.
        self.linear1 = nn.Linear(in_features=D_in, out_features=H)
        self.linear2 = nn.Linear(in_features=H, out_features=D_out)

    def forward(self, x):
        """Return linear2(tanh(linear1(x)))."""
        hidden = torch.tanh(self.linear1(x))
        return self.linear2(hidden)

Build the model with Relu function

In [None]:
class NetRelu(nn.Module):
    """Network with one hidden layer and a ReLU activation.

    D_in, H and D_out are the input, hidden and output feature sizes of the
    two linear layers.
    """

    def __init__(self, D_in, H, D_out):
        super().__init__()
        # Layers are created in forward order: input -> hidden -> output.
        self.linear1 = nn.Linear(in_features=D_in, out_features=H)
        self.linear2 = nn.Linear(in_features=H, out_features=D_out)

    def forward(self, x):
        """Return linear2(relu(linear1(x)))."""
        hidden = torch.relu(self.linear1(x))
        return self.linear2(hidden)

In[3]<br>
Define the function for training the model

In [None]:
def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    """Train ``model`` and record per-batch loss and per-epoch validation accuracy.

    Every batch is flattened with ``x.view(-1, 28 * 28)`` before it is passed
    to the model, so each sample must contain 784 values.

    Args:
        model: ``nn.Module`` mapping a ``(batch, 784)`` tensor to class scores.
        criterion: Loss called as ``criterion(scores, targets)``.
        train_loader: Iterable of ``(x, y)`` training batches.
        validation_loader: Iterable of ``(x, y)`` validation batches.
        optimizer: Optimizer that updates the parameters of ``model``.
        epochs: Number of passes over ``train_loader``.

    Returns:
        dict with ``'training_loss'`` (one float per training batch) and
        ``'validation_accuracy'`` (one percentage per epoch).
    """
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for _ in range(epochs):
        for x, y in train_loader:
            optimizer.zero_grad()
            z = model(x.view(-1, 28 * 28))
            loss = criterion(z, y)
            loss.backward()
            optimizer.step()
            useful_stuff['training_loss'].append(loss.item())

        correct = 0
        # Count the evaluated samples here instead of reading the size of a
        # notebook-level dataset, so the accuracy always matches the loader
        # that was actually passed in.
        total = 0
        was_training = model.training
        model.eval()
        try:
            # Validation needs no gradients; skipping them saves memory and time.
            with torch.no_grad():
                for x, y in validation_loader:
                    z = model(x.view(-1, 28 * 28))
                    _, label = torch.max(z, 1)
                    correct += (label == y).sum().item()
                    total += y.size(0)
        finally:
            # Leave the model in the mode it had before validation.
            model.train(was_training)
        accuracy = 100 * (correct / total) if total else 0.0
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff

In[4]<br>
Create the training dataset

In [None]:
# MNIST training split stored under ./data (downloaded if missing); every
# image is converted with ToTensor.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
print(len(train_dataset))

Create the validation dataset

In [None]:
# MNIST split selected by train=False, used here for validation; stored under
# ./data (downloaded if missing) and converted with ToTensor.
validation_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
print(len(validation_dataset))

Create the criterion function

In [None]:
# Loss handed to train(), where it is called as criterion(z, y) on the model
# outputs and the target labels.
criterion = nn.CrossEntropyLoss()

Create the training data loader and validation data loader objects

In [None]:
# Mini-batch iterators: training batches are reshuffled, validation batches
# keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=2000,
    shuffle=True,
)
validation_loader = torch.utils.data.DataLoader(
    dataset=validation_dataset,
    batch_size=5000,
    shuffle=False,
)

Create the model object

In [None]:
# Layer sizes shared by all three models: flattened 28x28 input, 100 hidden
# units, 10 outputs.
input_dim, hidden_dim, output_dim = 28 * 28, 100, 10

In [None]:
# Sigmoid network; the GPU transfer (model.cuda()) is left disabled.
model = Net(D_in=input_dim, H=hidden_dim, D_out=output_dim)

Train a model with sigmoid function

In [None]:
# Plain SGD on the sigmoid model for 30 epochs; learning_rate is reused by the
# tanh and ReLU runs below.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
training_results = train(
    model=model,
    criterion=criterion,
    train_loader=train_loader,
    validation_loader=validation_loader,
    optimizer=optimizer,
    epochs=30,
)

Train a model with Tanh function

In [None]:
# Same setup as the sigmoid run, but with the tanh network and a fresh optimizer.
model_Tanh = NetTanh(D_in=input_dim, H=hidden_dim, D_out=output_dim)
optimizer = torch.optim.SGD(model_Tanh.parameters(), lr=learning_rate)
training_results_tanch = train(
    model=model_Tanh,
    criterion=criterion,
    train_loader=train_loader,
    validation_loader=validation_loader,
    optimizer=optimizer,
    epochs=30,
)

Train a model with Relu function

In [None]:
# Same setup as the sigmoid run, but with the ReLU network and a fresh optimizer.
modelRelu = NetRelu(D_in=input_dim, H=hidden_dim, D_out=output_dim)
optimizer = torch.optim.SGD(modelRelu.parameters(), lr=learning_rate)
training_results_relu = train(
    model=modelRelu,
    criterion=criterion,
    train_loader=train_loader,
    validation_loader=validation_loader,
    optimizer=optimizer,
    epochs=30,
)

Compare the training loss

In [None]:
# Training loss of the three models. train() appends one loss value per
# mini-batch, so the x-axis counts optimizer steps rather than epochs.
plt.plot(training_results_tanch['training_loss'], label='tanh')
plt.plot(training_results['training_loss'], label='sigmoid')
plt.plot(training_results_relu['training_loss'], label='relu')
plt.xlabel('iterations')
plt.ylabel('loss')
plt.title('training loss iterations')
plt.legend()
plt.show()

Compare the validation accuracy

In [None]:
# Validation accuracy of the three models. train() appends one percentage per
# epoch, so the x-axis counts epochs.
plt.plot(training_results_tanch['validation_accuracy'], label='tanh')
plt.plot(training_results['validation_accuracy'], label='sigmoid')
plt.plot(training_results_relu['validation_accuracy'], label='relu')
plt.ylabel('validation accuracy')
plt.xlabel('epochs ')
plt.title('validation accuracy per epoch')
plt.legend()
plt.show()