<a href="https://colab.research.google.com/github/noelfischer/ai_praktika/blob/main/4/P04c1_Comparing_Activation_Functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the network architecture
class Net(nn.Module):
    def __init__(self, activation):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 50)
        self.fc2 = nn.Linear(50, 10)
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return x

# Load the MNIST dataset
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='~/.pytorch/MNIST_data/', train=True, transform=transform, download=True)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

# Define the loss

In [5]:
# Define the loss
criterion = nn.CrossEntropyLoss()

# Create the networks
net_relu = Net('relu')
net_sigmoid = Net('sigmoid')

# Define the optimizer
optimizer_relu = torch.optim.SGD(net_relu.parameters(), lr=0.01)
optimizer_sigmoid = torch.optim.SGD(net_sigmoid.parameters(), lr=0.01)

# Train the networks
for epoch in range(10):  # loop over the dataset multiple times
    for nets, optimizers in [(net_relu, optimizer_relu), (net_sigmoid, optimizer_sigmoid)]:
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizers.zero_grad()

            # forward + backward + optimize
            outputs = nets(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizers.step()

            # print statistics
            running_loss += loss.item()
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

print('Finished Training')



Epoch 1, Loss: 1.3121519801713257
Epoch 1, Loss: 2.176288458457125
Epoch 2, Loss: 0.510999748701734
Epoch 2, Loss: 1.7373025421140544
Epoch 3, Loss: 0.40058127312517877
Epoch 3, Loss: 1.2299550234127654
Epoch 4, Loss: 0.359361439530275
Epoch 4, Loss: 0.9155972765834093
Epoch 5, Loss: 0.335336933162675
Epoch 5, Loss: 0.741535498071581
Epoch 6, Loss: 0.3188879701183803
Epoch 6, Loss: 0.6361995809622156
Epoch 7, Loss: 0.305684728186522
Epoch 7, Loss: 0.5664418975173283
Epoch 8, Loss: 0.2949697911294539
Epoch 8, Loss: 0.5173365430219341
Epoch 9, Loss: 0.2852219057950511
Epoch 9, Loss: 0.4808274563123931
Epoch 10, Loss: 0.27660801290258413
Epoch 10, Loss: 0.4526781660280248
Finished Training


In [6]:
# Now let's evaluate the performance of the two models
test_data = datasets.MNIST(root='~/.pytorch/MNIST_data/', train=False, transform=transform, download=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)

correct_relu = 0
total_relu = 0
correct_sigmoid = 0
total_sigmoid = 0

with torch.no_grad():
    for data in test_loader:
        images, labels = data

        # Evaluate ReLU network
        outputs = net_relu(images)
        _, predicted = torch.max(outputs.data, 1)
        total_relu += labels.size(0)
        correct_relu += (predicted == labels).sum().item()

        # Evaluate Sigmoid network
        outputs = net_sigmoid(images)
        _, predicted = torch.max(outputs.data, 1)
        total_sigmoid += labels.size(0)
        correct_sigmoid += (predicted == labels).sum().item()

print('Accuracy of the network with ReLU on the 10000 test images: %d %%' % (100 * correct_relu / total_relu))
print('Accuracy of the network with sigmoid on the 10000 test images: %d %%' % (100 * correct_sigmoid / total_sigmoid))

Accuracy of the network with ReLU on the 10000 test images: 92 %
Accuracy of the network with sigmoid on the 10000 test images: 89 %
