In [1]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the network architecture
class Net(nn.Module):
    """Fully connected classifier with one hidden layer and a selectable activation.

    The input is flattened to 28*28 features, passed through a 50-unit hidden
    layer followed by the chosen activation, and projected to 10 outputs.
    No softmax is applied in ``forward``; the raw output of ``fc2`` is returned.

    Args:
        activation: Name of the hidden-layer activation, either ``'relu'``
            or ``'sigmoid'``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    # Supported activation names mapped to the module class that implements them.
    _ACTIVATIONS = {'relu': nn.ReLU, 'sigmoid': nn.Sigmoid}

    def __init__(self, activation):
        # Fail fast on an unknown name: otherwise `self.activation` would never
        # be assigned and the error would only surface later inside forward().
        if activation not in self._ACTIVATIONS:
            supported = ', '.join(repr(name) for name in self._ACTIVATIONS)
            raise ValueError(
                f'Unsupported activation {activation!r}; expected one of: {supported}'
            )
        super().__init__()
        self.fc1 = nn.Linear(28*28, 50)
        self.fc2 = nn.Linear(50, 10)
        self.activation = self._ACTIVATIONS[activation]()

    def forward(self, x):
        """Flatten ``x`` to rows of 28*28 features and return the output of ``fc2``."""
        x = x.view(-1, self.fc1.in_features)
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return x

# Load the MNIST dataset
# ToTensor converts each image to a tensor; the same transform object is kept
# in `transform` so it can be reused when building other splits.
transform = transforms.ToTensor()

# Training split (train=True); download=True requests a download into `root`.
train_data = datasets.MNIST(
    root='~/.pytorch/MNIST_data/',
    train=True,
    transform=transform,
    download=True,
)

# Shuffled mini-batches of 64 samples for training.
train_loader = DataLoader(
    train_data,
    batch_size=64,
    shuffle=True,
)

# The loss function is defined in the next cell.

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|███████████████████████████| 9912422/9912422 [00:00<00:00, 22327996.14it/s]


Extracting /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|███████████████████████████████| 28881/28881 [00:00<00:00, 78253032.19it/s]


Extracting /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|███████████████████████████| 1648877/1648877 [00:00<00:00, 20185075.76it/s]


Extracting /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████████████████████████████| 4542/4542 [00:00<00:00, 3412238.72it/s]


Extracting /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /Users/alisarupenyan/.pytorch/MNIST_data/MNIST/raw



In [2]:
# Define the loss
criterion = nn.CrossEntropyLoss()

# Create the networks (identical apart from the hidden-layer activation)
net_relu = Net('relu')
net_sigmoid = Net('sigmoid')

# Define the optimizers: one plain SGD optimizer per network, same learning rate
optimizer_relu = torch.optim.SGD(net_relu.parameters(), lr=0.01)
optimizer_sigmoid = torch.optim.SGD(net_sigmoid.parameters(), lr=0.01)

# Pair every network with its own optimizer and a label, so that each printed
# loss line can be attributed to the model that produced it.
training_runs = [
    ('ReLU', net_relu, optimizer_relu),
    ('Sigmoid', net_sigmoid, optimizer_sigmoid),
]

# Train the networks
for epoch in range(10):  # loop over the dataset multiple times
    # Each network makes its own full pass over train_loader in every epoch.
    for name, net, optimizer in training_runs:
        running_loss = 0.0
        for inputs, labels in train_loader:
            # zero the parameter gradients left over from the previous step
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate the batch loss for the epoch average
            running_loss += loss.item()

        # mean batch loss over the epoch, labelled with the network it belongs to
        print(f'Epoch {epoch+1} [{name}], Loss: {running_loss/len(train_loader)}')

print('Finished Training')



Epoch 1, Loss: 1.275695714297325
Epoch 1, Loss: 2.1817967650224404
Epoch 2, Loss: 0.5016677349424565
Epoch 2, Loss: 1.74553717631521
Epoch 3, Loss: 0.39599004216285655
Epoch 3, Loss: 1.2590221718811532
Epoch 4, Loss: 0.354133598728856
Epoch 4, Loss: 0.9565394388587236
Epoch 5, Loss: 0.32898430592183875
Epoch 5, Loss: 0.7793668865649177
Epoch 6, Loss: 0.31062791600569223
Epoch 6, Loss: 0.6670107591444495
Epoch 7, Loss: 0.29546704341861996
Epoch 7, Loss: 0.5909519362678406
Epoch 8, Loss: 0.28286160612061845
Epoch 8, Loss: 0.5366010364693111
Epoch 9, Loss: 0.2721461499455387
Epoch 9, Loss: 0.49613129288784225
Epoch 10, Loss: 0.26207339014611775
Epoch 10, Loss: 0.4651315826406357


<function print>

In [None]:
# Now let's evaluate the performance of the two models
# Test split (train=False), built with the same transform as the training data.
test_data = datasets.MNIST(
    root='~/.pytorch/MNIST_data/',
    train=False,
    transform=transform,
    download=True,
)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)

# Running tallies per network: correctly classified samples and samples seen.
correct_relu, total_relu = 0, 0
correct_sigmoid, total_sigmoid = 0, 0

# Gradient tracking is switched off for the whole evaluation pass.
with torch.no_grad():
    for images, labels in test_loader:
        # Evaluate ReLU network: the predicted class is the index of the largest output
        outputs = net_relu(images)
        predicted = torch.max(outputs.data, 1)[1]
        total_relu = total_relu + labels.size(0)
        correct_relu = correct_relu + (predicted == labels).sum().item()

        # Evaluate Sigmoid network on the same batch
        outputs = net_sigmoid(images)
        predicted = torch.max(outputs.data, 1)[1]
        total_sigmoid = total_sigmoid + labels.size(0)
        correct_sigmoid = correct_sigmoid + (predicted == labels).sum().item()

print('Accuracy of the network with ReLU on the 10000 test images: %d %%' % (100 * correct_relu / total_relu))
print('Accuracy of the network