# BONUS TASK

Compare 3 configurations for the activation function. Show and explain your performance result

In [None]:
#Import all libraries
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.optim as optim

In [None]:
# Build the MNIST train/test datasets and their mini-batch loaders.
# Every image is converted to a tensor and then normalized with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Datasets are downloaded into ./data on first use (train split first).
mnist_trainset = datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
mnist_testset = datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Both loaders serve shuffled batches of 10 samples.
train_loader = torch.utils.data.DataLoader(
    mnist_trainset, batch_size=10, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    mnist_testset, batch_size=10, shuffle=True
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 103032080.64it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 76233916.82it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 26038649.69it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 13966663.32it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



Membandingkan beragam konfigurasi untuk fungsi aktivasi. Dalam percobaan ini, kami akan melatih dan mengevaluasi tiga model berbeda yang menggunakan pendekatan fungsi aktivasi berbeda.


*   Model 1: Fungsi aktivasi ReLU. Fungsi aktivasi ini diterapkan pada lapisan pertama (fc1) dan membantu mempercepat konvergensi selama pelatihan.
*   Model 2: Fungsi aktivasi Sigmoid. Fungsi ini menyesuaikan keluaran lapisan pertama ke kisaran antara 0 dan 1, terbukti berguna untuk tugas klasifikasi biner.
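*   Model 3: Fungsi aktivasi Softmax. Fungsi ini diterapkan pada keluaran lapisan pertama (fc1), sebelum diteruskan ke lapisan keluaran (fc2), sehingga aktivasi lapisan tersembunyi berbentuk distribusi probabilitas. Softmax umumnya dipakai pada lapisan keluaran untuk tugas klasifikasi kelas jamak; di sini softmax diuji sebagai fungsi aktivasi lapisan tersembunyi.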


**ReLU activation function**

In [None]:
# Model 1: ReLU activation function

class Model1(nn.Module):
    """Two-layer fully connected classifier with a ReLU hidden activation.

    ``fc1`` maps the 28*28 flattened input to 128 hidden units and ``fc2``
    maps those 128 units to 10 outputs. No activation is applied after
    ``fc2``; the raw scores are returned.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # input 784 -> hidden 128
        self.fc2 = nn.Linear(128, 10)  # hidden 128 -> 10 outputs

    def forward(self, x):
        """Return a ``(N, 10)`` tensor of raw scores for input ``x``.

        ``x`` is flattened to ``(N, 784)`` first, so its element count must
        be a multiple of 28*28.
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. the
        # result of a transpose, and only copies when it has to.
        x = x.reshape(-1, 28 * 28)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

**Sigmoid activation function**

In [None]:
# Model 2: Sigmoid activation function

class Model2(nn.Module):
    """Two-layer fully connected classifier with a sigmoid hidden activation.

    ``fc1`` maps the 28*28 flattened input to 128 hidden units and ``fc2``
    maps those 128 units to 10 outputs. No activation is applied after
    ``fc2``; the raw scores are returned.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # input 784 -> hidden 128
        self.fc2 = nn.Linear(128, 10)  # hidden 128 -> 10 outputs

    def forward(self, x):
        """Return a ``(N, 10)`` tensor of raw scores for input ``x``.

        ``x`` is flattened to ``(N, 784)`` first, so its element count must
        be a multiple of 28*28.
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. the
        # result of a transpose, and only copies when it has to.
        x = x.reshape(-1, 28 * 28)
        x = torch.sigmoid(self.fc1(x))
        return self.fc2(x)

**Softmax activation function**

In [None]:
# Model 3: Softmax activation function

class Model3(nn.Module):
    """Two-layer fully connected classifier with a softmax hidden activation.

    ``fc1`` maps the 28*28 flattened input to 128 hidden units and ``fc2``
    maps those 128 units to 10 outputs. The softmax is applied to the output
    of ``fc1`` (not to the final output), so each sample's 128 hidden
    activations are non-negative and sum to 1. No activation is applied
    after ``fc2``; the raw scores are returned.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # input 784 -> hidden 128
        self.fc2 = nn.Linear(128, 10)  # hidden 128 -> 10 outputs

    def forward(self, x):
        """Return a ``(N, 10)`` tensor of raw scores for input ``x``.

        ``x`` is flattened to ``(N, 784)`` first, so its element count must
        be a multiple of 28*28.
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. the
        # result of a transpose, and only copies when it has to.
        x = x.reshape(-1, 28 * 28)
        hidden = torch.softmax(self.fc1(x), dim=1)  # normalize over the 128 units
        return self.fc2(hidden)

### Loss function: Cross Entropy Loss

In [None]:
# Training configuration shared by every model in the comparison.
num_epochs = 5  # number of passes over the training loader
learning_rate, momentum = 0.01, 0.9  # step size and momentum given to the optimizer
cross_el = nn.CrossEntropyLoss()  # loss criterion applied to the model outputs

Dengan konfigurasi ini, model dapat dilatih menggunakan pendekatan pelatihan yang sesuai untuk Cross Entropy Loss Function, dengan kecepatan pembelajaran, momentum, dan jumlah epoch tertentu.

In [None]:
def _train(model, loader, criterion, optimizer, epochs, log_every=100):
    """Train ``model`` in place for ``epochs`` passes over ``loader``.

    Prints the mean loss of every ``log_every`` consecutive batches.
    """
    model.train()  # make sure training-mode behavior is active
    for epoch in range(epochs):
        running_loss = 0.0
        for step, (inputs, labels) in enumerate(loader, start=1):
            optimizer.zero_grad()  # drop gradients left from the previous step

            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % log_every == 0:
                print(f"[{epoch+1}, {step}] loss: {running_loss/log_every:.3f}")
                running_loss = 0.0


def _evaluate(model, loader):
    """Return the classification accuracy of ``model`` on ``loader`` in percent."""
    model.eval()  # switch to inference-mode behavior
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in loader:
            # The predicted class is the index of the largest output score.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total


# Train each model with its own SGD optimizer, then record its test accuracy.
models = [Model1(), Model2(), Model3()]
accuracies = []

for i, model in enumerate(models):
    print(f"Model {i+1}")
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    _train(model, train_loader, cross_el, optimizer, num_epochs)

    accuracy = _evaluate(model, test_loader)
    print(f"Accuracy: {accuracy}%")
    accuracies.append(accuracy)

print("Accuracies:", accuracies)

Model 1
[1, 100] loss: 1.227
[1, 200] loss: 0.660
[1, 300] loss: 0.681
[1, 400] loss: 0.677
[1, 500] loss: 0.583
[1, 600] loss: 0.563
[1, 700] loss: 0.489
[1, 800] loss: 0.436
[1, 900] loss: 0.424
[1, 1000] loss: 0.505
[1, 1100] loss: 0.443
[1, 1200] loss: 0.434
[1, 1300] loss: 0.459
[1, 1400] loss: 0.446
[1, 1500] loss: 0.491
[1, 1600] loss: 0.402
[1, 1700] loss: 0.400
[1, 1800] loss: 0.407
[1, 1900] loss: 0.400
[1, 2000] loss: 0.343
[1, 2100] loss: 0.369
[1, 2200] loss: 0.480
[1, 2300] loss: 0.479
[1, 2400] loss: 0.386
[1, 2500] loss: 0.325
[1, 2600] loss: 0.378
[1, 2700] loss: 0.415
[1, 2800] loss: 0.377
[1, 2900] loss: 0.335
[1, 3000] loss: 0.364
[1, 3100] loss: 0.349
[1, 3200] loss: 0.312
[1, 3300] loss: 0.387
[1, 3400] loss: 0.320
[1, 3500] loss: 0.278
[1, 3600] loss: 0.308
[1, 3700] loss: 0.253
[1, 3800] loss: 0.334
[1, 3900] loss: 0.351
[1, 4000] loss: 0.375
[1, 4100] loss: 0.305
[1, 4200] loss: 0.302
[1, 4300] loss: 0.295
[1, 4400] loss: 0.293
[1, 4500] loss: 0.270
[1, 4600] l

In [None]:
# Show the test accuracies (in percent) collected for Model 1, 2 and 3, in that order.
print("Accuracies:", accuracies)

Accuracies: [94.18, 96.95, 72.21]


Berikut akurasi untuk setiap model dengan konfigurasi yang berbeda:


*   Model 1: Fungsi aktivasi ReLU = 94.18%
*   Model 2: Fungsi aktivasi Sigmoid = 96.95%
*   Model 3: Fungsi aktivasi Softmax = 72.21%

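Berdasarkan hasil evaluasi, terlihat bahwa model yang menggunakan aktivasi Sigmoid memberikan performa terbaik dengan akurasi 96.95%. Model yang menggunakan aktivasi ReLU juga memberikan performa cukup baik dengan akurasi 94.18%. Sedangkan model yang menggunakan aktivasi Softmax memberikan performa paling buruk dengan akurasi 72.21%. Hal ini dapat dijelaskan karena pada Model 3 softmax diterapkan pada keluaran lapisan pertama (fc1): ke-128 aktivasi dipaksa bernilai positif dan berjumlah 1, sehingga nilainya sangat kecil dan informasi yang diteruskan ke lapisan keluaran menjadi terbatas. Dengan demikian, pilihan fungsi aktivasi sangat berpengaruh terhadap performa model, sehingga memilih fungsi aktivasi yang tepat sangat penting untuk mendapatkan hasil yang baik dalam tugas klasifikasi.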

