# Custom Activation Function
* Import
* Custom Activation Function: **Swish**
* Implementing Activation Function


## Import

In [1]:
import torch

from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.optim as opt
from torch.autograd import Variable

## ReLU Activation Function

The ReLU function is defined as follows:

$ f(x) = \max(0, x) = \left\{\begin{matrix}
x & \text{if} \ x > 0\\
0 & \text{if} \ x \le 0 \\
\end{matrix}\right.$

In [55]:
def relu(x):
    """Apply the rectified linear unit, ``max(0, x)``, element-wise.

    Args:
        x (torch.Tensor): Input tensor of any shape.

    Returns:
        torch.Tensor: Tensor shaped like ``x`` in which every negative entry
        has been replaced by zero.
    """
    # Build the zero floor with x's own dtype and device so the comparison
    # does not rely on implicit type promotion or on mixing a CPU scalar
    # with a tensor that lives on another device.
    floor = torch.zeros((), dtype=x.dtype, device=x.device)
    return torch.maximum(floor, x)

In [58]:
# Quick sanity check: the negative entry should come back as zero.
relu(torch.tensor([-0.1, 0.2, 0.3]))

tensor([0.0000, 0.2000, 0.3000])

In [59]:
class MyReLU(nn.Module):
    """ReLU activation, ``max(0, x)``, packaged as an ``nn.Module``.

    The module has no parameters, so it can be dropped into an
    ``nn.Sequential`` wherever an activation layer is expected.
    """

    def __init__(self):
        super(MyReLU, self).__init__()

    def relu(self, x):
        """Return ``x`` with every negative entry replaced by zero.

        Args:
            x (torch.Tensor): Input tensor of any shape.

        Returns:
            torch.Tensor: Tensor shaped like ``x``.
        """
        # Create the zero floor on x's device and with x's dtype so the
        # module behaves the same wherever the model has been moved.
        floor = torch.zeros((), dtype=x.dtype, device=x.device)
        return torch.maximum(floor, x)

    def forward(self, x):
        """Apply ReLU to ``x`` (delegates to :meth:`relu`)."""
        return self.relu(x)

## Swish Activation Function

The Swish function is defined as:
$ f(x) = x \cdot \mathrm{sigmoid}(x) = \dfrac{x}{1 + e^{-x}} $

**Reference**
* Ramachandran, P., Zoph, B., & Le, Q. V. (2017). **Swish: a self-gated activation function.** arXiv preprint arXiv:[1710.05941](https://arxiv.org/pdf/1710.05941v1.pdf?source=post_page), 7, 1.

In [None]:
def swish(x, slope=1):
    """Apply the Swish activation, ``x * sigmoid(slope * x)``, element-wise.

    Args:
        x (torch.Tensor): Input tensor of any shape.
        slope (float, optional): Scale applied to ``x`` inside the sigmoid.
            The default of 1 gives the plain ``x * sigmoid(x)`` form.

    Returns:
        torch.Tensor: Tensor shaped like ``x``.
    """
    return x * torch.sigmoid(slope * x)

In [8]:
class MySwish(nn.Module):
    """Swish activation, ``x * sigmoid(slope * x)``, as an ``nn.Module``.

    Args:
        slope (float, optional): Scale applied to the input inside the
            sigmoid. The default of 1 gives the plain ``x * sigmoid(x)``
            form.
    """

    def __init__(self, slope=1):
        super(MySwish, self).__init__()
        # Keep the constructor argument; it used to be accepted but ignored.
        self.slope = slope

    def swish(self, x):
        """Return ``x * sigmoid(slope * x)`` computed element-wise.

        Args:
            x (torch.Tensor): Input tensor of any shape.

        Returns:
            torch.Tensor: Tensor shaped like ``x``.
        """
        return x * torch.sigmoid(self.slope * x)

    def forward(self, x):
        """Apply Swish to ``x`` (delegates to :meth:`swish`)."""
        return self.swish(x)

## 1. Implementing Swish Activation Function

* Set the device
* Dataset & DataLoader
* CNN Model
* Loss function & Optimizer
* Training Model
* Testing Model


### Set the Device

In [9]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
# Hyperparameters
train_batch_size = 100  # mini-batch size for the training DataLoader
test_batch_size = 1000  # mini-batch size for the test DataLoader
# Misspelled original name, kept as an alias because the DataLoader cell
# still reads it.
test_batch_szie = test_batch_size
learning_rate = 0.001   # learning rate passed to the Adam optimizer
num_epochs = 5          # number of full passes over the training set

### Dataset & DataLoader

In [11]:
# MNIST train and test splits, downloaded into ./data when not already
# present. ToTensor converts each image to a tensor.
train_dataset = MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [12]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)

# Test batches keep the dataset order; shuffling is not needed for evaluation.
test_loader = DataLoader(test_dataset, batch_size=test_batch_szie, shuffle=False)

### CNN Model w/ Swish

* Use the `MySwish` module as the activation function in the CNN model.

In [14]:
class CNNwithSwish(nn.Module):
    """Small CNN classifier that uses ``MySwish`` after every hidden layer.

    Layout: two 3x3 convolutions (1 -> 64 -> 128 channels, padding 1), a
    2x2 max-pool, then a 1024-unit dense layer and a 10-way output layer.
    The dense input size of ``14*14*128`` implies single-channel 28x28
    inputs: the padded convolutions keep the spatial size and the pool
    halves it.
    """

    def __init__(self):
        super(CNNwithSwish, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            MySwish(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            MySwish(),
            nn.MaxPool2d(stride=2, kernel_size=2)
        )

        self.dense = nn.Sequential(
            nn.Linear(in_features=14*14*128, out_features=1024),
            MySwish(),
            nn.Linear(1024, 10)
        )

    def forward(self, x):
        """Return the unnormalised class scores for a batch of images.

        Args:
            x (torch.Tensor): Batch of images laid out as
                ``(batch, 1, height, width)``.

        Returns:
            torch.Tensor: Scores of shape ``(batch, 10)``.
        """
        features = self.conv_layers(x)
        # Flatten per sample instead of view(-1, 14*14*128). The old form
        # silently folded extra pixels into the batch dimension when the
        # input was not the expected size; now the Linear layer raises a
        # shape error instead.
        features = torch.flatten(features, start_dim=1)
        return self.dense(features)

In [16]:
# Instantiate the Swish CNN and move its parameters to the selected device.
model_swish = CNNwithSwish().to(device)

### Loss Function & Optimizer

In [17]:
# Cross-entropy loss on the model's raw 10-way outputs; Adam updates only
# model_swish's parameters.
loss_func = nn.CrossEntropyLoss()
optimizer = opt.Adam(model_swish.parameters(), lr=learning_rate)

### Training Model

In [18]:
# Train model_swish for num_epochs passes over train_loader.
# Set training mode explicitly. It changes nothing for the layers used
# here, but keeps the loop correct if dropout or batch-norm is added.
model_swish.train()
for epoch in range(num_epochs):
    for idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model_swish(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 200 batches.
        if (idx + 1) % 200 == 0:
            # .item() yields a plain Python float, replacing the legacy
            # `.data` access.
            print("Epoch: %d, Batch: %d, Loss: %.4f" % (epoch + 1, idx + 1, loss.item()))

Epoch: 1, Batch: 200, Loss: 0.2603
Epoch: 1, Batch: 400, Loss: 0.0214
Epoch: 1, Batch: 600, Loss: 0.0175
Epoch: 2, Batch: 200, Loss: 0.0308
Epoch: 2, Batch: 400, Loss: 0.0417
Epoch: 2, Batch: 600, Loss: 0.0167
Epoch: 3, Batch: 200, Loss: 0.0195
Epoch: 3, Batch: 400, Loss: 0.0192
Epoch: 3, Batch: 600, Loss: 0.0444
Epoch: 4, Batch: 200, Loss: 0.0010
Epoch: 4, Batch: 400, Loss: 0.0208
Epoch: 4, Batch: 600, Loss: 0.0006
Epoch: 5, Batch: 200, Loss: 0.0558
Epoch: 5, Batch: 400, Loss: 0.0055
Epoch: 5, Batch: 600, Loss: 0.0131


### Testing Model

In [19]:
# Measure the classification accuracy of model_swish on the test set.
model_swish.eval()  # inference mode for any mode-dependent layers
correct = 0
total = 0
# no_grad() stops autograd from recording a graph, which would otherwise
# hold activations for every 1000-image test batch.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model_swish(images)

        # The predicted class is the index of the largest score per sample.
        pred = outputs.argmax(dim=1)

        # Accumulate plain Python ints rather than device tensors.
        correct += (pred == labels).sum().item()
        total += labels.size(0)

print('Accuracy:%.3f%%' %(100.0 * float(correct)/float(total)))

Accuracy:98.680%


## 2. Implementing ReLU Activation Function

* CNN Model
* Loss function & Optimizer
* Training Model
* Testing Model



### CNN Model w/ ReLU

* Use the `MyReLU` module as the activation function in the CNN model.

In [60]:
class CNNwithReLU(nn.Module):
    """Small CNN classifier that uses ``MyReLU`` after every hidden layer.

    Layout: two 3x3 convolutions (1 -> 64 -> 128 channels, padding 1), a
    2x2 max-pool, then a 1024-unit dense layer and a 10-way output layer.
    The dense input size of ``14*14*128`` implies single-channel 28x28
    inputs: the padded convolutions keep the spatial size and the pool
    halves it.
    """

    def __init__(self):
        super(CNNwithReLU, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            MyReLU(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            MyReLU(),
            nn.MaxPool2d(stride=2, kernel_size=2)
        )

        self.dense = nn.Sequential(
            nn.Linear(in_features=14*14*128, out_features=1024),
            MyReLU(),
            nn.Linear(1024, 10)
        )

    def forward(self, x):
        """Return the unnormalised class scores for a batch of images.

        Args:
            x (torch.Tensor): Batch of images laid out as
                ``(batch, 1, height, width)``.

        Returns:
            torch.Tensor: Scores of shape ``(batch, 10)``.
        """
        features = self.conv_layers(x)
        # Flatten per sample instead of view(-1, 14*14*128). The old form
        # silently folded extra pixels into the batch dimension when the
        # input was not the expected size; now the Linear layer raises a
        # shape error instead.
        features = torch.flatten(features, start_dim=1)
        return self.dense(features)

In [61]:
# Instantiate the ReLU CNN and move its parameters to the selected device.
model_relu = CNNwithReLU().to(device)

### Loss Function & Optimizer

In [62]:
# NOTE: this rebinds `loss_func` and `optimizer`, which the Swish section
# also assigns. From here on the optimizer updates model_relu's parameters only.
loss_func = nn.CrossEntropyLoss()
optimizer = opt.Adam(model_relu.parameters(), lr=learning_rate)

### Training Model

In [63]:
# Train model_relu for num_epochs passes over train_loader.
# Set training mode explicitly. It changes nothing for the layers used
# here, but keeps the loop correct if dropout or batch-norm is added.
model_relu.train()
for epoch in range(num_epochs):
    for idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model_relu(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 200 batches.
        if (idx + 1) % 200 == 0:
            # .item() yields a plain Python float, replacing the legacy
            # `.data` access.
            print("Epoch: %d, Batch: %d, Loss: %.4f" % (epoch + 1, idx + 1, loss.item()))

Epoch: 1, Batch: 200, Loss: 0.0762
Epoch: 1, Batch: 400, Loss: 0.0373
Epoch: 1, Batch: 600, Loss: 0.0867
Epoch: 2, Batch: 200, Loss: 0.0215
Epoch: 2, Batch: 400, Loss: 0.0190
Epoch: 2, Batch: 600, Loss: 0.0160
Epoch: 3, Batch: 200, Loss: 0.0057
Epoch: 3, Batch: 400, Loss: 0.0038
Epoch: 3, Batch: 600, Loss: 0.0083
Epoch: 4, Batch: 200, Loss: 0.0174
Epoch: 4, Batch: 400, Loss: 0.0593
Epoch: 4, Batch: 600, Loss: 0.0237
Epoch: 5, Batch: 200, Loss: 0.0045
Epoch: 5, Batch: 400, Loss: 0.0005
Epoch: 5, Batch: 600, Loss: 0.0285


### Testing Model

In [65]:
# Measure the classification accuracy of model_relu on the test set.
model_relu.eval()  # inference mode for any mode-dependent layers
correct = 0
total = 0
# no_grad() stops autograd from recording a graph, which would otherwise
# hold activations for every 1000-image test batch.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model_relu(images)

        # The predicted class is the index of the largest score per sample.
        pred = outputs.argmax(dim=1)

        # Accumulate plain Python ints rather than device tensors.
        correct += (pred == labels).sum().item()
        total += labels.size(0)

print('Accuracy:%.3f%%' %(100.0 * float(correct)/float(total)))

Accuracy:98.920%
