# Custom Activation Function
* Import
* Custom Activation Function: **Swish**
* Implementing Activation Function


## Import

In [None]:
import torch

from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.optim as opt
from torch.autograd import Variable

## Swish Activation Function

The Swish function:
$f(x) = x \cdot \sigma(x)$, where $\sigma(x) = \dfrac{1}{1 + e^{-x}}$ is the sigmoid function.

**Reference**
* Ramachandran, P., Zoph, B., & Le, Q. V. (2017). **Swish: a self-gated activation function.** arXiv preprint arXiv:[1710.05941](https://arxiv.org/pdf/1710.05941v1.pdf?source=post_page), 7, 1.

In [None]:
# swish function here
def swish(x):
    """Apply the Swish activation element-wise: ``x * sigmoid(x)``.

    Args:
        x: Input tensor.

    Returns:
        A tensor with the same shape as ``x``.
    """
    gate = torch.sigmoid(x)
    return x * gate

In [None]:
class Swish(nn.Module):
    """Swish activation as a module: ``f(x) = x * sigmoid(slope * x)``.

    Args:
        slope: Factor applied to the input inside the sigmoid (the beta term
            of the Swish paper). The default of 1 gives the plain
            ``x * sigmoid(x)`` form.
    """

    def __init__(self, slope=1):
        super().__init__()
        # Previously this argument was accepted but silently discarded.
        self.slope = slope

    def extra_repr(self):
        """Show the configured slope in ``print(module)`` output."""
        return "slope=%s" % (self.slope,)

    def swish(self, x):
        """Return ``x * sigmoid(slope * x)`` computed element-wise."""
        return x * torch.sigmoid(self.slope * x)

    def forward(self, x):
        """Apply the activation to ``x``."""
        return self.swish(x)

##  Implementing Activation Function

* Set the device
* Dataset & DataLoader
* CNN Model
* Loss function & Optimizer
* Training Model
* Testing Model



### Set the Device

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Hyperparameters
train_batch_size = 100   # samples per training batch
test_batch_size = 1000   # samples per evaluation batch
# Misspelled original name, kept as an alias so cells that still refer to it
# keep working.
test_batch_szie = test_batch_size
learning_rate = 0.001    # learning rate handed to the optimizer
num_epochs = 5           # number of passes over the training loader

### Dataset & DataLoader

In [None]:
# MNIST training split stored under ./data; ToTensor converts each image to a tensor.
train_dataset = MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# MNIST test split (train=False), configured the same way.
test_dataset = MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
# Training batches: shuffled.
train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)

# Evaluation batches: dataset order is kept (no shuffling).
test_loader = DataLoader(test_dataset, batch_size=test_batch_szie, shuffle=False)

### CNN Model

* Use the `Swish` module as the activation function in the CNN model.

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier that uses Swish as its activation.

    Two 3x3 convolutions (1 -> 64 -> 128 channels, padding 1) and a 2x2
    max-pool feed a two-layer fully connected head with 10 outputs. The
    head expects 14*14*128 input features, which matches 28x28 single-channel
    images after the pooling step halves each spatial dimension.
    """

    def __init__(self):
        super().__init__()

        # Size of one flattened sample after the convolutional stack.
        flat_features = 14 * 14 * 128

        feature_stack = [
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            Swish(),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            Swish(),
            nn.MaxPool2d(stride=2, kernel_size=2),
        ]
        self.conv_layers = nn.Sequential(*feature_stack)

        classifier_stack = [
            nn.Linear(in_features=flat_features, out_features=1024),
            Swish(),
            nn.Linear(1024, 10),
        ]
        self.dense = nn.Sequential(*classifier_stack)

    def forward(self, x):
        """Run the convolutional stack, flatten, and return the 10 class scores."""
        feature_maps = self.conv_layers(x)
        # The first dense layer's in_features is the 14*14*128 flatten width.
        flattened = feature_maps.view(-1, self.dense[0].in_features)
        return self.dense(flattened)

In [None]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

### Loss Function & Optimizer

In [None]:
# Adam updates every parameter of the model, using the configured learning rate.
optimizer = opt.Adam(params=model.parameters(), lr=learning_rate)
# Cross-entropy between the model's class scores and the integer labels.
loss_func = nn.CrossEntropyLoss()

### Training Model

In [None]:
# Make sure the model is in training mode before optimising.
model.train()
for epoch in range(num_epochs):
    for idx, (images, labels) in enumerate(train_loader):
        # Plain tensors take part in autograd directly, so the deprecated
        # Variable wrapper is not needed; just move the batch to the device.
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 200 batches.
        if (idx+1)%200 == 0:
            # .item() extracts the Python float instead of reaching into .data.
            print("Epoch: %d, Batch: %d, Loss: %.4f" %(epoch+1, idx+1, loss.item()))

Epoch: 1, Batch: 200, Loss: 0.1025
Epoch: 1, Batch: 400, Loss: 0.1080
Epoch: 1, Batch: 600, Loss: 0.0701
Epoch: 2, Batch: 200, Loss: 0.0339
Epoch: 2, Batch: 400, Loss: 0.1102
Epoch: 2, Batch: 600, Loss: 0.0317
Epoch: 3, Batch: 200, Loss: 0.0241
Epoch: 3, Batch: 400, Loss: 0.0336
Epoch: 3, Batch: 600, Loss: 0.0035
Epoch: 4, Batch: 200, Loss: 0.0156
Epoch: 4, Batch: 400, Loss: 0.0106
Epoch: 4, Batch: 600, Loss: 0.0540
Epoch: 5, Batch: 200, Loss: 0.0134
Epoch: 5, Batch: 400, Loss: 0.0334
Epoch: 5, Batch: 600, Loss: 0.0008


### Testing Model

In [None]:
# Count correct predictions over the whole test loader.
correct = 0
total = 0

# Evaluation mode plus no_grad: no autograd graph is built during inference,
# which saves memory and time. The model is left in eval mode afterwards.
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Index of the largest score along dim 1 is the predicted class.
        _, pred = torch.max(outputs, 1)

        # .item() keeps the running count as a Python int rather than a device tensor.
        correct += (pred == labels).sum().item()
        total += labels.size(0)

print('Accuracy:%.3f%%' %(100.0 * float(correct)/float(total)))

Accuracy:98.640%
