# Activation Function

In [1]:
import numpy as np
np.random.seed(0) # in notebooks, this needs to be present in the cell where the random is being called

# new code
class Activation:
    """Base class for activation functions.

    Subclasses override ``forward``. Instances are callable, so an
    activation can be applied as ``act(x)`` as well as ``act.forward(x)``.
    """

    def __init__(self):
        # Nothing to initialise; spelled out so subclasses have a constructor
        # to extend.
        pass

    def forward(self, inputs):
        """Apply the activation to ``inputs``.

        Raises:
            NotImplementedError: always on the base class; subclasses must
                provide the actual computation.
        """
        # Fail loudly instead of handing ``None`` to the next layer.
        raise NotImplementedError(
            f"{type(self).__name__} must implement forward()"
        )

    def __call__(self, arg):
        """Shorthand for ``self.forward(arg)``."""
        return self.forward(arg)

# ReLU function
class Act_ReLU(Activation):
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Return ``inputs`` with every negative entry replaced by zero."""
        floor = 0
        rectified = np.maximum(floor, inputs)
        return rectified

class Act_Tanh(Activation):
    """Hyperbolic tangent activation, applied element-wise."""

    def forward(self, inputs):
        """Return ``tanh`` of every entry of ``inputs``."""
        squashed = np.tanh(inputs)
        return squashed

class Act_Sigmoid(Activation):
    """Logistic sigmoid activation: ``1 / (1 + exp(-x))``, element-wise."""

    def forward(self, inputs):
        """Return the sigmoid of every entry of ``inputs``.

        Args:
            inputs: scalar or array-like of real numbers (any shape,
                including empty).

        Returns:
            A float ``ndarray`` with the same shape as ``inputs`` and values
            in ``[0, 1]``.
        """
        x = np.asarray(inputs, dtype=float)
        # exp(-|x|) always lies in [0, 1], so it cannot overflow whatever the
        # magnitude of x.
        z = np.exp(-np.abs(x))
        # Both branches are algebraically 1 / (1 + exp(-x)); the second is the
        # form that stays finite for negative x.
        return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

class Act_Softmax(Activation):
    """Softmax activation: turns scores into probabilities that sum to 1."""

    def forward(self, inputs):
        """Return the softmax of ``inputs`` along its last axis.

        Args:
            inputs: scalar or array-like of real-valued scores. For a 2-D
                batch each row is normalised independently.

        Returns:
            A float ``ndarray`` with the same shape as ``inputs``; every
            slice along the last axis sums to 1.
        """
        scores = np.asarray(inputs, dtype=float)
        if scores.size == 0:
            # Nothing to normalise; avoids reducing over an empty axis.
            return scores.copy()

        # A 0-d input has no last axis, so reduce over everything instead.
        axis = -1 if scores.ndim else None

        # Subtracting the maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large scores.
        shifted = scores - np.max(scores, axis=axis, keepdims=True)
        exp = np.exp(shifted)
        return exp / np.sum(exp, axis=axis, keepdims=True)

# imported from notebook: 01-perceptron-layer.ipynb
class Layer:
    """Fully connected layer followed by an activation.

    Computes ``activation(inputs . weights + biases)``.
    """

    def __init__(self, n_inputs, n_neurons, activation_fn, weights=None, biases=None):
        """Set up the layer's parameters and activation.

        Args:
            n_inputs: number of input features.
            n_neurons: number of output units.
            activation_fn: class (or zero-argument factory) whose instance
                provides ``forward(x)``.
            weights: optional preset weights. They are transposed before
                use, so pass them laid out as (n_neurons, n_inputs).
            biases: optional preset biases, used as given.
        """
        if weights is not None:
            # Used to test the correctness of this code against known weights.
            self.weights = np.transpose(weights)
        else:
            # Standard-normal samples scaled by 0.1 to keep the initial weights small.
            self.weights = 0.1 * np.random.randn(n_inputs, n_neurons)

        self.biases = np.zeros((1, n_neurons)) if biases is None else biases

        # Instantiate the activation once; it is reused on every forward pass.
        self.activation = activation_fn()

    def forward(self, inputs):
        """Run the layer on ``inputs`` and return the activated output.

        The pre-activation values are kept in ``self.output_raw`` and the
        activated values in ``self.output``.
        """
        pre_activation = np.dot(inputs, self.weights) + self.biases
        self.output_raw = np.array(pre_activation)
        self.output = self.activation.forward(self.output_raw)
        return self.output

    def __call__(self, arg):
        """Shorthand for ``self.forward(arg)``."""
        return self.forward(arg)

In [2]:
# Seed in the cell that draws the random weights, so re-running this cell on
# its own reproduces the same numbers.
np.random.seed(0)

# Forward pass through a 3 -> 4 -> 5 -> 2 stack; each layer is fed the
# previous layer's output.
layer1 = Layer(3, 4, Act_ReLU)
layer1([1,2,3])

layer2 = Layer(4, 5, Act_Tanh)
layer2(layer1.output)

layer3 = Layer(5, 2, Act_Sigmoid)
layer3(layer2.output)

# Normalise the last layer's output; as the final expression this is the
# value the cell displays.
soft = Act_Softmax()
soft(layer3.output)

array([[0.50042009, 0.49957991]])

### PyTorch

In [3]:
import torch
from torch import nn

# The same 3 -> 4 -> 5 -> 2 architecture, built from PyTorch modules.
# NOTE(review): no torch seed is set in this cell, so presumably the weights
# (and the value displayed below) differ between runs — confirm.
model = nn.Sequential(
    nn.Linear(3, 4),
    nn.ReLU(),
    nn.Linear(4, 5),
    nn.Tanh(),
    nn.Linear(5, 2),
    nn.Sigmoid()
)

# Forward pass on the same input vector that was fed to the NumPy layers.
model(torch.tensor([1.,2.,3.]))

tensor([0.5158, 0.4209], grad_fn=<SigmoidBackward0>)

## Comparing the implementations

### Single Layer implementation

In [4]:
## Pytorch
# Reference result: one linear layer (no activation) applied to the test vector.
test_py1 = nn.Linear(3, 4)
test_py1(torch.tensor([1.,2.,3.]))

tensor([ 0.6231, -1.9106, -1.1200, -1.4478], grad_fn=<ViewBackward0>)

In [5]:
# Rebuild the layer in NumPy from the PyTorch layer's own weight and bias.
test_layer1 = Layer(3, 4, Act_ReLU, test_py1.weight.detach().numpy(), test_py1.bias.detach().numpy())
test_layer1([1.,2.,3.])
# Print the pre-activation values: the PyTorch reference above is a bare
# nn.Linear, so output_raw (not the ReLU output) is the quantity to compare.
print(test_layer1.output_raw)

[ 0.62307758 -1.91059069 -1.11996911 -1.44778582]


### Multi Layer implementation

In [6]:
# PyTorch reference network; its Linear modules sit at indices 0, 2 and 4.
test_py2 = nn.Sequential(
    nn.Linear(3, 4),
    nn.ReLU(),
    nn.Linear(4, 5),
    nn.Tanh(),
    nn.Linear(5, 2),
    nn.Sigmoid()
)

# Keep the output in a variable: a later cell applies softmax to it.
test_py2_out = test_py2(torch.tensor([1.,2.,3.]))
print(test_py2_out)

tensor([0.5150, 0.5772], grad_fn=<SigmoidBackward0>)


In [7]:
# Mirror test_py2 layer by layer, copying weight and bias from the Linear
# modules at indices 0, 2 and 4 of the Sequential.
layer_t2_1 = Layer(3, 4, Act_ReLU, test_py2[0].weight.detach().numpy(), test_py2[0].bias.detach().numpy())
layer_t2_1([1.,2.,3.])

layer_t2_2 = Layer(4, 5, Act_Tanh, test_py2[2].weight.detach().numpy(), test_py2[2].bias.detach().numpy())
layer_t2_2(layer_t2_1.output)

layer_t2_3 = Layer(5, 2, Act_Sigmoid, test_py2[4].weight.detach().numpy(), test_py2[4].bias.detach().numpy())
# Last expression: the displayed value is the final sigmoid output, to be
# compared with the printed test_py2_out.
layer_t2_3(layer_t2_2.output)

array([0.51495674, 0.57716869])

In [8]:
# PyTorch softmax over the network output; test_py2_out printed as a
# two-element 1-D tensor, so dim=0 is its only axis.
nn.Softmax(dim=0)(test_py2_out)

tensor([0.4845, 0.5155], grad_fn=<SoftmaxBackward0>)

In [9]:
# NumPy softmax over the mirrored network's output, for comparison with the
# PyTorch softmax result.
soft = Act_Softmax()
soft(layer_t2_3.output)

array([0.48445203, 0.51554797])

**Thus my implementations of ReLU, Tanh, Sigmoid and Softmax reproduce the PyTorch outputs for the same weights and input, so they are correct on these test cases.**