# Activation Function

In [89]:
import numpy as np
# Seed NumPy's global RNG so the random weights drawn by Layer are repeatable.
# NOTE: the seed is applied only when this cell runs; re-running a later cell on
# its own continues the random stream instead of restarting it.
np.random.seed(0)
# Show floats with 4 digits of precision when arrays are printed.
np.set_printoptions(precision=4)
                      
# new code
class Activation:
    """Common interface for activation functions.

    Subclasses override ``forward``. Instances are callable, and calling one
    is the same as calling its ``forward`` method.
    """

    def __init__(self):
        """The base class keeps no state."""

    def forward(self, inputs):
        """Placeholder: the base implementation does nothing and returns None."""
        return None

    def __call__(self, arg):
        """Delegate ``activation(arg)`` to ``self.forward(arg)``."""
        result = self.forward(arg)
        return result

# Activation functions: linear (identity), ReLU, tanh, sigmoid and softmax
class Act_Linear(Activation):
    """Identity activation: hands its input back unchanged."""

    def forward(self, inputs):
        """Return ``inputs`` itself (the same object, no copy is made)."""
        passthrough = inputs
        return passthrough
    
class Act_ReLU(Activation):
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Return the element-wise maximum of 0 and ``inputs``."""
        rectified = np.maximum(0, inputs)
        return rectified

class Act_Tanh(Activation):
    """Hyperbolic-tangent activation, applied element-wise."""

    def forward(self, inputs):
        """Return ``np.tanh(inputs)``."""
        squashed = np.tanh(inputs)
        return squashed

class Act_Sigmoid(Activation):
    """Logistic sigmoid activation, applied element-wise."""

    def forward(self, inputs):
        """Return ``1 / (1 + exp(-inputs))``."""
        decay = np.exp(-inputs)
        return 1 / (1 + decay)

class Act_Softmax(Activation):
    """Softmax activation: turns scores into probabilities along the last axis."""

    def forward(self, inputs):
        """Return the softmax of ``inputs`` computed over the last axis.

        Every last-axis slice is normalised on its own, so a batch shaped
        ``(n_samples, n_classes)`` yields one probability distribution per
        sample rather than a single distribution spread over the whole batch.
        For a 1-D vector or a single-row batch the result equals
        ``exp(x) / sum(exp(x))``.

        Args:
            inputs: array-like of scores.

        Returns:
            Array with the same shape as ``inputs`` whose last-axis slices
            each sum to 1.
        """
        scores = np.asarray(inputs)
        if scores.size == 0:
            # Nothing to normalise; np.max would reject an empty reduction.
            return np.exp(scores)

        # A 0-d input has no axis to reduce over, so reduce over everything.
        axis = -1 if scores.ndim > 0 else None

        # Subtracting the per-slice maximum leaves the result mathematically
        # unchanged but keeps np.exp from overflowing on large scores.
        shifted = scores - np.max(scores, axis=axis, keepdims=True)
        exp = np.exp(shifted)
        return exp / np.sum(exp, axis=axis, keepdims=True)

# imported from notebook: 01-perceptron-layer.ipynb
class Layer:
    """Fully connected layer computing ``activation(inputs @ weights.T + biases)``.

    Attributes:
        weights: ``(n_neurons, n_inputs)`` array when generated here; a
            caller-supplied array is stored exactly as given.
        biases: zeros of shape ``(1, n_neurons)`` when generated here; a
            caller-supplied array is stored exactly as given.
        activation: the object returned by calling ``activation_fn()``.
        inputs: argument of the most recent ``forward`` call.
        output: result of the most recent ``forward`` call.
    """

    def __init__(self, n_inputs, n_neurons, activation_fn=None, weights=None, biases=None):
        """Create the layer parameters and instantiate the activation.

        Args:
            n_inputs: number of input features (used to size random weights).
            n_neurons: number of neurons (used to size random weights and zero biases).
            activation_fn: activation class (or zero-argument factory);
                ``None`` selects ``Act_Linear``.
            weights: optional preset weights, e.g. copied from another
                implementation to compare outputs.
            biases: optional preset biases.

        Raises:
            ValueError: if ``activation_fn`` is ``Act_Softmax`` or a subclass of it.
        """
        if activation_fn is None:
            activation_fn = Act_Linear

        # issubclass (instead of ``is``) also rejects subclasses of Act_Softmax;
        # the isinstance guard lets non-class factories pass through untouched.
        if isinstance(activation_fn, type) and issubclass(activation_fn, Act_Softmax):
            raise ValueError("Softmax is not supported as an activation function, use it after the output")

        if weights is None:
            # Scale standard-normal draws by 0.1 so the initial weights are small.
            self.weights = 0.1 * np.random.randn(n_neurons, n_inputs)
        else:
            self.weights = weights  # preset weights, used to check this code against a reference

        if biases is None:
            self.biases = np.zeros((1, n_neurons))
        else:
            self.biases = biases

        self.activation = activation_fn()  # instantiate the activation

    def forward(self, inputs):
        """Run the layer on ``inputs``, remember the result in ``self.output`` and return it."""
        self.inputs = inputs
        # weights are stored as (n_neurons, n_inputs), hence the transpose.
        output_raw = np.dot(self.inputs, self.weights.T) + self.biases
        self.output = self.activation.forward(output_raw)
        return self.output

    def __call__(self, arg):
        """Make ``layer(x)`` equivalent to ``layer.forward(x)``."""
        return self.forward(arg)

In [90]:
# One sample with three features, shaped (1, 3).
inp = np.array([[1.0, 2.0, 3.0]])

# Build the 3 -> 4 -> 5 -> 2 stack. Layers are created in the order 1, 2, 3,
# which fixes the order in which their random weights are drawn.
layer1 = Layer(3, 4, Act_ReLU)
layer2 = Layer(4, 5, Act_Tanh)
layer3 = Layer(5, 2, Act_Sigmoid)
soft = Act_Softmax()

# Each layer call returns its result and also keeps it in `.output`,
# so the forward pass can be chained; softmax is applied after the last layer.
soft(layer3(layer2(layer1(inp))))

array([[0.5019, 0.4981]])

### PyTorch

In [91]:
import torch
from torch import nn

# The same 3 -> 4 -> 5 -> 2 architecture built from PyTorch modules.
model = nn.Sequential(
    nn.Linear(in_features=3, out_features=4),
    nn.ReLU(),
    nn.Linear(in_features=4, out_features=5),
    nn.Tanh(),
    nn.Linear(in_features=5, out_features=2),
    nn.Sigmoid(),
)

model(torch.tensor([1.0, 2.0, 3.0]))

tensor([0.4718, 0.5779], grad_fn=<SigmoidBackward0>)

## Comparing the implementations

### Single Layer implementation

In [92]:
# PyTorch reference: a single randomly initialised linear layer, 3 inputs -> 4 outputs.
test_py1 = nn.Linear(in_features=3, out_features=4)
test_py1(torch.tensor([1.0, 2.0, 3.0]))


tensor([ 2.4948,  0.6741, -1.6913, -0.9494], grad_fn=<ViewBackward0>)

In [93]:
# Rebuild the PyTorch layer with my Layer class, reusing its weights and bias
# (activation None -> linear), so both should produce the same numbers.
test_layer1 = Layer(3, 4, None, test_py1.weight.detach().numpy(), test_py1.bias.detach().numpy())
test_layer1(inp)

# Check the match numerically instead of by eye. PyTorch computes in float32,
# hence the tolerances; its 1-D output is reshaped to the (1, 4) batch layout.
np.testing.assert_allclose(
    test_layer1.output,
    test_py1(torch.tensor([1., 2., 3.])).detach().numpy().reshape(1, -1),
    rtol=1e-5,
    atol=1e-6,
)

test_layer1.output

array([[ 2.4948,  0.6741, -1.6913, -0.9494]])

### Multi Layer implementation

In [94]:
# PyTorch reference network (3 -> 4 -> 5 -> 2) whose parameters are reused below.
test_py2 = nn.Sequential(
    nn.Linear(in_features=3, out_features=4),
    nn.ReLU(),
    nn.Linear(in_features=4, out_features=5),
    nn.Tanh(),
    nn.Linear(in_features=5, out_features=2),
    nn.Sigmoid(),
)

# Keep the output so later cells can compare against it.
test_py2_out = test_py2(torch.tensor([1.0, 2.0, 3.0]))
print(test_py2_out)

tensor([0.6193, 0.5300], grad_fn=<SigmoidBackward0>)


In [95]:
# Mirror the PyTorch network layer by layer, reusing its weights and biases.
# Indices 0, 2 and 4 of test_py2 are the Linear modules.
layer_t2_1 = Layer(3, 4, Act_ReLU, test_py2[0].weight.detach().numpy(), test_py2[0].bias.detach().numpy())
layer_t2_1(inp)

layer_t2_2 = Layer(4, 5, Act_Tanh, test_py2[2].weight.detach().numpy(), test_py2[2].bias.detach().numpy())
layer_t2_2(layer_t2_1.output)

layer_t2_3 = Layer(5, 2, Act_Sigmoid, test_py2[4].weight.detach().numpy(), test_py2[4].bias.detach().numpy())
layer_t2_3(layer_t2_2.output)

# Check the match numerically instead of by eye. PyTorch computes in float32,
# hence the tolerances; its 1-D output is reshaped to the (1, 2) batch layout.
np.testing.assert_allclose(
    layer_t2_3.output,
    test_py2_out.detach().numpy().reshape(1, -1),
    rtol=1e-5,
    atol=1e-6,
)

layer_t2_3.output

array([[0.6193, 0.53  ]])

In [96]:
# PyTorch reference softmax over the two outputs (dim 0 of the 1-D tensor).
torch.softmax(test_py2_out, dim=0)

tensor([0.5223, 0.4777], grad_fn=<SoftmaxBackward0>)

In [97]:
# Apply my softmax to the NumPy network's output.
soft = Act_Softmax()
soft_out = soft(layer_t2_3.output)

# Check against PyTorch's softmax numerically instead of by eye. PyTorch works
# in float32, hence the tolerances; its 1-D result is reshaped to (1, 2).
np.testing.assert_allclose(
    soft_out,
    nn.Softmax(dim=0)(test_py2_out).detach().numpy().reshape(1, -1),
    rtol=1e-5,
    atol=1e-6,
)

soft_out

array([[0.5223, 0.4777]])

**Thus my implementations of ReLU, Tanh, Sigmoid and Softmax reproduce PyTorch's outputs on this example, so they are correct.**