- two features
- one hidden layer with two neurons with relu activation
- 2 output classes with sigmoid
- loss as mse
- derivation of softmax
- derivation of catagorical loss

In [1]:
import torch

# Previous implementations

## Dense layer

In [2]:
class DenseLayer:

    def __init__(self,n_features, n_neurons):
        self.weights = torch.rand((n_features,n_neurons))
        self.bias = torch.rand(n_neurons)

    def forward(self,inputs):
        self.output = torch.matmul(inputs,self.weights) + self.bias

## Accuracy

In [3]:
class Accuracy:
    def __init__(self):
        self.output = 0
    def forward(self,y_pred,y_true):
        if y_pred.shape != y_true.shape:
            one_hot_notation = torch.zeros(y_pred.shape)
            one_hot_notation[range(len(y_pred)),y_true] = 1
        else:
            one_hot_notation = y_true
        correct_values = y_pred==one_hot_notation
        correct_values = correct_values * one_hot_notation
        self.output = torch.sum(correct_values) / len(y_pred)
        return self.output

## Activation Simgoid

In [4]:
class Activation_Sigmoid:
    def forward(self,input):
        self.output = 1 / (1 + torch.exp(-input))
        return self.output

## Activation ReLU

In [5]:
class Activation_ReLU:
    def forward(self,inputs):
        self.output = torch.max(inputs,torch.tensor(0))
        return self.output



## Loss categorical

In [6]:
class Loss_Catagorical:
    def forward(self,y_pred,y_true):
        if y_pred.shape !=  y_true.shape:
            y_true -= 1
            one_hot_notation = torch.zeros(y_pred.shape)
            one_hot_notation[range(len(y_pred)),y_true] = 1
        else:
            one_hot_notation = y_pred
        loss = -torch.sum(one_hot_notation * torch.log(y_pred)) / len(y_true)
        self.output = loss
        return loss

## Activation Softmax

In [7]:
class Activation_Softmax:
    def forward(self,inputs):
        pow = torch.exp(inputs - torch.max(inputs, axis=1, keepdims=True)[0])
        summ = torch.sum(pow,axis=1,keepdims = True)
        ans = pow / summ
        self.output = ans
        return ans


# Model

In [9]:
class Classification_Model:
    def __init__(self):
        self.layer1 = DenseLayer(2,2)
        self.layer1_activation = Activation_ReLU()
        self.output_layer = DenseLayer(2,2)
        self.output_layer_activation = Activation_Sigmoid()
        self.error = float('inf')

    def forward_prop(self,inputs):
        self.input = inputs
        self.layer1.forward(inputs)
        self.layer1_activation.forward(self.layer1.output)
        self.output_layer.forward(self.layer1_activation.output)
        self.output_layer_activation.forward(self.output_layer.output)

    def calc_error(self,y_true):
        if self.output_layer_activation.output.shape != y_true.shape:
            one_hot_notation = torch.zeros(self.output_layer_activation.output.shape)
            one_hot_notation[y_true] = 1
        else:
            one_hot_notation = y_true
        self.y_true = one_hot_notation
        self.error = (self.output_layer_activation.output - one_hot_notation)
        self.mse = torch.mean(self.error)

    def back_prop(self,lr):
        dloss_by_dsig = self.error
        dsig_by_layer2 = (torch.tensor([1]) - self.output_layer_activation.output) * self.output_layer_activation.output

        # layer 2
        back2 = dloss_by_dsig * dsig_by_layer2
        layer2_grad = torch.cat((self.layer1_activation.output.unsqueeze(dim=0),self.layer1_activation.output.unsqueeze(dim=0)),dim=0).T * back2
        self.output_layer.weights -= torch.tensor([lr]) * layer2_grad
        self.output_layer.bias -= torch.tensor([lr]) * back2

        # layer 1
        drelu_by_dlayer1 = self.layer1.output>0
        back1 = drelu_by_dlayer1 * torch.sum(self.output_layer.weights * back2,dim=1,keepdims=True).squeeze()
        layer1_grad = torch.cat((self.input.unsqueeze(dim=0),self.input.unsqueeze(dim=0)),dim=0).T * back1
        self.layer1.weights -= torch.tensor([lr]) * layer1_grad
        self.layer1.bias -= torch.tensor([lr]) * back1


    def fit(self,inputs,y_true,epoches=1000,lr=0.01):
        for epoch in range(epoches):
            for i in range(len(inputs)):
                self.forward_prop(inputs[i])
                self.calc_error(y_true[i])
                if self.mse < 0.1:
                    break
                self.back_prop(lr)
        error = torch.tensor([0,0],dtype = torch.float)
        for i in range(len(inputs)):
            self.forward_prop(inputs[i])
            self.calc_error(y_true[i])
            print("output:",self.output_layer_activation.output,"expected: ",self.y_true)
            print("squared error",self.error**2)
            print()
            error += self.error**2

        avg_error = error / len(inputs)
        print("average squared error",avg_error)
        print("over all mse:",torch.mean(avg_error))



In [11]:
model = Classification_Model()
inputs = torch.tensor([[1,2],[2,3],[5,6],[7,9],[7,8],[1,4]],dtype=torch.float)
y_true = torch.tensor([0,0,1,1,1,0])
model.fit(inputs,y_true,epoches=20000)

output: tensor([0.6713, 0.5280]) expected:  tensor([1., 0.])
squared error tensor([0.1080, 0.2788])

output: tensor([0.6618, 0.4874]) expected:  tensor([1., 0.])
squared error tensor([0.1144, 0.2376])

output: tensor([0.6325, 0.3692]) expected:  tensor([0., 1.])
squared error tensor([0.4001, 0.3979])

output: tensor([0.6106, 0.2936]) expected:  tensor([0., 1.])
squared error tensor([0.3728, 0.4990])

output: tensor([0.6124, 0.2976]) expected:  tensor([0., 1.])
squared error tensor([0.3750, 0.4934])

output: tensor([0.6680, 0.5183]) expected:  tensor([1., 0.])
squared error tensor([0.1102, 0.2686])

average squared error tensor([0.2468, 0.3625])
over all mse: tensor(0.3047)
