# Two-layer perceptron

In [1]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

## load the data

In [2]:
training_data = np.genfromtxt('data/training_set.csv', delimiter=',')
validation_data = np.genfromtxt('data/validation_set.csv', delimiter=',')

In [147]:
def preprocess_data(data):
    return data[:,0:2],np.expand_dims(data[:,-1],axis=1)

In [148]:
training_inputs,training_outputs = preprocess_data(training_data)
(training_inputs.shape,training_outputs.shape)

((10000, 2), (10000, 1))

In [149]:
validation_inputs,validation_outputs = preprocess_data(training_data)

## Define the neural net

### 1. define a layer 

In [225]:
import numpy as np

class Layer_Perceptron:
    
    def __init__(self,number_of_input,number_of_neurons,activation_function,derivate_activation_function,weight=None,bias=None,random_weight=True,weight_range=0.2,bias_range=1):
        
        self.n = number_of_input
        self.m = number_of_neurons
        self._activation_function = activation_function
        self._derivate_activation_function = derivate_activation_function
        
        if (weight==None):
            if random_weight:
                self.w = weight_range*(2*np.random.rand(self.n,self.m)-1)
            else:
                self.w = np.zeros((self.n,self.m))
        
        else:
            if not(weight.shape== (self.n,)):
                raise ValueError(f"weight param should be of shape ({self.n},) but it is {weight.shape}")
            else:
                self.w = weight
        
        if (bias==None):
            if random_weight:
                self.b= bias_range*(2*np.random.rand(self.m,1)-1)
            else:
                self.b = 0
        
        else:
            self.b = bias
    
    
    def local_field(self,x):
        return ((self.w.T@x.T) + self.b).T
    
    def activate(self,x):        
        return self._activation_function(self.local_field(x))
    
    def grad(self,x):
        return self._derivate_activation_function(self.local_field(x))


In [226]:
activation_func = lambda x : np.tanh(x)
derivate_activation_func = lambda x :  (1-np.tanh(x)**2)

In [227]:
M1 = 3
M2 = 2

In [228]:
layer1 = Layer_Perceptron(training_inputs.shape[1],M1,activation_func,derivate_activation_func)
layer2 = Layer_Perceptron(layer1.w.shape[1],M2,activation_func,derivate_activation_func)
layer3 = Layer_Perceptron(layer2.w.shape[1],1,activation_func,derivate_activation_func)

In [229]:
layer1.activate(training_inputs[0:1,:]).shape

(1, 3)

In [230]:
training_inputs.shape

(10000, 2)

In [231]:
layer1.activate(training_inputs).shape

(10000, 3)

In [232]:
layer3.activate(layer2.activate(layer1.activate(training_inputs))).shape

(10000, 1)

### 2. Energies functions

In [233]:
class LossFunction:
    
    def __init__(self,loss,grad):
        self.loss = loss
        self.grad = grad
    
    def total_loss(self,inputs,outputs):
        return self.loss(inputs,outputs).mean()

In [234]:
def energy_func(inputs,real_values):
    return ((inputs-real_values)**2)

def energy_derivate(inputs,real_values):
    return 2*(inputs-real_values)

energy = LossFunction(energy_func,energy_derivate)

### 3. NeuralNet

In [290]:
class NeuralNet:
    
    def __init__(self,layers):
        
        self.L = len(layers)
        self.layers = layers
    
    def activate(self,inputs,l=None): 
        if l == None:
            l= self.L
        L = min(l -1 ,self.L-2)
        
        return self.layers[L+1].activate(self.local_field_forwad(inputs,L))
    
    def local_field_forwad(self,inputs,l):
        outputs = inputs
        for i in range(l):
            outputs = self.layers[i].activate(outputs)
        
        local_field = self.layers[l].local_field(outputs)
            
        return local_field  

In [291]:
nn = NeuralNet([layer1,layer2,layer3])

In [292]:
nn.layers[1].local_field(nn.layers[0].activate(training_inputs)).shape

(10000, 2)

In [293]:
nn.local_field_forwad(training_inputs,2)

array([[0.49370402],
       [0.49546274],
       [0.49347449],
       ...,
       [0.49664779],
       [0.49567216],
       [0.49138116]])

In [308]:
nn.activate(training_inputs,2).shape

(10000, 1)

In [239]:
energy.total_loss(nn.activate(training_inputs),training_outputs)

1.8541732665513955

## Gradient descent

In [240]:
loss = energy
inputs = training_inputs
outputs = training_outputs

In [296]:
grads_w = [np.zeros(layer.w.shape) for layer in nn.layers]
grads_b = [np.zeros(layer.b.shape) for layer in nn.layers]


back_prop = loss.grad(inputs,outputs)
for i in reversed(range(nn.L)):
    l = nn.layers[i]
    grad_layer = l.grad(nn.local_field_forwad(inputs,i))
    grad_w = nn.activate(inputs,i)*grad_layer*back_prop
    back_prop = grad_layer*l.w.sum(axis=0)*back_prop

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1 is different from 2)

In [299]:
nn.local_field_forwad(inputs,2).shape

(10000, 1)

In [302]:
nn.layers[2].n

2