# Two-layer perceptron

In [1]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

## load the data

In [2]:
training_data = np.genfromtxt('data/training_set.csv', delimiter=',')
validation_data = np.genfromtxt('data/validation_set.csv', delimiter=',')

In [147]:
def preprocess_data(data):
    return data[:,0:2],np.expand_dims(data[:,-1],axis=1)

In [148]:
training_inputs,training_outputs = preprocess_data(training_data)
(training_inputs.shape,training_outputs.shape)

((10000, 2), (10000, 1))

In [149]:
validation_inputs,validation_outputs = preprocess_data(training_data)

## Define the neural net

### 1. define a layer 

In [387]:
import numpy as np

class Layer_Perceptron:
    
    def __init__(self,number_of_input,number_of_neurons,activation_function,derivate_activation_function,weight=None,bias=None,random_weight=True,weight_range=0.2,bias_range=1):
        
        self.n = number_of_input
        self.m = number_of_neurons
        self._activation_function = activation_function
        self._derivate_activation_function = derivate_activation_function
        
        if (weight==None):
            if random_weight:
                self.w = weight_range*(2*np.random.rand(self.n,self.m)-1)
            else:
                self.w = np.zeros((self.n,self.m))
        
        else:
            if not(weight.shape== (self.n,)):
                raise ValueError(f"weight param should be of shape ({self.n},) but it is {weight.shape}")
            else:
                self.w = weight
        
        if (bias==None):
            if random_weight:
                self.b= bias_range*(2*np.random.rand(self.m,1)-1)
            else:
                self.b = 0
        
        else:
            self.b = bias
    
    
    def local_field(self,x):
        return ((self.w.T@x.T) + self.b).T
    
    def activate(self,x):        
        return self._activate_lf(self.local_field(x))
    
    def _activate_lf(self,lf):
        return self._activation_function(lf)
    
    def grad(self,x):
        return self._grad_lf(self.local_field(x))

    def _grad_lf(self,lf):
        return self._derivate_activation_function(lf)
    


In [388]:
activation_func = lambda x : np.tanh(x)
derivate_activation_func = lambda x :  (1-np.tanh(x)**2)

In [619]:
M1 = 3
M2 = 4

In [620]:
layer1 = Layer_Perceptron(training_inputs.shape[1],M1,activation_func,derivate_activation_func)
layer2 = Layer_Perceptron(layer1.w.shape[1],M2,activation_func,derivate_activation_func)
layer3 = Layer_Perceptron(layer2.w.shape[1],1,activation_func,derivate_activation_func)

In [621]:
layer1.activate(training_inputs[0:1,:]).shape

(1, 3)

In [622]:
training_inputs.shape

(10000, 2)

In [623]:
layer1.activate(training_inputs).shape

(10000, 3)

In [624]:
layer1.grad(training_inputs).shape

(10000, 3)

In [625]:
layer3.activate(layer2.activate(layer1.activate(training_inputs))).shape

(10000, 1)

### 2. Energies functions

In [626]:
class LossFunction:
    
    def __init__(self,loss,grad):
        self.loss = loss
        self.grad = grad
    
    def total_loss(self,inputs,outputs):
        return self.loss(inputs,outputs).mean()
    
    def total_grad(self,inputs,outputs):
        return self.grad(inputs,outputs).mean()


In [627]:
def energy_func(inputs,real_values):
    return ((inputs-real_values)**2)

def energy_derivate(inputs,real_values):
    return 2*(inputs-real_values)

energy = LossFunction(energy_func,energy_derivate)

### 3. NeuralNet

In [703]:
class NeuralNet:
    
    def __init__(self,layers):
        
        self.L = len(layers)
        self.layers = layers
    
    def activate(self,inputs,l=None): 
        if l == None:
            l= self.L-1
        if l ==-1:
            return inputs
        return self.layers[l]._activate_lf(self.local_field_forwad(inputs,l))
        
        
    def local_field_forwad(self,inputs,l):
        outputs = inputs
        for i in range(l):
            outputs = self.layers[i].activate(outputs)
        
 
        local_field = self.layers[l].local_field(outputs)
            
        return local_field  

In [704]:
nn = NeuralNet([layer1,layer2,layer3])

In [705]:
nn.local_field_forwad(training_inputs,2).shape

(10000, 1)

In [706]:
nn.activate(training_inputs,2).shape

(10000, 1)

## Gradient descent

In [707]:
loss = energy
inputs = training_inputs[0:10]
outputs = training_outputs[0:10]

In [708]:
loss.grad(nn.activate(inputs),outputs).shape

(10, 1)

In [709]:
layer3.w.shape

(4, 1)

In [714]:
grads_w = [np.zeros(layer.w.shape) for layer in nn.layers]
grads_b = [np.zeros(layer.b.shape) for layer in nn.layers]


back_prop = loss.grad(nn.activate(inputs),outputs)
for i in reversed(range(nn.L)):
    l = nn.layers[i]
      
    grad_layer = l._grad_lf(nn.local_field_forwad(inputs,i))
    activation= nn.activate(inputs,i-1)
    
    print(i)
    print(grad_layer.shape)
    print(activation.shape)
    print(back_prop.shape)

    
    grad_w = activation*(back_prop*grad_layer)
    grad_b = np.ones(activation.shape)*(back_prop*grad_layer)
    
    print(grad_w.shape)
    back_prop = (grad_layer*back_prop)@l.w.T
    

    print("")


2
(10, 1)
(10, 4)
(10, 1)
(10, 4)

1
(10, 4)
(10, 3)
(10, 4)


ValueError: operands could not be broadcast together with shapes (10,3) (10,4) 

In [578]:
nn.local_field_forwad(inputs,2).shape

(10, 1)

In [407]:
nn.local_field_forwad(inputs,2).shape

(10000, 1)

In [408]:
layer1.activate(la)

NameError: name 'la' is not defined

In [374]:
nn.layers[2]._grad_lf(nn.local_field_forwad(inputs,2))

array([[0.54472783],
       [0.54755609],
       [0.54400758],
       ...,
       [0.54901909],
       [0.54714307],
       [0.53900049]])