Importing needed tools:

In [45]:
import random
from math import tanh, exp

Class of the network definition and initializing a setting:


In [46]:
#set eta for from the formula
eta=0.3
# We have decided to program a perceptron as a class datatype to avoid passing data for every function and initialization convinience
class Perceptron:
    def __init__(self, layers_structure, transition_funcs, learning_rate, random_seed):
        # We control randomness of each run by setting the seed
        random.seed(random_seed)
        self.num_layers = len(layers_structure)
        self.num_X, self.num_y = layers_structure[0], layers_structure[-1]

        # a neuron with (2, 3, 4) structure will have two 2x3 and 3x4 matricies as its weights
        # we initialize the weights and biases with a random number between -2 and 2
        self.weights = [[[(random.random()-0.5)*4 for i in range(layers_structure[h+1])]
                            for j in range(layers_structure[h])]
                                        for h in range(self.num_layers-1)]

        self.biases = [[[(random.random()-0.5)*4 for i in range(layers_structure[h+1])]] for h in range(self.num_layers-1)]

        # print(self.weights)

        # assert len(transition_funcs) == len(self.weights)

        # function names passed as strings are converted into function object via dictionaries
        trans_dict = {"tanh": tanh,
                      "logistic": lambda x: 1 / (1 + exp(-x)),
                      "identity": lambda x: x}

        dtrans_dict = {"tanh": lambda x: 1-x**2,
                      "logistic": lambda x: x*(1-x),
                      "identity": lambda x: x}
                    
        self.f_trans = [trans_dict[f] for f in transition_funcs]
        self.df_trans = [dtrans_dict[f] for f in transition_funcs]

        self.learning_rate = learning_rate

    
    # one forward of a data through the network and returning the output of each neurone
    def forward(self, p_X,p_y):
        # list used to cache output of every neuron
        cache_f_net = []
            
        # with this loop, we feed our data forward into all of the layers
        layer_input = p_X
        for layer_index in range(self.num_layers-1):
            net = self._matrix_add(self._matrix_mul(layer_input, self.weights[layer_index]), self.biases[layer_index])
            out = [[self.f_trans[layer_index](y) for y in row] for row in net]
            cache_f_net.append(out.copy())

            layer_input = out
        
        return cache_f_net

    #back propagation of error and finding the weights using gradient decent
    def back_prop(self,p_x,p_y,cache):
        # this function takes a y,x of teacher and the output of each 
        # neurone when forwarding this data through the network
        global eta
        d_y=[[]]
        cache=[p_x]+cache
        
        # Here the result of gradient of each neurone of the outputlayer is calculated
        # because we have different f() for each layer, here we have an array of functions
        # the index of the array is the number of the layer
        for y in cache[-1][0]:
            d_y[0].append(self.df_trans[-1](y))
        out_delta=self._matrix_dot_mul(self._matrix_sub(p_y,cache[-1]),d_y)
        #delta of the last layer is calculated

        #here we calculated the Delta_Weights using the delta values of last step
        w_def_out=[[outh*delta*eta for outh in cache[-2][0] ] for delta in out_delta[0]]
        
        # Here we calculated the new weights for last layer using the Delta_weights of last step
        for i in range(len(cache[-1][0])):
            self.weights[-1][i][0]+=w_def_out[0][i]
        w_def_all=[]
        w_def_all.append(w_def_out)

        # in this loop we calculate the delta for each step and using that we calculate
        # the Delta_Weight (difference of the new weight from previus one), for each layer we have an iteration
        for i in range(len(cache)-3,-1,-1):
            
            #out_delta_pre is an array of the delta of previous layer
            out_delta_pre=out_delta.copy()
            out_delta=[[0 for x in range(len(cache[i+1][0]))]]
            w_def=[[0 for x in cache[i+1][0]] for r in cache[i][0]]
            #in w_def the difference of the weights for this layer should be stored
            
            #for eache neurone of this layer we calculate the delta then the Delta_weight and then the new Weight
            #start of calculating the delta for the neurone based on the delta of the last layer
            for n in range(len(cache[i+1][0])):
                sigma_delta=0
                for k in range(len(out_delta_pre[0])):
                   
                    sigma_delta+=out_delta_pre[0][k]*self.weights[i+1][n][k]
                    
                deltah=sigma_delta*self.df_trans[i](cache[i+1][0][n])
                out_delta[0][n]=deltah
                
               #now we have the delta and we can calculate the difference of weight (Delta_weight)
                for j in range(len(cache[i][0])):
                   
                    w_def[j][n]=eta*deltah*cache[i][0][j]
                
                w_def_all.insert(0,w_def)

            #here we apply the difference of the weight of this layerx
            for k in range(len(w_def[0])):
                for j in range(len(w_def)):
                
                    
                    self.weights[i][j][k]+=w_def[j][k]
         
    def train(self, X_train, y_train,X_test,y_test,epoch_count):
        for i in range(epoch_count):
            print("epoch: ",i," \n -----------------------------------")
            for p_X, p_y in zip(X_train, y_train):
                # for every instance, data is passed through the network and the outputs are returned
                iter_cache=self.forward(p_X,p_y)

                # We then, use thos values to calculate losses and update the weights
                self.back_prop(p_X,p_y,iter_cache)

            # we calculate the cumulative error of the epoch
            sum_error=0
            for p_X, p_y in zip(X_test, y_test):
                res=self.forward(p_X,p_y)
                
                sum_error+=(p_y[0][0]-res[-1][0][0])**2
            avge=sum_error/len(y_test)
            print("error",avge)


    # We had to implement linear algebra functions in pure python to simplify our code
    def _matrix_sub(self,A,B):
        return self._matrix_add(A,[[-x for x in row]for row in B])
    # implement of adding 2 matrix function
    def _matrix_add(self, A, B):
        rows_A = len(A)
        cols_A = len(A[0])
        rows_B = len(B)
        cols_B = len(B[0])

        #raise error if dimensions are not equal
        if rows_A != rows_B or cols_A!=cols_B:
            raise "jam nmisheeeeeeeeeeeee"
        # Create the result matrix
        # Dimensions would be rows_A x cols_B
        C = [[0 for col in range(cols_B)] for col in range(rows_A)]

        for i in range(rows_A):
            for j in range(cols_B):
                
                C[i][j]+=A[i][j]+B[i][j]    
        return C
    #implementation of crossing two matrixes
    def _matrix_mul(self, A, B):
        rows_A = len(A)
        cols_A = len(A[0])
        rows_B = len(B)
        cols_B = len(B[0])

        #print(rows_A, cols_A, rows_B, cols_B)
        if cols_A != rows_B:
            raise "nmisheeeeeeeeeeeee"
        # Create the result matrix
        # Dimensions would be rows_A x cols_B
        C = [[0 for row in range(cols_B)] for col in range(rows_A)]

        for i in range(rows_A):
            for j in range(cols_B):
                for k in range(cols_A):
                    C[i][j]+=A[i][k]*B[k][j]    
        return C

    #implementation of dot multiply of two matrixes
    def _matrix_dot_mul(self,A,B):
        rows_A = len(A)
        cols_A = len(A[0])
        rows_B = len(B)
        cols_B = len(B[0])

        #print(rows_A, cols_A, rows_B, cols_B)
        if rows_A != rows_B or cols_A!=cols_B:
            raise "jam nmisheeeeeeeeeeeee"
        # Create the result matrix
        # Dimensions would be rows_A x cols_B
        C = [[0 for col in range(cols_B)] for col in range(rows_A)]

        for i in range(rows_A):
            for j in range(cols_B):
                
                C[i][j]+=A[i][j]*B[i][j]    
        return C



Start of running:

In [47]:
#initializing the variables:
hidden_layer_neurons=20
epoch_count=40

# We first start by reading the data instances from the file
input_path = "PA-A_training_data_01.txt"

In [48]:


X, y = [], []
with open(input_path, 'r') as inp:
    # the structure of the dataset is parsed from the second line
    inp.readline()
    in_n, out_n = inp.readline().split()[2:]
    in_n = int(in_n[2:])
    out_n = int(out_n[2:])

    for line in inp:
        if line and not line.startswith('#'):
            # we again, parse each line by a space to get the list of integer values
            vals = [float(v) for v in line.strip('\n').strip(' ').split(' ') if v]

            # we split the values into x and y
            X.append([vals[:-out_n]])
            y.append([vals[-out_n:]])
            

In [49]:
# Now we can initialize a network and train it
nn = Perceptron(layers_structure=(in_n, hidden_layer_neurons, out_n), transition_funcs=["tanh", "logistic"], learning_rate=0.001, random_seed=42)
nn.train(X, y,X,y,epoch_count)
    

epoch:  0  
 -----------------------------------
error 1.3656375461256047
epoch:  1  
 -----------------------------------
error 1.3234581994736476
epoch:  2  
 -----------------------------------
error 1.2724002490229236
epoch:  3  
 -----------------------------------
error 1.1957433415923893
epoch:  4  
 -----------------------------------
error 1.05682208465237
epoch:  5  
 -----------------------------------
error 0.8808665714971391
epoch:  6  
 -----------------------------------
error 0.8122823620790282
epoch:  7  
 -----------------------------------
error 0.7830777271375964
epoch:  8  
 -----------------------------------
error 0.7580589759896614
epoch:  9  
 -----------------------------------
error 0.7355674295177644
epoch:  10  
 -----------------------------------
error 0.7159644107302162
epoch:  11  
 -----------------------------------
error 0.6992966438321245
epoch:  12  
 -----------------------------------
error 0.6852519891522824
epoch:  13  
 -----------------------