In [None]:
from IPython.core.display import display, HTML
#display(HTML("<style>.container { width:65% !important; }</style>"))

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import sys
import utils
import layers_builder

In [None]:
def ground_prediction(x1_i, x2_i):
    """Return the bitwise AND of the two inputs.

    Works on scalars as well as on arrays, because ``np.bitwise_and`` is
    applied element-wise.
    """
    return np.bitwise_and(x1_i, x2_i)
    
    
# Number of training rows generated below.
n_samples = 1000

# Two binary input features per sample. The earlier draws from the ranges
# [0, 10000) and [10000, 20000) were overwritten immediately and have been
# removed; the bits are now drawn directly instead of resampling a random pool.
X1 = np.random.randint(0, 2, n_samples)
X2 = np.random.randint(0, 2, n_samples)
X = np.asarray([X1, X2]).transpose()   # shape (n_samples, 2)
# Scaling is disabled (see the commented-out scaler below), so the network
# sees the raw 0/1 inputs.
X_norm = X

# scaler = MinMaxScaler(feature_range=(-1, 1))
# scaler.fit(X)
# X_norm = scaler.transform(X)

# ground_prediction is element-wise, so all targets are computed in one call
# instead of a per-row Python loop.
y = np.asarray(ground_prediction(X[:, 0], X[:, 1]))
# Targets are likewise used unscaled.
y_norm = y

# y = y.reshape(-1,1)
# scaler_y = MinMaxScaler(feature_range=(-1, 1))
# scaler_y.fit(y)
# y_norm = scaler_y.transform(y)

In [None]:
class NeuralNet:
    """Feed-forward network trained on one randomly drawn sample per step.

    The layer objects are produced by ``layers_builder.net_constructer``.
    This class relies only on what it touches on them: the attributes ``a``
    (activations), ``z`` (pre-activations), ``W`` (weights towards the next
    layer), ``d`` (error terms), ``g`` / ``ga`` (analytic / numerically
    approximated gradients), ``bias`` and ``layer_id``, and the methods
    ``set_activation``, ``update_weights`` and ``check_gradient_computation``.

    Training data is read from the notebook-level globals ``X_norm`` and
    ``y_norm``.
    """

    def __init__(self, layers_dim):
        """Build the layer stack.

        Args:
            layers_dim: list of layer sizes (e.g. ``[2, 50, 1]``), passed
                unchanged to ``layers_builder.net_constructer``.
        """
        self.layers_dim = layers_dim
        # Index of the last (output) layer in ``self.net``.
        self.layer_out_id = len(layers_dim) - 1
        # Row index of the most recently sampled training example.
        self.idx = 0

        self.net = layers_builder.net_constructer(layers_dim)
        self.err_history = []
        self.ll_history = []
        self.mse = None

    @staticmethod
    def _as_column(vector):
        """Return a 1-D array as an (n, 1) column; other shapes are returned as is."""
        if len(vector.shape) == 1:
            return np.expand_dims(vector, 1)
        return vector

    def _forward(self, x_i):
        """Load ``x_i`` into the input layer and propagate it to the output layer."""
        # Slot 0 of the input activations is never written here
        # (presumably it holds the bias unit -- confirm in layers_builder).
        self.net[0].a[1:] = x_i.transpose()
        for i in range(1, self.layer_out_id + 1):
            self.net[i].z = self.net[i - 1].W.dot(self.net[i - 1].a)
            self.net[i].set_activation()
        return self.net[self.layer_out_id].a

    def feed_forward_NN(self):
        """Draw one random row of ``X_norm`` and run a forward pass on it.

        The drawn row index is stored in ``self.idx`` so the backward pass can
        look up the matching target in ``y_norm``.
        """
        # Sample from the real number of rows (was hard-coded to 1000) and
        # avoid np.asscalar, which no longer exists in current NumPy.
        idx = int(np.random.randint(0, X_norm.shape[0]))
        self.idx = idx
        self._forward(X_norm[idx, :])

    def back_propagate_error(self):
        """Compute the error term ``d`` of every non-input layer.

        Two stages: the output layer error is derived from the target
        ``y_norm[self.idx]``; the hidden layer errors are then derived from
        the error of the layer that follows them, walking backwards.
        """
        # Output layer.
        out_layer = self.net[self.layer_out_id]
        activation_error = -(y_norm[self.idx] - out_layer.a)
        activation_derivative = utils.fun_sigmoid_derivative(out_layer.a)
        out_layer.d = np.multiply(activation_error, activation_derivative)

        # Hidden layers, last hidden layer first.
        for i in reversed(range(1, self.layer_out_id)):
            d_next = self.net[i + 1].d
            # When the next layer has a bias unit, drop its first error row:
            # no weight of this layer feeds that unit.
            if self.net[i + 1].bias:
                d_next = d_next[1:]
            d_next = self._as_column(d_next)

            d_activation = self._as_column(self.net[i].W.transpose().dot(d_next))
            summation_derivative = self._as_column(
                utils.fun_sigmoid_derivative(self.net[i].a))

            self.net[i].d = np.multiply(d_activation, summation_derivative)

    def compute_gradients_errors(self):
        """Refresh all error terms and store the weight gradient ``g`` per layer.

        For each layer that owns weights, ``g`` is the outer product of the
        next layer's error (bias row removed when present) and this layer's
        activations.
        """
        self.back_propagate_error()

        for i in reversed(range(0, self.layer_out_id)):
            layer_cur_activ_vector = self.net[i].a
            layer_next_error_vector = self.net[i + 1].d

            # If the next layer has a bias unit, remove its error row.
            if self.net[i + 1].bias:
                layer_next_error_vector = layer_next_error_vector[1:]

            layer_cur_activ_vector = self._as_column(layer_cur_activ_vector)
            layer_next_error_vector = self._as_column(layer_next_error_vector)

            self.net[i].g = layer_next_error_vector.dot(
                layer_cur_activ_vector.transpose())

    def update_weights(self, r, check_grad=False):
        """Compute the gradients and let every weight-owning layer apply them.

        Args:
            r: value forwarded to each layer's ``update_weights``
                (presumably the learning rate -- confirm in layers_builder).
            check_grad: unused; kept so existing callers keep working.
        """
        self.compute_gradients_errors()

        for i in reversed(range(0, self.layer_out_id)):
            self.net[i].update_weights(r)

    def train(self, r, iterations):
        """Run ``iterations`` steps of: forward pass on a random sample, weight update.

        Args:
            r: forwarded to :meth:`update_weights`.
            iterations: number of single-sample training steps.
        """
        for _ in np.arange(iterations):
            self.feed_forward_NN()
            self.update_weights(r)

    def predict(self, x_i):
        """Run a forward pass on ``x_i`` and return the output layer's activations.

        Note that this overwrites the activations stored in the layers.
        """
        return self._forward(x_i)

    def mean_squared_error(self):
        """Store half the squared error for row ``self.idx`` in ``self.mse``.

        Runs a forward pass on that row as a side effect.
        """
        h = self.predict(X_norm[self.idx, :])
        target = y_norm[self.idx]
        self.mse = 0.5 * np.power(target - h, 2)

    def compute_gradient_approximation(self, i, weight_shift=1e-4):
        """Fill ``self.net[i].ga`` with central-difference gradient estimates.

        Each weight of layer ``i`` is shifted by ``-weight_shift`` and
        ``+weight_shift``; the error of the current sample is evaluated at
        both points and the slope between them is stored.

        Args:
            i: index of the layer whose weights are perturbed.
            weight_shift: size of the perturbation applied to each weight.
        """
        layer = self.net[i]
        n_rows, n_cols = layer.W.shape
        for j_w in range(n_cols):
            for i_w in range(n_rows):
                # Remember the exact value so it can be restored without the
                # rounding drift of repeated add/subtract.
                original = layer.W[i_w, j_w]

                layer.W[i_w, j_w] = original - weight_shift
                self.mean_squared_error()
                mse_shift_negative = self.mse

                layer.W[i_w, j_w] = original + weight_shift
                self.mean_squared_error()
                mse_shift_positive = self.mse

                layer.W[i_w, j_w] = original

                # np.sum collapses the (single-element) error array to a
                # scalar before it is stored in one cell of ``ga``.
                layer.ga[i_w, j_w] = (
                    np.sum(mse_shift_positive - mse_shift_negative)
                    / (2 * weight_shift))

    def check_gradient_computation(self):
        """Compare analytic and numerically approximated gradients layer by layer.

        Returns:
            The string ``"Success"`` when every layer's own
            ``check_gradient_computation(rtol=0.25)`` passes.

        Raises:
            SystemExit: via ``sys.exit`` when a layer fails the check; both
                gradient matrices are printed first.
        """
        # Forward pass first: after a weight update the stored activations
        # belong to the old weights, and the analytic gradients must be
        # computed at the same weights the numeric check perturbs.
        self.mean_squared_error()
        self.compute_gradients_errors()

        # Now do the same numerically.
        for i in reversed(range(0, self.layer_out_id)):
            self.compute_gradient_approximation(i)
            check = self.net[i].check_gradient_computation(rtol=0.25)
            if not check:
                print("g:")
                print(self.net[i].g)
                print("ga:")
                print(self.net[i].ga)
                sys.exit("Error in compute gradient from layer " + str(self.net[i].layer_id))

        return "Success"
            


In [None]:
# Build a network with 2 inputs, six hidden layers and 1 output, then run
# 20 single-sample training steps with r = 3e-3.
nn_test = NeuralNet(layers_dim=[2, 50, 50, 50, 500, 50, 50, 1])
nn_test.train(r=3e-3, iterations=20)

In [None]:
# Compare analytic and numeric gradients of the trained network; prints
# "Success" when every layer passes. The call form of print is valid on
# both Python 2 and Python 3.
print(nn_test.check_gradient_computation())