In [1]:
from IPython.core.display import display, HTML
#display(HTML("<style>.container { width:65% !important; }</style>"))

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import sys
import utils
import layers_builder

In [6]:
def ground_prediction(x1_i, x2_i):
    ground = np.bitwise_and(x1_i, x2_i)
    return ground
    
    
n_samples = 1000
X1 = np.random.choice(np.random.randint(0, 10000, 10000), n_samples)
X2 = np.random.choice(np.random.randint(10000, 20000, 10000), n_samples)
X1 = np.random.choice(np.random.randint(0, 2, 10000), n_samples)
X2 = np.random.choice(np.random.randint(0, 2, 10000), n_samples)
X = np.asarray([X1, X2]).transpose()
X_norm = X

# scaler = MinMaxScaler(feature_range=(-1, 1))
# scaler.fit(X)
# X_norm = scaler.transform(X)

y = list()
for i in np.arange(X.shape[0]):
    y_i = ground_prediction(X[i,0], X[i,1])
    y.append(y_i)
    
y = np.asarray(y) 
y_norm = y

# y = y.reshape(-1,1)
# scaler_y = MinMaxScaler(feature_range=(-1, 1))
# scaler_y.fit(y)
# y_norm = scaler_y.transform(y)

In [3]:
def compute_gradient_approximation(L2_weights, L1_weights, idx, e):
    # 2nd layer
    L2_weights_gradsAprox = np.zeros(L2_weights.shape)
    for i in np.arange(L2_weights.shape[0]):
        for j in np.arange(L2_weights.shape[1]):
            L2_weights_shift = np.copy(L2_weights)
            
            L2_weights_shift[i,j] = L2_weights[i,j] + e
            a_3, a_2, a_1 = nn_test.feed_forward_NN(L2_weights_shift, L1_weights, idx)
            err_plus = compute_mean_squared_error(a_3, idx)

            L2_weights_shift[i,j] = L2_weights[i,j] - e
            a_3, a_2, a_1 = nn_test.feed_forward_NN(L2_weights_shift, L1_weights, idx)
            err_minus = compute_mean_squared_error(a_3, idx)
    
            derv = (err_plus - err_minus)/(2*e)
            L2_weights_gradsAprox[i,j] = derv
            
           
    # 1st layer
    L1_weights_gradsAprox = np.zeros(L1_weights.shape)
    for i in np.arange(L1_weights.shape[0]):
        for j in np.arange(L1_weights.shape[1]):
            L1_weights_shift = np.copy(L1_weights)
            
            L1_weights_shift[i,j] = L1_weights[i,j] + e
            a_3, a_2, a_1 = nn_test.feed_forward_NN(L2_weights, L1_weights_shift, idx)
            err_plus = compute_mean_squared_error(a_3, idx)

            L1_weights_shift[i,j] = L1_weights[i,j] - e
            a_3, a_2, a_1 = nn_test.feed_forward_NN(L2_weights, L1_weights_shift, idx)
            err_minus = compute_mean_squared_error(a_3, idx)
    
            derv = (err_plus - err_minus)/(2*e)
            L1_weights_gradsAprox[i,j] = derv
            
            
    return L2_weights_gradsAprox, L1_weights_gradsAprox
    


def check_gradient(L2_weights, grad_L2, L1_weights, grad_L1, idx, e=1e-4):
        
    L2_weights_gradsAprox, L1_weights_gradsAprox = compute_gradient_approximation(L2_weights, L1_weights, idx, e)
    
    L2_grads_check = np.allclose(L2_weights_gradsAprox, grad_L2, atol=1e-3)
    if not (L2_grads_check): sys.exit("Error in L2 gradients values checking ")
    
    L2_grads_check_shape = (L2_weights_gradsAprox.shape == grad_L2.shape)
    if not (L2_grads_check_shape): sys.exit("Error in L2 gradients shape checking ")
        
    L1_grads_check = np.allclose(L1_weights_gradsAprox, grad_L1, atol=1e-2)    
    if not (L1_grads_check): sys.exit("Error in L1 gradients values checking")

    L1_grads_check_shape = (L1_weights_gradsAprox.shape == grad_L1.shape)
    if not (L1_grads_check_shape): sys.exit("Error in L1 gradients shape checking ")
    

In [48]:
class NeuralNet:
        
    def __init__(self,  layers_dim, r, iterations):
        self.layers_dim = layers_dim
        self.layer_out_id = len(layers_dim) - 1
        self.idx = 0
        
        self.r = r
        self.iterations = iterations
        self.net = layers_builder.net_constructer(layers_dim)
        self.err_history = []
        self.ll_history = []
       
    
    
    def feed_forward_NN(self):
        
        for i in np.arange(0, self.layer_out_id, dtype=int):
            # the first layer receive the input
            if( i == 0 ):
                idx = np.random.choice(np.random.randint(0, 1000, 1000), 1)
                idx = np.asscalar(idx)
                self.idx = idx
                self.net[i].a[:-1] = X_norm[idx, :].transpose()
                continue
                
            self.net[i].z = self.net[i-1].W.dot(self.net[i-1].a)
            self.net[i].set_activation()
        
    
    
    def back_propagate_error(self):
        # Two-stage process: 
        ## 1st, a distinguish to get the output layer error
        ## 2nd, a standard computation to get the hidden layers errors
        
        # output layer
        self.net[self.layer_out_id].d = -(y[self.idx] - self.net[self.layer_out_id].a)
        
        # hidden layers
        for i in np.arange(1, self.layer_out_id, dtype=int)[::-1]:
            # the (-) exclude the row with the pior error that was directed to bias
            d_next = self.net[i+1].d
            if self.net[i+1].bias: d_next = d_next[:-1]
                
            d_activation = self.net[i].W.transpose().dot(d_next)
            summation_derivative = utils.fun_sigmoid_derivative(self.net[i].a)
            
            # check if dimensions are adjusted
            if ( len(d_activation.shape) == 1):
                d_activation = np.expand_dims(d_activation, -1)
            if ( len(summation_derivative.shape) == 1):
                summation_derivative = np.expand_dims(summation_derivative, -1)
            

            self.net[i].d = np.multiply(d_activation, summation_derivative)
            
            
    
    def compute_gradient_error(self):
        # hidden layers
        for i in np.arange(0, self.layer_out_id, dtype=int)[::-1]:
            lcur_activ_vector = self.net[i].a.transpose()
            lnext_error_vector = self.net[i+1].d
            
            # if layer next (l+1) has bias, remove its error row
            if self.net[i+1].bias: 
                lnext_error_vector = lnext_error_vector[:-1]
            
            
            # check if dimensions are adjusted
            if ( len(lcur_activ_vector.shape) == 1):
                lcur_activ_vector = np.expand_dims(lcur_activ_vector, -1)
            if ( len(lnext_error_vector.shape) == 1):
                lnext_error_vector = np.expand_dims(lnext_error_vector, -1)
            
            
            self.net[i].g = lnext_error_vector.dot(lcur_activ_vector.transpose()) 
            
            
    
    def update_weights(self, check_grad=False):
        
        self.back_propagate_error()
        self.compute_gradient_error()
        
        for i in np.arange(0, self.layer_out_id, dtype=int)[::-1]:
            self.net[i].update_weights(self.r)

        

    
    def train(self):

        for i in np.arange(self.iterations):
            self.feed_forward_NN()
            self.update_weights()
    
    
    def predict(self, x_i):
        print x_i
        for i in np.arange(0, self.layer_out_id, dtype=int):
            if( i == 0 ):
                self.net[i].a[:-1] = x_i.transpose()
                continue
                
            self.net[i].z = self.net[i-1].W.dot(self.net[i-1].a)
            self.net[i].set_activation()
        
        return self.net[self.layer_out_id].a
        

In [49]:
nn_test = NeuralNet([2, 3, 4, 1], 3e-2, 10000)
print nn_test.net[1].W
nn_test.train()
print "\n"
print nn_test.net[1].W

[[ 0.7805165   0.29196826  0.63253794  1.25185922]
 [-2.21968471 -0.02156894 -0.86610845 -0.25875415]
 [ 0.57128149 -1.09599244  0.07614549 -0.22501438]
 [-2.37400008  0.90286108  0.73489375 -1.59227911]]


[[ 1.66137326  2.40072843  2.89493099  4.52181371]
 [-1.10419935  3.20995078  2.62876408  4.38010762]
 [ 1.99476726  1.85081994  3.21359224  4.61044563]
 [-1.21981753  3.77655327  3.85981233  2.77537113]]


In [32]:
print X_norm[5:10, :]
print y_norm[5:10]

[[0 0]
 [0 1]
 [0 0]
 [1 1]
 [0 1]]
[0 0 0 1 0]


In [50]:
xid = 960
print nn_test.predict(X_norm[xid,:])
print y_norm[xid]

[0 0]
[ 0.58698198]
0
