In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [29]:
#This class implementation is inspired from the NN implemented in cours IFT6093
class NN(object):
    
    
    def __init__(self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2, initialization='zeros', mode=',train',
                 datapath=None,model_path=None):
        
        self.indim = input_dim
        self.hd1 = hidden_dims[0] 
        self.hd2 = hidden_dims[1]
        self.n_hidden = n_hidden
        self.outd = output_dim
        self.W1 = np.zeros(shape=(hidden_dims[0], input_dim))
        #print('W1.shape =', self.W1.shape)
        #print('W1 = ', self.W1)
        #print('\n')
        
        self.b1 = np.zeros(hidden_dims[0])
        #print('b1.shape =', self.b1.shape)
        #print('b1 = ', self.b1)
        #print('\n')
        
        self.W2 = np.zeros(shape=(hidden_dims[1], hidden_dims[0]))
        #print('W2.shape =', self.W2.shape)
        #print('W2 = ', self.W2)
        #print('\n')
        
        self.b2 = np.zeros(hidden_dims[1])
        #print('b2.shape =', self.b2.shape)
        #print('b2 = ', self.b2)
        #print('\n')
        
        self.W3 = np.zeros(shape=(output_dim, hidden_dims[1]))
        #print('W3.shape =', self.W3.shape)
        #print('W3 = ', self.W3)
        #print('\n')
        
        self.b3 = np.zeros(output_dim)
        #print('b3.shape =', self.b3.shape)
        #print('b3 = ', self.b3)
        #print('\n')
        
        if initialization=='normal':
            self.initialize_weights_normal()
            #print('W1 = ', self.W1)
            #print('W2 = ', self.W2)
            #print('W3 = ', self.W3)
            
            
        if initialization=='glorot':
            self.initialize_weights_glorot()
            #print('W1 = ', self.W1)
            #print('W2 = ', self.W2)
            #print('W3 = ', self.W3)
            
        
        self.parameters = [self.W3, self.b3, self.W2, self.b2, self.W1, self.b1]
        
        
    def initialize_weights_normal(self):
        
        self.W1 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd1, self.indim))
        self.W2 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd2, self.hd1))
        self.W3 = np.random.normal(loc=0.0, scale=1.0, size=(self.outd, self.hd2))
        
        return self
    
    
    def initialize_weights_glorot(self):
        
        dl1 = np.sqrt(6/(self.indim + self.hd1))
        dl2 = np.sqrt(6/(self.hd1 + self.hd2))
        dl3 = np.sqrt(6/(self.hd2 + self.outd))
        self.W1 = np.random.uniform(low=(-dl1), high=dl1, size=(self.hd1, self.indim))
        self.W2 = np.random.uniform(low=(-dl2), high=dl2, size=(self.hd2, self.hd1))
        self.W3 = np.random.uniform(low=(-dl3), high=dl3, size=(self.outd, self.hd2))
        
        return self
        
        
        
    #Method inspired from NN implemented in cours IFT6093
    def activation (self,input):
        return (input > 0) * input  
    
    #line 85

    def forward(self,x):
        #print('forward')
        
        #a1 = np.dot (self.W1, x) + self.b1 
        #print('a1 = np.dot (self.W1, x) + self.b1')
        #print('a1.shape =', a1.shape)
        #print('a1 = ', a1)
        #print('\n')
        
        h1 = self.activation (a1)
        #print('h1 = self.activation (a1)')
        #print('h1.shape =', h1.shape)
        #print('h1 = ', h1)
        #print('\n')
        
        a2 = np.dot (self.W2, h1) + self.b2
        #print('a2 = np.dot (self.W2, h1) + self.b2')
        #print('a2.shape =', a2.shape)
        #print('a2 = ', a2)
        #print('\n')
        
        h2 = self.activation (a2)
        #print('h2 = self.activation (a2)')
        #print('h2.shape =', h2.shape)
        #print('h2 = ', h2)
        #print('\n')
        
    
        oa = np.dot (self.W3, h2) + self.b3
        #print('oa = np.dot (self.W3, h2) + self.b3')
        #print('oa.shape =', oa.shape)
        #print('oa = ', oa)
        #print('\n')
        
        os = self.softmax (oa, axis=0)
        #print('os = softmax (oa)')
        #print('os.shape =', os.shape)
        #print('os = ', os)
        #print('\n')
               
        return a1, h1, a2, h2, oa, os
    
    

    #Method inspired from NN implemented in cours IFT6093
    def loss (self, y, os):
        return (y * (-np.log(os))).sum()
    

    def softmax (self,x,axis=1):
        shiftx = x - np.max (x, axis=axis, keepdims=True)
        exps = np.exp (shiftx)
        y = exps / exps.sum (axis=axis, keepdims=True)
        return y


    def backward(self, x, y, a1, h1, a2, h2, oa, os, weight_decay=0, cache=None):
        #print ('backward')
        #print('x.shape = ', x.shape)
        #print('y.shape = ', y.shape)
        #print('os.shape = ', os.shape)
        grad_oa = os - y
        #print('grad_oa.shape =', grad_oa.shape)
        #print('\n')
        
        grad_W3 = np.outer (grad_oa, h2) + weight_decay * self.W3
        #print('grad_W3.shape =', grad_W3.shape)
        #print('\n')
        
        grad_b3 = grad_oa
        #print('grad_b3.shape =', grad_b3.shape)
        #print('\n')
        
        grad_h2 = np.dot (self.W3.T, grad_oa)
        #print(' grad_h2.shape =', grad_h2.shape)
        #print('\n')
        
        grad_a2 = (a2 > 0) * grad_h2
        #print('grad_a2.shape =', grad_a2.shape)
        #print('\n')
        
        grad_W2 = np.outer (grad_a2, h1) + weight_decay * self.W2
        #print('grad_W2.shape =', grad_W2.shape)
        #print('\n')
        
        grad_b2 = grad_a2 
        #print('grad_b2.shape =', grad_b2.shape)
        #print('\n')
        
        grad_h1 = np.dot (self.W2.T, grad_a2)
        #print('grad_h1.shape =', grad_h1.shape)
        #print('\n')
        
        grad_a1 = (a1 > 0) * grad_h1
        #print('grad_a1.shape =', grad_a1.shape)
        #print('\n')
        
        grad_W1 = np.outer (grad_a1, x) + weight_decay * self.W1
        #print('grad_W1.shape =', grad_W1.shape)
        #print('\n')
        
        grad_b1 = grad_a1
        #print('grad_b1.shape =', grad_b1.shape)
        #print('\n')
        
        grads=[grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1]
   
        return grads



    def update(self, grads, learning_rate):
        for p, grad in zip(self.parameters, grads):
            p -= learning_rate * grad
        
    #line 201   

    def train_SGD(self, x, y_onehot, epoch, n, learning_rate=1e-1, weight_decay=0):
        y= y_onehot
        print('x.shape = ', x.shape)
        print('y.shape = ', y.shape)
        avgLoss=np.zeros((epoch, 2))
        i=0
        while (i<epoch):
            losses = 0
            if (n==1):
                a1, h1, a2, h2, oa, os = self.forward(x)
                grads = self.backward(x, y, a1, h1, a2, h2, oa, os)
                self.update(grads, learning_rate)
                loss = self.loss(y, os)
                losses += loss  
                average_loss = losses / n
            else:    
                for j in range(x.shape[0]):
                    #print ('xj.shape = ', x[j].shape)
                    #print ('yj.shape = ', y[j].shape )
                    a1, h1, a2, h2, oa, os = self.forward(x[j])
                    grads = self.backward(x[j], y[j], a1, h1, a2, h2, oa, os)
                    self.update(grads, learning_rate)
                    loss = self.loss(y[j], os)
                    losses += loss 
                    
                average_loss = losses / n
                #print (average_loss)
            avgLoss[i, 0]= i
            avgLoss[i, 1]= average_loss
                
            i+=1
            
        print ('avgLoss') 
        print ('\n')
        print (avgLoss)  
        
        return avgLoss
    
    
    def prediction_SGD (self, x):
        predictions = np.zeros(x.shape[0])
        for i in range(x.shape[0]):
            _, _, _, _, _, os = self.forward(x[i])
            predictions[i] = os.argmax(axis=0)
            
        return predictions
    
    def accuracy_SGD (self, prediction, y):
        accuracy=0
        for i in range (y.shape[0]):
            accuracies+=(prediction[i]==y[i])
            
        return accuracy / y.shape[0]
    
    
    def test_SGD(self, x, y_onehot, y):
        preds=[]
        avg_loss=0
        for i in range (x.shape[0]):
            _, _, _, _, _, os = self.forward(x[i])
            loss=self.loss (y_onehot[i], os)
            avg_loss+=loss
            pred[i] = argmax.os(axis[0])
            
        accuracy=self.accuracy_SGD(pred, y)    
        return avg_loss / x.shape[0], accuracy
    
   
    def forward_mbatch(self, x):
        #print ('forward minibtach')
        a1 = np.dot ( x, self.W1.T) + self.b1 
        #print('a1 = np.dot (x, self.W1.T) + self.b1')
        #print('a1.shape =', a1.shape)
        #print('a1 = ', a1)
        #print('\n')
        
        h1 = self.activation (a1)
        #print('h1 = self.activation (a1)')
        #print('h1.shape =', h1.shape)
        #print('h1 = ', h1)
        #print('\n')
        
        a2 = np.dot (h1, self.W2.T) + self.b2
        #print('a2 = np.dot (h1, self.W2.T) + self.b2')
        #print('a2.shape =', a2.shape)
        #print('a2 = ', a2)
        #print('\n')
        
        h2 = self.activation (a2)
        #print('h2 = self.activation (a2)')
        #print('h2.shape =', h2.shape)
        #print('h2 = ', h2)
        #print('\n')
        
        oa = np.dot (h2, self.W3.T) + self.b3
        #print('oa = np.dot (h2, self.W3.T) + self.b3')
        #print('oa.shape =', oa.shape)
        #print('oa = ', oa)
        #print('\n')
        
        os = self.softmax (oa, axis=1)
        #print('os = softmax (oa)')
        #print('os.shape =', os.shape)
        #print('os = ', os)
        #print('\n')
               
        return a1, h1, a2, h2, oa, os
    
    #line 303
        
    def backward_mbatch(self, x, y, a1, h1, a2, h2, oa, os, batch_n, weight_decay=0):
        #print ('backward minibatch')
        
        #print('x.shape = ', x.shape)
        #print('y.shape = ', y.shape)
        #print('os.shape = ', os.shape)
        
        
        batch_n = x.shape[0]
        bgrad_oa = os - y
        #print('bgrad_oa.shape =', bgrad_oa.shape)
        #print('\n')
        
        bgrad_W3 = np.dot (bgrad_oa.T, h2) / batch_n  + weight_decay * self.W3
        #print('bgrad_W3.shape =', bgrad_W3.shape)
        #print('\n')
        
        bgrad_b3 = bgrad_oa.mean(axis=0)
        #print('bgrad_b3.shape =', bgrad_b3.shape)
        #print('\n')
        
        bgrad_h2 = np.dot (bgrad_oa, self.W3)
        #print(' bgrad_h2.shape =', bgrad_h2.shape)
        #print('\n')
        
        bgrad_a2 = (a2 > 0) * bgrad_h2
        #print('bgrad_a2.shape =', bgrad_a2.shape)
        #print('\n')
        
        bgrad_W2 = np.dot (bgrad_a2.T, h1) / batch_n  + weight_decay * self.W2
        #print('bgrad_W2.shape =', bgrad_W2.shape)
        #print('\n')
        
        bgrad_b2 = bgrad_a2.mean(axis=0) 
        #print('bgrad_b2.shape =', bgrad_b2.shape)
        #print('\n')
        
        bgrad_h1 = np.dot (bgrad_a2, self.W2)
        #print('bgrad_h1.shape =', bgrad_h1.shape)
        #print('\n')
        
     
        bgrad_a1 = (a1 > 0) * bgrad_h1
        #print('bgrad_a1.shape =', bgrad_a1.shape)
        #print('\n')
        
        bgrad_W1 = np.dot (bgrad_a1.T, x) / batch_n  + weight_decay * self.W1
        #print('bgrad_W1.shape =', bgrad_W1.shape)
        #print('\n')
        
        bgrad_b1 = bgrad_a1.mean(axis=0)
        #print('bgrad_b1.shape =', bgrad_b1.shape)
        #print('\n')
        
        bgrads=[bgrad_W3, bgrad_b3, bgrad_W2, bgrad_b2, bgrad_W1, bgrad_b1]
   
        return bgrads

    #line 360

    #Method taken fron homwork 3 in cours IFT6093
    def loss_mbatch(self, os, y):
        return (y * (-np.log(os))).sum(axis=1).mean(axis=0)     
        
    
    #training with minibatch gradient decent
    def train_mbatch(self, x, y_onehot, epoch, mb_size=100, learning_rate=1e-1, weight_decay=0):
        avgLoss=np.zeros((epoch, 2))
        j=0
        while (j<epoch):
        
            for i in range (0, x.shape[0], mb_size):
                #print (i)
                xi = x[i:(i+mb_size)]
                yi = y_onehot[i:(i+mb_size)]
            
                losses = 0
                a1, h1, a2, h2, oa, os = self.forward_mbatch(xi)
                grads = self.backward_mbatch (xi, yi,a1, h1, a2, h2,oa, os, mb_size)
                self.update(grads, learning_rate)
                average_loss = self.loss_mbatch(os, yi) 
                              
            avgLoss[j, 0]=j
            avgLoss[j, 1]=average_loss
            j+=1
            
        return avgLoss
    
    
    #line 385
    
    def prediction_mbatch (self, x):
        _, _, _, _, _, os = self.forward_mbatch(x)
        return os.argmax(axis=1)
    

    def accuracy_mbatch (self, prediction, y):
        accuracy = np.zeros(y.shape[0])
        accuracy = prediction == y
        return accuracy.mean(axis=0)
    

    def test_mbatch(self, x, y_onehot, y):
        _, _, _, _, _, os = self.forward_mbatch(x)
        loss=loss_mbatch(os, y_onehot)
        accuracy=self.accuracy_mbatch (os.argmax(axis=1), y)
        return loss, accuracy
    
    
    def finite_difference():
        
        pass    
        


In [7]:
def mat_test(self, x, y):
    _, _, _, os = self.mat_fprop(x)
    return self.mat_loss(os, y), os.argmax(axis=1)
    

In [8]:
def softmax (self, x):
        shiftx = x - np.max(x)
        exps=np.exp(shiftx)
        y=exps/np.sum(exps)
        return y

def relu (x):
    y=np.maximum(0, x)
    return y

#function taken from IFT6093 cours
def onehot(y, n_classes):
    o = np.zeros(shape=(y.shape[0], n_classes))
    for i in range(y.shape[0]):
        o[i, int(y[i])] = 1
    return o

Implementation

In [15]:
#backpropagation for 1 exemple


# self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2, initialization=zeros, mode=',train',
# datapath=None,model_path=None

NN_model= NN(780, 10, hidden_dims=(500,300))

x = np.random.uniform(-1, 1, size=(780))
print('x.shape = ', x.shape)
#print('x = ', x)
#print('\n')

y = np.zeros(shape=(10, ))
y[1] = 1
print('y.shape = ', y.shape)
#print('y = ', y)
#print('\n')

a1, h1, a2, h2, oa, os = NN_model.forward(x)

#self,cache, x, y,a1, h1, a2, h2, oa, os, weight_decay=0)
grads=NN_model.backward(x, y, a1, h1, a2, h2, oa, os)






b1.shape = (500,)
x.shape =  (780,)
y.shape =  (10,)


In [16]:
#training 10 epoch for 1 exemple

#x, y, epoch, n, learning_rate=1e-1, weight_decay=0
loss=NN_model.train_SGD(x, y, 10, 1)


x.shape =  (780,)
y.shape =  (10,)
avgLoss


[[0.         2.30258509]
 [1.         2.21304726]
 [2.         2.12540029]
 [3.         2.03975147]
 [4.         1.95620456]
 [5.         1.87485816]
 [6.         1.79580405]
 [7.         1.71912567]
 [8.         1.64489656]
 [9.         1.57317906]]


In [17]:
#training for a small data set

x = np.random.uniform(-1, 1, size=(10, 780))
print('x.shape = ', x.shape)
#print('x = ', x)
#print('\n')

y = np.zeros(shape=(10, 10))
y[0, 1] = 1
y[1, 2] = 1
y[2, 7] = 1
y[3, 6] = 1
y[4, 9] = 1
y[5, 4] = 1
y[6, 1] = 1
y[7, 6] = 1
y[8, 5] = 1
y[9, 5] = 1

print('y.shape = ', y.shape)
#print('y = ', y)
print('\n')

# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_model_2= NN(780, 10, hidden_dims=(500,300))

#x, y, epoch, n, learning_rate=1e-1, weight_decay=0
loss=NN_model_2.train_SGD(x, y, 10, 10)


x.shape =  (10, 780)
y.shape =  (10, 10)


b1.shape = (500,)
x.shape =  (10, 780)
y.shape =  (10, 10)
avgLoss


[[0.         2.31846126]
 [1.         2.26797836]
 [2.         2.2266559 ]
 [3.         2.19275307]
 [4.         2.16482865]
 [5.         2.14170558]
 [6.         2.12243312]
 [7.         2.10624984]
 [8.         2.09254964]
 [9.         2.08085186]]


In [19]:
#Code taken from cours IFT6093 homework 3

data_circles = np.loadtxt(open('cercles.txt','r'))
cercle_x=np.array(data_circles[:, :-1])
cercle_y=np.array(data_circles[:, -1])
data_circles_target_onehot = onehot(cercle_y, 2)

print (data_circles.shape)

def plot_decision(model, axis=None):
    if axis is None:
        axis = plt.gca()
    xx, yy = np.meshgrid(np.arange(-1.1, 1.11, 0.01),
                         np.arange(-1.1, 1.11, 0.01))
    Z = model.loop_predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    axis.contourf(xx, yy, Z, 1, alpha=0.8)
    axis.scatter(data_circles[:, 0], data_circles[:, 1], c=data_circles[:, 2])

(1100, 3)


In [23]:
#  SGD training for cercle
import time

# self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2, initialization=zeros, mode=',train',
# datapath=None,model_path=None

NN_cercle= NN(2, 2, hidden_dims=(500,300), initialization='glorot')

#training 1 epoch for 1 exemple
start_time = time.time()
#x, y, epoch, n, learning_rate=1e-1, weight_decay=0
loss=NN_cercle.train_SGD(cercle_x, data_circles_target_onehot, 20, 1100)
time_SGD = time.time() - start_time

print('Time with loop implementation: %f seconds\n' % time_SGD)
      







b1.shape = (500,)
x.shape =  (1100, 2)
y.shape =  (1100, 2)
avgLoss


[[ 0.          0.7057476 ]
 [ 1.          0.70563596]
 [ 2.          0.70563596]
 [ 3.          0.70563596]
 [ 4.          0.70563596]
 [ 5.          0.70563596]
 [ 6.          0.70563596]
 [ 7.          0.70563596]
 [ 8.          0.70563596]
 [ 9.          0.70563596]
 [10.          0.70563596]
 [11.          0.70563596]
 [12.          0.70563596]
 [13.          0.70563596]
 [14.          0.70563596]
 [15.          0.70563596]
 [16.          0.70563596]
 [17.          0.70563596]
 [18.          0.70563596]
 [19.          0.70563596]]
Time with loop implementation: 121.251935 seconds

[[ 0.          0.7057476 ]
 [ 1.          0.70563596]
 [ 2.          0.70563596]
 [ 3.          0.70563596]
 [ 4.          0.70563596]
 [ 5.          0.70563596]
 [ 6.          0.70563596]
 [ 7.          0.70563596]
 [ 8.          0.70563596]
 [ 9.          0.70563596]
 [10.          0.70563596]
 [11.          0.70563596]
 [12.         

In [24]:
#Minibatch training for a small data set

x = np.random.uniform(-1, 1, size=(100, 780))
print('x.shape = ', x.shape)
#print('x = ', x)
print('\n')

y = np.zeros(shape=(100, 10))

for i in range (y.shape[0]):
    if ((random.choice((0, 1)))==0):
        y[i, 0] = 1
    else:
        y[i, 1] = 1 


print('y.shape = ', y.shape)
#print('y = ', y)
print('\n')

# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_model_mbatch_1= NN(780, 10, hidden_dims=(500,300))

#x, y, epoch, mb_size=100, learning_rate=1e-1, weight_decay=0
loss_mbatch_1=NN_model_mbatch_1.train_mbatch(x, y, 10, 100)


print(loss_mbatch_1)


x.shape =  (100, 780)


y.shape =  (100, 10)


b1.shape = (500,)
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
0
forward minibtach
backward minibatch
[[0.         2.30258509]
 [1.         2.26270751]
 [2.         2.22364134]
 [3.         2.18539328]
 [4.         2.14796868]
 [5.         2.1113715 ]
 [6.         2.0756043 ]
 [7.         2.04066824]
 [8.         2.00656308]
 [9.         1.97328721]]


In [30]:
#  minibatch training for cercle

# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_cercle_mbatch= NN(2, 2, hidden_dims=(500,300), initialization='glorot')

#training 20 epoch for whole data set
start_time = time.time()
#x, y, epoch, mb_size=100, learning_rate=1e-1, weight_decay=0
loss=NN_cercle_mbatch.train_mbatch(cercle_x, data_circles_target_onehot, 20, 100)

time_SGD = time.time() - start_time

print('Time with loop implementation: %f seconds\n' % time_SGD)

print(loss)

Time with loop implementation: 3.444197 seconds

[[ 0.          0.68700839]
 [ 1.          0.68461111]
 [ 2.          0.68210237]
 [ 3.          0.67938355]
 [ 4.          0.67654112]
 [ 5.          0.67341343]
 [ 6.          0.66996629]
 [ 7.          0.66630652]
 [ 8.          0.66234562]
 [ 9.          0.6580134 ]
 [10.          0.65334065]
 [11.          0.6482172 ]
 [12.          0.64282431]
 [13.          0.63699339]
 [14.          0.63083991]
 [15.          0.6239335 ]
 [16.          0.61634522]
 [17.          0.60819468]
 [18.          0.59932711]
 [19.          0.58986335]]


In [142]:
a= np.array([1, 0, 1, 0, 1, 0, 2, 0])
b= np.array([1, 1, 1, 1, 1, 1, 1, 1])
c= np.zeros(b.shape[0])
c = a==b
d= c.mean(axis=0)
d
e=a.argmax(axis=0)
e

6