In [2]:

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [67]:
#This class implementation is inspired from the NN implemented in cours IFT6093
class NN(object):
    
    
    def __init__(self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
                 datapath=None,model_path=None):
        
        self.indim = input_dim
        self.hd1 = hidden_dims[0] 
        self.hd2 = hidden_dims[1]
        self.n_hidden = n_hidden
        self.outd = output_dim
        self.W1 = np.zeros(shape=(hidden_dims[0], input_dim))
        print('W1.shape =', self.W1.shape)
        #print('W1 = ', self.W1)
        print('\n')
        
        self.b1 = np.zeros(hidden_dims[0])
        print('b1.shape =', self.b1.shape)
        #print('b1 = ', self.b1)
        print('\n')
        
        self.W2 = np.zeros(shape=(hidden_dims[1], hidden_dims[0]))
        print('W2.shape =', self.W2.shape)
        #print('W2 = ', self.W2)
        print('\n')
        
        self.b2 = np.zeros(hidden_dims[1])
        print('b2.shape =', self.b2.shape)
        #print('b2 = ', self.b2)
        print('\n')
        
        self.W3 = np.zeros(shape=(output_dim, hidden_dims[1]))
        print('W3.shape =', self.W3.shape)
        #print('W3 = ', self.W3)
        print('\n')
        
        self.b3 = np.zeros(output_dim)
        print('b3.shape =', self.b3.shape)
        #print('b3 = ', self.b3)
        print('\n')
        
        self.parameters = [self.W3, self.b3, self.W2, self.b2, self.W1, self.b1]
        
        
    def initialize_weights_normal(self):
        
        self.W1 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd1, self.indim))
        self.W2 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd2, self.hd1))
        self.W3 = np.random.normal(loc=0.0, scale=1.0, size=(self.outd, self.hd2))
    
    
    def initialize_weights_glorot(self,n_hidden,dims):
        
        dl1 = np.sqrt(6/(self.indim + self.hd1))
        dl2 = np.sqrt(6/(self.hd1 + self.hd2))
        dl3 = np.sqrt(6/(self.hd2 + self.outd))
        self.W1 = np.random.uniform(low=(-dl1), high=dl1, size=(self.hd1, self.indim))
        self.W2 = np.random.uniform(low=(-dl2), high=dl2, size=(self.hd2, self.hd1))
        self.W3 = np.random.uniform(low=(-dl3), high=dl3, size=(self.outd, self.hd2))
        
        
    #Method inspired from NN implemented in cours IFT6093
    def activation (self,input):
        return (input > 0) * input    
    

    def forward(self,x):
        
        a1 = np.dot (self.W1, x) + self.b1 
        #print('a1 = np.dot (self.W1, x) + self.b1')
        #print('a1.shape =', a1.shape)
        #print('a1 = ', a1)
        #print('\n')
        
        h1 = self.activation (a1)
        #print('h1 = self.activation (a1)')
        #print('h1.shape =', h1.shape)
        #print('h1 = ', h1)
        #print('\n')
        
        a2 = np.dot (self.W2, h1) + self.b2
        #print('a2 = np.dot (self.W2, h1) + self.b2')
        #print('a2.shape =', a2.shape)
        #print('a2 = ', a2)
        #print('\n')
        
        h2 = self.activation (a2)
        #print('h2 = self.activation (a2)')
        #print('h2.shape =', h2.shape)
        #print('h2 = ', h2)
        #print('\n')
        
        oa = np.dot (self.W3, h2) + self.b3
        #print('oa = np.dot (self.W3, h2) + self.b3')
        #print('oa.shape =', oa.shape)
        #print('oa = ', oa)
        #print('\n')
        
        os = softmax (oa)
        #print('os = softmax (oa)')
        #print('os.shape =', os.shape)
        #print('os = ', os)
        #print('\n')
               
        return a1, h1, a2, h2, oa, os
    
    

    #Method inspired from NN implemented in cours IFT6093
    def loss (self,prediction,os):
        return (y * (-np.log(os))).sum()
    

    def softmax (self,input,axis=1):
        shiftx = x - np.max (x, axis=axis, keepdims=True)
        exps = np.exp (shiftx)
        y = exps / exps.sum (axis=axis, keepdims=True)
        return y
    

    def backward(self, x, y, a1, h1, a2, h2, oa, os, weight_decay=0, cache=None):
        #print('x.shape = ', x.shape)
        #print('y.shape = ', y.shape)
        #print('os.shape = ', os.shape)
        grad_oa = os - y
        #print('grad_oa.shape =', grad_oa.shape)
        grad_W3 = np.outer (grad_oa, h2) + weight_decay * self.W3
        #print('grad_W3.shape =', grad_W3.shape)
        grad_b3 = grad_oa
        #print('grad_b3.shape =', grad_b3.shape)
        grad_h2 = np.dot (self.W3.T, grad_oa)
        #print(' grad_h2.shape =', grad_h2.shape)
        grad_a2 = (a2 > 0) * grad_h2
        #print('grad_a2.shape =', grad_a2.shape)
        grad_W2 = np.outer (grad_a2, h1) + weight_decay * self.W2
        #print('grad_W2.shape =', grad_W2.shape)
        grad_b2 = grad_a2 
        #print('grad_b2.shape =', grad_b2.shape)
        grad_h1 = np.dot (self.W2.T, grad_a2)
        #print('grad_h1.shape =', grad_h1.shape)
        grad_a1 = (a1 > 0) * grad_h1
        #print('grad_a1.shape =', grad_a1.shape)
        grad_W1 = np.outer (grad_a1, x) + weight_decay * self.W1
        #print('grad_W1.shape =', grad_W1.shape)
        grad_b1 = grad_a1
        #print('grad_b1.shape =', grad_b1.shape)
        
        grads=[grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1]
   
        return grads



    def update(self, grads, mu):
        for p, grad in zip(self.parameters, grads):
            p -= mu * grad
        
       

    def train_SGD(self, x, y, epoch, n, learning_rate=1e-1, weight_decay=0):
        print('x.shape = ', x.shape)
        print('y.shape = ', y.shape)
        avgLoss=np.zeros((epoch, 2))
        i=0
        while (i<epoch):
            losses = 0
            if (n==1):
                a1, h1, a2, h2, oa, os = self.forward(x)
                grads = self.backward(x, y, a1, h1, a2, h2, oa, os)
                self.update(grads, learning_rate)
                loss = self.loss(y, os)
                losses += loss  
                average_loss = losses / n
            else:    
                for j in range(x.shape[0]):
                    #print ('xj.shape = ', x[j].shape)
                    #print ('yj.shape = ', y[j].shape )
                    a1, h1, a2, h2, oa, os = self.forward(x[j])
                    grads = self.backward(x[j], y[j], a1, h1, a2, h2, oa, os)
                    self.update(grads, learning_rate)
                    loss = self.loss(y[j], os)
                    losses += loss 
                    
                average_loss = losses / n
                #print (average_loss)
            avgLoss[i, 0]= i
            avgLoss[i, 1]= average_loss
                
            i+=1
            
        print ('avgLoss') 
        print ('\n')
        print (avgLoss)  
        
        return avgLoss
    
    
    def forward_mbatch(self,input,labels):
        pass
        
    def backward_mbatch(self,cache, x, y,a1, h1, a2, h2, oa, os, weight_decay=0):
        pass
        
        
    
    #training with minibatch gradient decent
    def train_mbatch(self, x, y, mb_size=100, learning_rate=1e-1, weight_decay=0):
        
        for i in range (0, x.shape[0], mb_size):
        
            xi = x[i:(i+mb_size)]
            yi = y[i:(i+mb_size)]
            
            losses = 0
            for j in range (mb_size):
                a1, h1, a2, h2, oa, os = self.forward(self,xi,yi)
                grad = self.backward(self, xi, yi,a1, h1, a2, h2,oa, os,cache=none)
                self.parameters = update_parms(average_grads, mu)
                loss = self.loss(self, yi, os)
                losses += loss                          
            average_loss = losses / xi.shape[0]
                    
            return average_loss


In [60]:
def prediction (self, x):
    predictions = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        a1, h1, a2, h2, oa, os = self.forward(x[i])
        predictions[i] = os.argmax()
    return predictions

In [61]:
def test(self):
    pass
    

In [62]:
def softmax(x):
    shiftx = x - np.max(x)
    exps=np.exp(shiftx)
    y=exps/np.sum(exps)
    return y

def relu (x):
    y=np.maximum(0, x)
    return y

#function taken from IFT6093 cours
def onehot(y, n_classes):
    o = np.zeros(shape=(y.shape[0], n_classes))
    for i in range(y.shape[0]):
        o[i, int(y[i])] = 1
    return o

Implementation

In [68]:
#backpropagation for 1 exemple


# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_model= NN(780, 10, hidden_dims=(500,300))

x = np.random.uniform(-1, 1, size=(780))
print('x.shape = ', x.shape)
#print('x = ', x)
#print('\n')

y = np.zeros(shape=(10, ))
y[1] = 1
print('y.shape = ', y.shape)
#print('y = ', y)
#print('\n')

a1, h1, a2, h2, oa, os = NN_model.forward(x)

#self,cache, x, y,a1, h1, a2, h2, oa, os, weight_decay=0)
grads=NN_model.backward(x, y, a1, h1, a2, h2, oa, os)

for grad in grads:
    print('gradients computed by bprop: \n grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1',
      grad.shape)



W1.shape = (500, 780)


b1.shape = (500,)


W2.shape = (300, 500)


b2.shape = (300,)


W3.shape = (10, 300)


b3.shape = (10,)


x.shape =  (780,)
y.shape =  (10,)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (10, 300)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (10,)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (300, 500)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (300,)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (500, 780)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (500,)


In [69]:
#training 10 epoch for 1 exemple

#x, y, epoch, n, learning_rate=1e-1, weight_decay=0
loss=NN_model.train_SGD(x, y, 10, 1)


x.shape =  (780,)
y.shape =  (10,)
avgLoss


[[ 0.          2.30258509]
 [ 1.          2.21304726]
 [ 2.          2.12540029]
 [ 3.          2.03975147]
 [ 4.          1.95620456]
 [ 5.          1.87485816]
 [ 6.          1.79580405]
 [ 7.          1.71912567]
 [ 8.          1.64489656]
 [ 9.          1.57317906]]


In [70]:
#training for a small data set

x = np.random.uniform(-1, 1, size=(10, 780))
print('x.shape = ', x.shape)
#print('x = ', x)
#print('\n')

y = np.zeros(shape=(10, 10))
y[0, 1] = 1
y[1, 2] = 1
y[2, 7] = 1
y[3, 6] = 1
y[4, 9] = 1
y[5, 4] = 1
y[6, 1] = 1
y[7, 6] = 1
y[8, 5] = 1
y[9, 5] = 1

print('y.shape = ', y.shape)
#print('y = ', y)
print('\n')

# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_model_2= NN(780, 10, hidden_dims=(500,300))

#x, y, epoch, n, learning_rate=1e-1, weight_decay=0
loss=NN_model_2.train_SGD(x, y, 10, 10)
print(loss)

x.shape =  (10, 780)
y.shape =  (10, 10)


W1.shape = (500, 780)


b1.shape = (500,)


W2.shape = (300, 500)


b2.shape = (300,)


W3.shape = (10, 300)


b3.shape = (10,)


x.shape =  (10, 780)
y.shape =  (10, 10)
avgLoss


[[  0.          22.83400771]
 [  1.          22.32312096]
 [  2.          21.90394395]
 [  3.          21.55921442]
 [  4.          21.27461712]
 [  5.          21.03843852]
 [  6.          20.84119783]
 [  7.          20.6752849 ]
 [  8.          20.5346269 ]
 [  9.          20.41439562]]
[[  0.          22.83400771]
 [  1.          22.32312096]
 [  2.          21.90394395]
 [  3.          21.55921442]
 [  4.          21.27461712]
 [  5.          21.03843852]
 [  6.          20.84119783]
 [  7.          20.6752849 ]
 [  8.          20.5346269 ]
 [  9.          20.41439562]]


In [71]:
#Code taken from cours IFT6093 homework 3

data_circles = np.loadtxt(open('cercles.txt','r'))
cercle_x=np.array(data_circles[:, :-1])
data_circles_target_onehot = onehot(data_circles[:, -1], 2)
print (data_circles.shape)

def plot_decision(model, axis=None):
    if axis is None:
        axis = plt.gca()
    xx, yy = np.meshgrid(np.arange(-1.1, 1.11, 0.01),
                         np.arange(-1.1, 1.11, 0.01))
    Z = model.loop_predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    axis.contourf(xx, yy, Z, 1, alpha=0.8)
    axis.scatter(data_circles[:, 0], data_circles[:, 1], c=data_circles[:, 2])

FileNotFoundError: [Errno 2] No such file or directory: 'cercles.txt'

In [67]:
#  SGD training for cercle

# self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_cercle= NN(2, 2, hidden_dims=(500,300))

#training 1 epoch for 1 exemple

#x, y, epoch, n, learning_rate=1e-1, weight_decay=0
loss=NN_model.train_SGD(cercle_x, data_circles_target_onehot, 1, 1100)
print(loss)




W1.shape = (500, 2)


b1.shape = (500,)


W2.shape = (300, 500)


b2.shape = (300,)


W3.shape = (2, 300)


b3.shape = (2,)


x.shape =  (1100, 2)
y.shape =  (1100, 2)
a1 = np.dot (self.W1, x) + self.b1
a1.shape = (500,)


h1 = self.activation (a1)
h1.shape = (500,)


a2 = np.dot (self.W2, h1) + self.b2
a2.shape = (300,)


h2 = self.activation (a2)
h2.shape = (300,)


oa = np.dot (self.W3, h2) + self.b3
oa.shape = (10,)


os = softmax (oa)
os.shape = (10,)




AttributeError: 'NN' object has no attribute 'shape'

In [10]:
h = np.zeros(shape=(10, 10 ))
h[0, 1]=1
h[0, 1]


1.0