In [2]:

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [3]:
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its entries.

    The global maximum is subtracted before exponentiating so that large
    scores do not overflow ``np.exp``; this shift leaves the result unchanged.
    """
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    return np.maximum(0, x)

# Function taken from the IFT6093 course
def onehot(y, n_classes):
    """One-hot encode the labels in ``y``.

    Returns a float array of shape ``(y.shape[0], n_classes)`` in which row
    ``i`` is all zeros except for a 1 in column ``int(y[i])``.
    """
    n_samples = y.shape[0]
    encoded = np.zeros(shape=(n_samples, n_classes))
    for row, label in enumerate(y):
        # Labels may be stored as floats; int() truncates them to a column index.
        encoded[row, int(label)] = 1
    return encoded

In [4]:
# This class implementation is inspired by the NN implemented in the IFT6093 course
class NN(object):
    """Two-hidden-layer fully connected classifier (ReLU hidden units, softmax output).

    Weight matrices use the ``(fan_out, fan_in)`` layout, so a single example
    ``x`` of shape ``(input_dim,)`` is propagated as ``np.dot(W, x) + b``.

    All parameters start at zero.  With zero weights every hidden activation
    is zero, so every gradient except the one for ``b3`` is zero as well; call
    ``initialize_weights_normal`` or ``initialize_weights_glorot`` before
    training if the weights are meant to learn.

    Attributes:
        W1, b1, W2, b2, W3, b3: layer parameters (NumPy arrays).
        parameters: ``[W3, b3, W2, b2, W1, b1]`` -- the same order in which
            ``backward`` / ``backward_mbatch`` return their gradients.
        verbose: when True (default) shape traces are printed by the
            constructor, ``forward``, ``backward`` and ``train_SGD``.
    """

    def __init__(self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
                 datapath=None,model_path=None, verbose=True):
        """Allocate zero-filled parameters for the three layers.

        Args:
            input_dim: size of one input vector.
            output_dim: number of output classes.
            hidden_dims: sizes of the two hidden layers; only the first two
                entries are read.
            n_hidden: stored on the instance but not otherwise used.
            mode, datapath, model_path: accepted for interface compatibility;
                not used by this class.
            verbose: set to False to silence the shape traces.
        """
        self.verbose = verbose
        self.indim = input_dim
        self.hd1 = hidden_dims[0]
        self.hd2 = hidden_dims[1]
        self.n_hidden = n_hidden
        self.outd = output_dim

        self.W1 = np.zeros(shape=(self.hd1, input_dim))
        self.b1 = np.zeros(self.hd1)
        self.W2 = np.zeros(shape=(self.hd2, self.hd1))
        self.b2 = np.zeros(self.hd2)
        self.W3 = np.zeros(shape=(output_dim, self.hd2))
        self.b3 = np.zeros(output_dim)

        for name in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
            self._log('%s.shape =' % name, getattr(self, name).shape)
            self._log('\n')

        self._sync_parameters()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _log(self, *args):
        """Print a trace line unless the model was built with ``verbose=False``."""
        if getattr(self, 'verbose', True):
            print(*args)

    def _trace(self, description, name, value):
        """Log the expression that produced ``value`` followed by its shape."""
        self._log(description)
        self._log('%s.shape =' % name, value.shape)
        self._log('\n')

    def _sync_parameters(self):
        """Rebuild ``self.parameters`` from the current weight/bias attributes.

        The list stores references to the arrays; whenever an attribute such
        as ``self.W1`` is rebound to a new array the list must be rebuilt,
        otherwise ``update`` would modify arrays that ``forward`` no longer uses.
        """
        self.parameters = [self.W3, self.b3, self.W2, self.b2, self.W1, self.b1]

    # ------------------------------------------------------------------
    # initialisation
    # ------------------------------------------------------------------
    def initialize_weights_normal(self):
        """Draw W1, W2 and W3 from a standard normal distribution (biases are kept)."""
        self.W1 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd1, self.indim))
        self.W2 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd2, self.hd1))
        self.W3 = np.random.normal(loc=0.0, scale=1.0, size=(self.outd, self.hd2))
        self._sync_parameters()

    def initialize_weights_glorot(self,n_hidden=None,dims=None):
        """Draw W1, W2 and W3 uniformly from ``[-d, d]`` with ``d = sqrt(6 / (fan_in + fan_out))``.

        ``n_hidden`` and ``dims`` are not used (layer sizes come from the
        constructor); they are kept, now optional, for backward compatibility.
        """
        dl1 = np.sqrt(6.0 / (self.indim + self.hd1))
        dl2 = np.sqrt(6.0 / (self.hd1 + self.hd2))
        dl3 = np.sqrt(6.0 / (self.hd2 + self.outd))
        self.W1 = np.random.uniform(low=(-dl1), high=dl1, size=(self.hd1, self.indim))
        self.W2 = np.random.uniform(low=(-dl2), high=dl2, size=(self.hd2, self.hd1))
        self.W3 = np.random.uniform(low=(-dl3), high=dl3, size=(self.outd, self.hd2))
        self._sync_parameters()

    # ------------------------------------------------------------------
    # building blocks
    # ------------------------------------------------------------------
    # Method inspired by the NN implemented in the IFT6093 course
    def activation (self,input):
        """ReLU activation: element-wise ``max(input, 0)``.

        ``np.maximum`` is used rather than ``(input > 0) * input`` because the
        product form turns ``-inf`` into ``nan`` (``0 * -inf``).
        """
        return np.maximum(input, 0)

    def softmax (self,input,axis=1):
        """Numerically stable softmax of ``input`` along ``axis``.

        The maximum along ``axis`` is subtracted before exponentiating to
        avoid overflow.  ``axis=None`` normalises over every entry.
        """
        scores = np.asarray(input)
        shifted = scores - np.max(scores, axis=axis, keepdims=True)
        exps = np.exp(shifted)
        return exps / exps.sum(axis=axis, keepdims=True)

    # Method inspired by the NN implemented in the IFT6093 course
    def loss (self,prediction,os):
        """Cross-entropy ``sum(targets * -log(os))``.

        Args:
            prediction: the one-hot *target(s)*.  The parameter name is kept
                for interface compatibility; every call site passes the labels.
            os: softmax output(s) with the same shape as ``prediction``.

        Probabilities are clipped to the smallest positive normal float before
        the logarithm so that an underflowed 0 gives a large finite loss
        instead of ``inf`` (or ``nan`` from ``0 * inf``).
        """
        targets = np.asarray(prediction)
        probs = np.clip(np.asarray(os, dtype=float), np.finfo(float).tiny, None)
        return (targets * (-np.log(probs))).sum()

    # ------------------------------------------------------------------
    # single-example propagation
    # ------------------------------------------------------------------
    def forward(self,input):
        """Forward pass for ONE example.

        Args:
            input: vector of shape ``(input_dim,)``.

        Returns:
            ``(a1, h1, a2, h2, oa, os)``: pre-activations and activations of
            the two hidden layers, the output scores and their softmax.
        """
        x = np.asarray(input)

        a1 = np.dot(self.W1, x) + self.b1
        self._trace('a1 = np.dot (self.W1, x) + self.b1', 'a1', a1)

        h1 = self.activation(a1)
        self._trace('h1 = self.activation (a1)', 'h1', h1)

        a2 = np.dot(self.W2, h1) + self.b2
        self._trace('a2 = np.dot (self.W2, h1) + self.b2', 'a2', a2)

        h2 = self.activation(a2)
        self._trace('h2 = self.activation (a2)', 'h2', h2)

        oa = np.dot(self.W3, h2) + self.b3
        self._trace('oa = np.dot (self.W3, h2) + self.b3', 'oa', oa)

        # axis=None normalises over the whole score vector.
        os = self.softmax(oa, axis=None)
        self._trace('os = softmax (oa)', 'os', os)

        return a1, h1, a2, h2, oa, os

    def backward(self, x, y, a1, h1, a2, h2, oa, os, weight_decay=0, cache=None):
        """Backpropagate the cross-entropy loss for ONE example.

        Args:
            x: input vector; y: one-hot target vector.
            a1, h1, a2, h2, oa, os: values returned by ``forward(x)``.
            weight_decay: L2 coefficient; ``weight_decay * W`` is added to
                each weight gradient (biases are not decayed).
            cache: unused, kept for interface compatibility.

        Returns:
            ``[grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1]``.
        """
        x = np.asarray(x)
        y = np.asarray(y)

        # Gradient of softmax + cross-entropy with respect to the scores.
        grad_oa = os - y
        grad_W3 = np.outer(grad_oa, h2) + weight_decay * self.W3
        grad_b3 = grad_oa
        grad_h2 = np.dot(self.W3.T, grad_oa)
        # ReLU derivative: pass the gradient only where the unit was active.
        grad_a2 = (a2 > 0) * grad_h2
        grad_W2 = np.outer(grad_a2, h1) + weight_decay * self.W2
        grad_b2 = grad_a2
        grad_h1 = np.dot(self.W2.T, grad_a2)
        grad_a1 = (a1 > 0) * grad_h1
        grad_W1 = np.outer(grad_a1, x) + weight_decay * self.W1
        grad_b1 = grad_a1

        self._log('x.shape = ', x.shape)
        self._log('y.shape = ', y.shape)
        self._log('os.shape = ', os.shape)
        self._log('grad_oa.shape =', grad_oa.shape)
        self._log('grad_W3.shape =', grad_W3.shape)
        self._log('grad_b3.shape =', grad_b3.shape)
        self._log(' grad_h2.shape =', grad_h2.shape)
        self._log('grad_a2.shape =', grad_a2.shape)
        self._log('grad_W2.shape =', grad_W2.shape)
        self._log('grad_b2.shape =', grad_b2.shape)
        self._log('grad_h1.shape =', grad_h1.shape)
        self._log('grad_a1.shape =', grad_a1.shape)
        self._log('grad_W1.shape =', grad_W1.shape)
        self._log('grad_b1.shape =', grad_b1.shape)

        return [grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1]

    def update(self, grads, mu):
        """Gradient-descent step ``p -= mu * grad`` applied in place to every parameter.

        ``grads`` must follow the order of ``self.parameters``.  The list is
        rebuilt first so that the arrays actually used by ``forward`` are the
        ones updated, even if a weight attribute was rebound since the last call.
        """
        self._sync_parameters()
        for p, grad in zip(self.parameters, grads):
            p -= mu * grad

    def train_SGD(self, x, y, epoch, n, learning_rate=1e-1, weight_decay=0):
        """Train with per-example stochastic gradient descent.

        Args:
            x, y: either a single example (1-D ``x`` with ``n == 1``) or
                arrays whose first ``n`` rows are the examples / one-hot targets.
            epoch: number of passes over the ``n`` examples.
            n: number of examples to use.
            learning_rate: step size passed to ``update``.
            weight_decay: L2 coefficient passed to ``backward``.

        Returns:
            Array of shape ``(epoch, 2)``; column 0 is the epoch index and
            column 1 the mean loss over that epoch (each example's loss is
            measured before its own update).

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError('n must be at least 1, got %r' % (n,))

        x = np.asarray(x)
        y = np.asarray(y)
        self._log('x.shape = ', x.shape)
        self._log('y.shape = ', y.shape)

        if n == 1 and x.ndim == 1:
            examples = [(x, y)]
        else:
            examples = [(x[j], y[j]) for j in range(n)]

        avgLoss = np.zeros((epoch, 2))
        for i in range(epoch):
            losses = 0
            for xj, yj in examples:
                a1, h1, a2, h2, oa, os = self.forward(xj)
                grads = self.backward(xj, yj, a1, h1, a2, h2, oa, os,
                                      weight_decay=weight_decay)
                self.update(grads, learning_rate)
                losses += self.loss(yj, os)
            avgLoss[i, 0] = i
            avgLoss[i, 1] = losses / n

        return avgLoss

    # ------------------------------------------------------------------
    # minibatch propagation
    # ------------------------------------------------------------------
    def forward_mbatch(self,input,labels):
        """Forward pass for a batch of examples stored one per row.

        Args:
            input: array of shape ``(batch, input_dim)``; a 1-D vector is
                treated as a batch of one.
            labels: not used by the forward computation; accepted for
                interface compatibility.

        Returns:
            ``(a1, h1, a2, h2, oa, os)`` where every array has one row per example.
        """
        x = np.atleast_2d(np.asarray(input))
        a1 = np.dot(x, self.W1.T) + self.b1
        h1 = self.activation(a1)
        a2 = np.dot(h1, self.W2.T) + self.b2
        h2 = self.activation(a2)
        oa = np.dot(h2, self.W3.T) + self.b3
        os = self.softmax(oa, axis=1)
        return a1, h1, a2, h2, oa, os

    def backward_mbatch(self,cache, x, y,a1, h1, a2, h2, oa, os, weight_decay=0):
        """Gradients of the MEAN cross-entropy loss over a batch.

        Args:
            cache: unused, kept for interface compatibility.
            x, y: batch inputs and one-hot targets, one example per row.
            a1, h1, a2, h2, oa, os: values returned by ``forward_mbatch``.
            weight_decay: L2 coefficient added to the weight gradients.

        Returns:
            ``[grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1]``.

        Raises:
            ValueError: if the batch is empty.
        """
        x = np.atleast_2d(np.asarray(x))
        y = np.atleast_2d(np.asarray(y))
        batch = x.shape[0]
        if batch == 0:
            raise ValueError('cannot backpropagate through an empty batch')

        # Dividing once here averages every downstream gradient over the batch.
        grad_oa = (os - y) / float(batch)
        grad_W3 = np.dot(grad_oa.T, h2) + weight_decay * self.W3
        grad_b3 = grad_oa.sum(axis=0)
        grad_h2 = np.dot(grad_oa, self.W3)
        grad_a2 = (a2 > 0) * grad_h2
        grad_W2 = np.dot(grad_a2.T, h1) + weight_decay * self.W2
        grad_b2 = grad_a2.sum(axis=0)
        grad_h1 = np.dot(grad_a2, self.W2)
        grad_a1 = (a1 > 0) * grad_h1
        grad_W1 = np.dot(grad_a1.T, x) + weight_decay * self.W1
        grad_b1 = grad_a1.sum(axis=0)

        return [grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1]

    # Training with minibatch gradient descent
    def train_mbatch(self, x, y, mb_size=100, learning_rate=1e-1, weight_decay=0):
        """Run one pass over ``x`` / ``y`` in minibatches of ``mb_size`` rows.

        Each minibatch produces one parameter update from the batch-averaged
        gradient.  The last minibatch may be smaller than ``mb_size``.

        Returns:
            Mean per-example loss over the whole pass (each minibatch is
            scored before its own update), or ``None`` when ``x`` has no rows.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        n_examples = x.shape[0]
        if n_examples == 0:
            return None

        total_loss = 0.0
        for start in range(0, n_examples, mb_size):
            xi = x[start:(start + mb_size)]
            yi = y[start:(start + mb_size)]
            a1, h1, a2, h2, oa, os = self.forward_mbatch(xi, yi)
            grads = self.backward_mbatch(None, xi, yi, a1, h1, a2, h2, oa, os,
                                         weight_decay=weight_decay)
            self.update(grads, learning_rate)
            total_loss += self.loss(yi, os)

        return total_loss / n_examples


In [5]:
def prediction (self, x):
    """Predict a class index for every row of ``x``.

    Each row is passed to ``self.forward``; the index of the largest entry in
    the last returned value (the output probabilities) is that row's
    prediction.  Returns a float array of shape ``(x.shape[0],)``.
    """
    n_samples = x.shape[0]
    winners = [self.forward(x[row])[-1].argmax() for row in range(n_samples)]
    return np.array(winners, dtype=float)

In [9]:
def test(self):
    .
    .

SyntaxError: invalid syntax (<ipython-input-9-a84208c16e35>, line 2)

Implementation

In [6]:
# Backpropagation for 1 example

# Seed the legacy global NumPy RNG so the random input below is identical on
# every Restart Kernel -> Run All.
np.random.seed(0)

# NN signature: input_dim, output_dim, hidden_dims=(1024,2048), n_hidden=2,
#               mode=',train', datapath=None, model_path=None
NN_model= NN(780, 10, hidden_dims=(500,300))

# One random input vector and a one-hot target for class 1.
x = np.random.uniform(-1, 1, size=(780, ))
print('x.shape = ', x.shape)

y = np.zeros(shape=(10, ))
y[1] = 1
print('y.shape = ', y.shape)

a1, h1, a2, h2, oa, os = NN_model.forward(x)

# backward signature: x, y, a1, h1, a2, h2, oa, os, weight_decay=0, cache=None
grads=NN_model.backward(x, y, a1, h1, a2, h2, oa, os)

# backward() returns the gradients in this order; pair each with its own name
# so every printed shape is labelled with the gradient it belongs to.
grad_names = ('grad_W3', 'grad_b3', 'grad_W2', 'grad_b2', 'grad_W1', 'grad_b1')
print('gradients computed by bprop:')
for grad_name, grad in zip(grad_names, grads):
    print(' %s.shape =' % grad_name, grad.shape)



W1.shape = (500, 780)


b1.shape = (500,)


W2.shape = (300, 500)


b2.shape = (300,)


W3.shape = (10, 300)


b3.shape = (10,)


x.shape =  (780,)
y.shape =  (10,)
a1 = np.dot (self.W1, x) + self.b1
a1.shape = (500,)


h1 = self.activation (a1)
h1.shape = (500,)


a2 = np.dot (self.W2, h1) + self.b2
a2.shape = (300,)


h2 = self.activation (a2)
h2.shape = (300,)


oa = np.dot (self.W3, h2) + self.b3
oa.shape = (10,)


os = softmax (oa)
os.shape = (10,)


x.shape =  (780,)
y.shape =  (10,)
os.shape =  (10,)
grad_oa.shape = (10,)
grad_W3.shape = (10, 300)
grad_b3.shape = (10,)
 grad_h2.shape = (300,)
grad_a2.shape = (300,)
grad_W2.shape = (300, 500)
grad_b2.shape = (300,)
grad_h1.shape = (500,)
grad_a1.shape = (500,)
grad_W1.shape = (500, 780)
grad_b1.shape = (500,)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (10, 300)
gradients computed by bprop: 
 grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1 (10,)
gradients computed by bprop: 
 grad_W3, 

In [7]:
# Train for 10 epochs on the single example (x, y).
# train_SGD signature: x, y, epoch, n, learning_rate=1e-1, weight_decay=0
loss = NN_model.train_SGD(x, y, epoch=10, n=1)
print(loss)

x.shape =  (780,)
y.shape =  (10,)
a1 = np.dot (self.W1, x) + self.b1
a1.shape = (500,)


h1 = self.activation (a1)
h1.shape = (500,)


a2 = np.dot (self.W2, h1) + self.b2
a2.shape = (300,)


h2 = self.activation (a2)
h2.shape = (300,)


oa = np.dot (self.W3, h2) + self.b3
oa.shape = (10,)


os = softmax (oa)
os.shape = (10,)


x.shape =  (780,)
y.shape =  (10,)
os.shape =  (10,)
grad_oa.shape = (10,)
grad_W3.shape = (10, 300)
grad_b3.shape = (10,)
 grad_h2.shape = (300,)
grad_a2.shape = (300,)
grad_W2.shape = (300, 500)
grad_b2.shape = (300,)
grad_h1.shape = (500,)
grad_a1.shape = (500,)
grad_W1.shape = (500, 780)
grad_b1.shape = (500,)
a1 = np.dot (self.W1, x) + self.b1
a1.shape = (500,)


h1 = self.activation (a1)
h1.shape = (500,)


a2 = np.dot (self.W2, h1) + self.b2
a2.shape = (300,)


h2 = self.activation (a2)
h2.shape = (300,)


oa = np.dot (self.W3, h2) + self.b3
oa.shape = (10,)


os = softmax (oa)
os.shape = (10,)


x.shape =  (780,)
y.shape =  (10,)
os.shape =  (10,)
grad_o

In [1]:
# Scratch check: a counter starting at 0 and incremented once ends at 1.
i = 0
i = i + 1
i

1