In [1]:
import random
import time

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


In [5]:
#This class implementation is inspired from the NN implemented in cours IFT6093
class NN(object):
    
    
    def __init__(self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2, initialization='zeros', mode=',train',
                 datapath=None,model_path=None):
        
        self.indim = input_dim
        self.hd1 = hidden_dims[0] 
        self.hd2 = hidden_dims[1]
        self.n_hidden = n_hidden
        self.outd = output_dim
        self.W1 = np.zeros(shape=(hidden_dims[0], input_dim))
        #print('W1.shape =', self.W1.shape)
        #print('W1 = ', self.W1)
        #print('\n')
        
        self.b1 = np.zeros(hidden_dims[0])
        #print('b1.shape =', self.b1.shape)
        #print('b1 = ', self.b1)
        #print('\n')
        
        self.W2 = np.zeros(shape=(hidden_dims[1], hidden_dims[0]))
        #print('W2.shape =', self.W2.shape)
        #print('W2 = ', self.W2)
        #print('\n')
        
        self.b2 = np.zeros(hidden_dims[1])
        #print('b2.shape =', self.b2.shape)
        #print('b2 = ', self.b2)
        #print('\n')
        
        self.W3 = np.zeros(shape=(output_dim, hidden_dims[1]))
        #print('W3.shape =', self.W3.shape)
        #print('W3 = ', self.W3)
        #print('\n')
        
        self.b3 = np.zeros(output_dim)
        #print('b3.shape =', self.b3.shape)
        #print('b3 = ', self.b3)
        #print('\n')
        
        if initialization=='normal':
            self.initialize_weights_normal()
            #print('W1 = ', self.W1)
            #print('W2 = ', self.W2)
            #print('W3 = ', self.W3)
            
            
        if initialization=='glorot':
            self.initialize_weights_glorot()
            #print('W1 = ', self.W1)
            #print('W2 = ', self.W2)
            #print('W3 = ', self.W3)
            
        
        self.parameters = [self.W3, self.b3, self.W2, self.b2, self.W1, self.b1]
        
        
    def initialize_weights_normal(self):
        
        self.W1 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd1, self.indim))
        self.W2 = np.random.normal(loc=0.0, scale=1.0, size=(self.hd2, self.hd1))
        self.W3 = np.random.normal(loc=0.0, scale=1.0, size=(self.outd, self.hd2))
        
        return self
    
    
    def initialize_weights_glorot(self):
        
        dl1 = np.sqrt(6/(self.indim + self.hd1))
        dl2 = np.sqrt(6/(self.hd1 + self.hd2))
        dl3 = np.sqrt(6/(self.hd2 + self.outd))
        self.W1 = np.random.uniform(low=(-dl1), high=dl1, size=(self.hd1, self.indim))
        self.W2 = np.random.uniform(low=(-dl2), high=dl2, size=(self.hd2, self.hd1))
        self.W3 = np.random.uniform(low=(-dl3), high=dl3, size=(self.outd, self.hd2))
        
        return self
        
        
        
    #Method inspired from NN implemented in cours IFT6093
    def activation (self,input):
        return (input > 0) * input  
    
    #line 85

    def forward(self,x):
        #print('forward')
        
        a1 = np.dot (self.W1, x) + self.b1 
        #print('a1 = np.dot (self.W1, x) + self.b1')
        #print('a1.shape =', a1.shape)
        #print('a1 = ', a1)
        #print('\n')
        
        h1 = self.activation (a1)
        #print('h1 = self.activation (a1)')
        #print('h1.shape =', h1.shape)
        #print('h1 = ', h1)
        #print('\n')
        
        a2 = np.dot (self.W2, h1) + self.b2
        #print('a2 = np.dot (self.W2, h1) + self.b2')
        #print('a2.shape =', a2.shape)
        #print('a2 = ', a2)
        #print('\n')
        
        h2 = self.activation (a2)
        #print('h2 = self.activation (a2)')
        #print('h2.shape =', h2.shape)
        #print('h2 = ', h2)
        #print('\n')
        
    
        oa = np.dot (self.W3, h2) + self.b3
        #print('oa = np.dot (self.W3, h2) + self.b3')
        #print('oa.shape =', oa.shape)
        #print('oa = ', oa)
        #print('\n')
        
        os = self.softmax (oa, axis=0)
        #print('os = softmax (oa)')
        #print('os.shape =', os.shape)
        #print('os = ', os)
        #print('\n')
               
        return a1, h1, a2, h2, oa, os
    
    

    #Method inspired from NN implemented in cours IFT6093
    def loss (self, y, os):
        return (y * (-np.log(os))).sum()
    

    def softmax (self,x,axis=1):
        shiftx = x - np.max (x, axis=axis, keepdims=True)
        exps = np.exp (shiftx)
        y = exps / exps.sum (axis=axis, keepdims=True)
        return y


    def backward(self, x, y, a1, h1, a2, h2, oa, os, weight_decay=0, cache=None):
        #print ('backward')
        #print('x.shape = ', x.shape)
        #print('y.shape = ', y.shape)
        #print('os.shape = ', os.shape)
        grad_oa = os - y
        #print('grad_oa.shape =', grad_oa.shape)
        #print('\n')
        
        grad_W3 = np.outer (grad_oa, h2) + weight_decay * self.W3
        #print('grad_W3.shape =', grad_W3.shape)
        #print('\n')
        
        grad_b3 = grad_oa
        #print('grad_b3.shape =', grad_b3.shape)
        #print('\n')
        
        grad_h2 = np.dot (self.W3.T, grad_oa)
        #print(' grad_h2.shape =', grad_h2.shape)
        #print('\n')
        
        grad_a2 = (a2 > 0) * grad_h2
        #print('grad_a2.shape =', grad_a2.shape)
        #print('\n')
        
        grad_W2 = np.outer (grad_a2, h1) + weight_decay * self.W2
        #print('grad_W2.shape =', grad_W2.shape)
        #print('\n')
        
        grad_b2 = grad_a2 
        #print('grad_b2.shape =', grad_b2.shape)
        #print('\n')
        
        grad_h1 = np.dot (self.W2.T, grad_a2)
        #print('grad_h1.shape =', grad_h1.shape)
        #print('\n')
        
        grad_a1 = (a1 > 0) * grad_h1
        #print('grad_a1.shape =', grad_a1.shape)
        #print('\n')
        
        grad_W1 = np.outer (grad_a1, x) + weight_decay * self.W1
        #print('grad_W1.shape =', grad_W1.shape)
        #print('\n')
        
        grad_b1 = grad_a1
        #print('grad_b1.shape =', grad_b1.shape)
        #print('\n')
        
        grads=[grad_W3, grad_b3, grad_W2, grad_b2, grad_W1, grad_b1]
   
        return grads



    def update(self, grads, learning_rate):
        for p, grad in zip(self.parameters, grads):
            p -= learning_rate * grad
        
    #line 201   

    def train_SGD(self, x, y_onehot, n, learning_rate=1e-1, weight_decay=0):
        y= y_onehot
        #print('x.shape = ', x.shape)
        #print('y.shape = ', y.shape)
        losses = 0
        if (n==1):
            a1, h1, a2, h2, oa, os = self.forward(x)
            grads = self.backward(x, y, a1, h1, a2, h2, oa, os)
            self.update(grads, learning_rate)
            loss = self.loss(y, os)
            losses += loss  
            average_loss = losses / n
        else:    
            for j in range(x.shape[0]):
                a1, h1, a2, h2, oa, os = self.forward(x[j])
                grads = self.backward(x[j], y[j], a1, h1, a2, h2, oa, os)
                self.update(grads, learning_rate)
                loss = self.loss(y[j], os)
                losses += loss 
                
            average_loss = losses / n
            #print (average_loss)

        #print (average_loss)   
        return average_loss
    
    
    def prediction_SGD (self, x):
        predictions = np.zeros(x.shape[0])
        for i in range(x.shape[0]):
            _, _, _, _, _, os = self.forward(x[i])
            predictions[i] = os.argmax(axis=0)
            
        return predictions
    
    def accuracy_SGD (self, prediction, y):
        accuracies=0
        for i in range (y.shape[0]):
            accuracies+=(prediction[i]==y[i])
            
        return accuracies / y.shape[0]
    
    
    def test_SGD(self, x, y_onehot, y):
        pred=np.zeros(y.shape[0])
        avg_loss=0
        for i in range (x.shape[0]):
            _, _, _, _, _, os = self.forward(x[i])
            loss=self.loss (y_onehot[i], os)
            avg_loss+=loss
            pred[i]=os.argmax()
            
        accuracy=self.accuracy_SGD(pred, y)    
        return avg_loss / x.shape[0] , accuracy
    
   
    def forward_mbatch(self, x):
        #print ('forward minibtach')
        a1 = np.dot ( x, self.W1.T) + self.b1 
        #print('a1 = np.dot (x, self.W1.T) + self.b1')
        #print('a1.shape =', a1.shape)
        #print('a1 = ', a1)
        #print('\n')
        
        h1 = self.activation (a1)
        #print('h1 = self.activation (a1)')
        #print('h1.shape =', h1.shape)
        #print('h1 = ', h1)
        #print('\n')
        
        a2 = np.dot (h1, self.W2.T) + self.b2
        #print('a2 = np.dot (h1, self.W2.T) + self.b2')
        #print('a2.shape =', a2.shape)
        #print('a2 = ', a2)
        #print('\n')
        
        h2 = self.activation (a2)
        #print('h2 = self.activation (a2)')
        #print('h2.shape =', h2.shape)
        #print('h2 = ', h2)
        #print('\n')
        
        oa = np.dot (h2, self.W3.T) + self.b3
        #print('oa = np.dot (h2, self.W3.T) + self.b3')
        #print('oa.shape =', oa.shape)
        #print('oa = ', oa)
        #print('\n')
        
        os = self.softmax (oa, axis=1)
        #print('os = softmax (oa)')
        #print('os.shape =', os.shape)
        #print('os = ', os)
        #print('\n')
               
        return a1, h1, a2, h2, oa, os
    
    #line 303
        
    def backward_mbatch(self, x, y, a1, h1, a2, h2, oa, os, batch_n, weight_decay=0):
        #print ('backward minibatch')
        
        #print('x.shape = ', x.shape)
        #print('y.shape = ', y.shape)
        #print('os.shape = ', os.shape)
        
        
        batch_n = x.shape[0]
        bgrad_oa = os - y
        #print('bgrad_oa.shape =', bgrad_oa.shape)
        #print('\n')
        
        bgrad_W3 = np.dot (bgrad_oa.T, h2) / batch_n  + weight_decay * self.W3
        #print('bgrad_W3.shape =', bgrad_W3.shape)
        #print('\n')
        
        bgrad_b3 = bgrad_oa.mean(axis=0)
        #print('bgrad_b3.shape =', bgrad_b3.shape)
        #print('\n')
        
        bgrad_h2 = np.dot (bgrad_oa, self.W3)
        #print(' bgrad_h2.shape =', bgrad_h2.shape)
        #print('\n')
        
        bgrad_a2 = (a2 > 0) * bgrad_h2
        #print('bgrad_a2.shape =', bgrad_a2.shape)
        #print('\n')
        
        bgrad_W2 = np.dot (bgrad_a2.T, h1) / batch_n  + weight_decay * self.W2
        #print('bgrad_W2.shape =', bgrad_W2.shape)
        #print('\n')
        
        bgrad_b2 = bgrad_a2.mean(axis=0) 
        #print('bgrad_b2.shape =', bgrad_b2.shape)
        #print('\n')
        
        bgrad_h1 = np.dot (bgrad_a2, self.W2)
        #print('bgrad_h1.shape =', bgrad_h1.shape)
        #print('\n')
        
     
        bgrad_a1 = (a1 > 0) * bgrad_h1
        #print('bgrad_a1.shape =', bgrad_a1.shape)
        #print('\n')
        
        bgrad_W1 = np.dot (bgrad_a1.T, x) / batch_n  + weight_decay * self.W1
        #print('bgrad_W1.shape =', bgrad_W1.shape)
        #print('\n')
        
        bgrad_b1 = bgrad_a1.mean(axis=0)
        #print('bgrad_b1.shape =', bgrad_b1.shape)
        #print('\n')
        
        bgrads=[bgrad_W3, bgrad_b3, bgrad_W2, bgrad_b2, bgrad_W1, bgrad_b1]
   
        return bgrads

    #line 360

    #Method taken fron homwork 3 in cours IFT6093
    def loss_mbatch(self, os, y):
        return (y * (-np.log(os))).sum(axis=1).mean(axis=0)     
        
    
    #training with minibatch gradient decent
    def train_mbatch(self, x, y_onehot, mb_size=100, learning_rate=1e-1, weight_decay=0):
        average_loss=0
        for i in range (0, x.shape[0], mb_size):
            #print (i)
            xi = x[i:(i+mb_size)]
            yi = y_onehot[i:(i+mb_size)]
        
            losses = 0
            a1, h1, a2, h2, oa, os = self.forward_mbatch(xi)
            grads = self.backward_mbatch (xi, yi,a1, h1, a2, h2,oa, os, mb_size)
            self.update(grads, learning_rate)
            average_loss = self.loss_mbatch(os, yi) 
                          
        return average_loss
    
    
    #line 385
    
    def prediction_mbatch (self, x):
        _, _, _, _, _, os = self.forward_mbatch(x)
        return os.argmax(axis=1)
    

    def accuracy_mbatch (self, prediction, y):
        accuracy = np.zeros(y.shape[0])
        accuracy = prediction == y
        return accuracy.mean(axis=0)
    

    def test_mbatch(self, x, y_onehot, y):
        _, _, _, _, _, os = self.forward_mbatch(x)
        loss = self.loss_mbatch (os, y_onehot)
        accuracy=self.accuracy_mbatch (os.argmax(axis=1), y)
        return loss, accuracy
    
    
    def finite_difference():
        
        pass    
        


In [6]:
def mat_test(self, x, y):
    _, _, _, os = self.mat_fprop(x)
    return self.mat_loss(os, y), os.argmax(axis=1)
    

In [7]:
def softmax (self, x):
        shiftx = x - np.max(x)
        exps=np.exp(shiftx)
        y=exps/np.sum(exps)
        return y

def relu (x):
    y=np.maximum(0, x)
    return y

#function taken from IFT6093 cours
def onehot(y, n_classes):
    o = np.zeros(shape=(y.shape[0], n_classes))
    for i in range(y.shape[0]):
        o[i, int(y[i])] = 1
    return o

Implementation

In [8]:
#backpropagation for 1 exemple


# self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2, initialization=zeros, mode=',train',
# datapath=None,model_path=None

NN_model= NN(780, 10, hidden_dims=(500,300))

x = np.random.uniform(-1, 1, size=(780))
print('x.shape = ', x.shape)
print('\n')

y = np.zeros(shape=(10, ))
y[1] = 1
print('y.shape = ', y.shape)
print('\n')

a1, h1, a2, h2, oa, os = NN_model.forward(x)

#self,cache, x, y,a1, h1, a2, h2, oa, os, weight_decay=0)
grads=NN_model.backward(x, y, a1, h1, a2, h2, oa, os)

print ('a1 shape = ', a1.shape)
print ('h1 shape = ', h1.shape)
print ('a2 shape = ', a2.shape)
print ('h2 shape = ', h2.shape)
print ('oa shape = ', oa.shape)
print ('os shape = ', os.shape)


print ('grad_W3 shape = ', grads[0].shape)
print ('grad_b3 shape = ', grads[1].shape)
print ('grad_W2 shape = ', grads[2].shape)
print ('grad_b2 shape = ', grads[3].shape)
print ('grad_W1 shape = ', grads[4].shape)
print ('grad_b1 shape = ', grads[5].shape)

x.shape =  (780,)


y.shape =  (10,)


a1 shape =  (500,)
h1 shape =  (500,)
a2 shape =  (300,)
h2 shape =  (300,)
oa shape =  (10,)
os shape =  (10,)
grad_W3 shape =  (10, 300)
grad_b3 shape =  (10,)
grad_W2 shape =  (300, 500)
grad_b2 shape =  (300,)
grad_W1 shape =  (500, 780)
grad_b1 shape =  (500,)


In [9]:
#training 10 epoch for 1 exemple

#x, y_onehot, n, learning_rate=1e-1, weight_decay=0
epochs=10
for epoch in range (epochs):
    loss=NN_model.train_SGD(x, y, 1)
    print('epoch ', epoch, ' loss ', loss)
    


epoch  0  loss  2.3025850929940455
epoch  1  loss  2.2130472649209176
epoch  2  loss  2.125400286246365
epoch  3  loss  2.039751468982383
epoch  4  loss  1.956204563387701
epoch  5  loss  1.8748581566171187
epoch  6  loss  1.7958040494881427
epoch  7  loss  1.7191256666613888
epoch  8  loss  1.644896559738838
epoch  9  loss  1.5731790633976404


In [10]:
#training for a small data set

x = np.random.uniform(-1, 1, size=(10, 780))
print('x.shape = ', x.shape)
#print('x = ', x)
#print('\n')

y = np.zeros(shape=(10, 10))
y[0, 1] = 1
y[1, 2] = 1
y[2, 7] = 1
y[3, 6] = 1
y[4, 9] = 1
y[5, 4] = 1
y[6, 1] = 1
y[7, 6] = 1
y[8, 5] = 1
y[9, 5] = 1

print('y.shape = ', y.shape)
#print('y = ', y)
print('\n')

# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None


NN_model_2= NN(780, 10, hidden_dims=(500,300))
epochs=10
for epoch in range (epochs):
    #x, y_onehot, n, learning_rate=1e-1, weight_decay=0
    loss=NN_model_2.train_SGD(x, y, 10)
    print('epoch ', epoch, ' loss ', loss)


x.shape =  (10, 780)
y.shape =  (10, 10)


epoch  0  loss  2.318461256069697
epoch  1  loss  2.2679783604520307
epoch  2  loss  2.226655896296422
epoch  3  loss  2.1927530682991625
epoch  4  loss  2.1648286547970996
epoch  5  loss  2.1417055808926277
epoch  6  loss  2.1224331160047236
epoch  7  loss  2.1062498429317165
epoch  8  loss  2.0925496365090255
epoch  9  loss  2.0808518553875017


In [11]:
#Code inspired from cours IFT6093 homework 3

data_circles = np.loadtxt(open('cercles.txt','r'))
cercle_x=np.array(data_circles[:, :-1])
cercle_y=np.array(data_circles[:, -1])

print (data_circles.shape)
i=int(cercle_x.shape[0]*0.8)
print (i)

cercle_x_train=np.array(cercle_x[:i])
cercle_y_train=np.array(cercle_y[:i])
cercle_y_train_onehot= onehot (cercle_y_train, 2)

cercle_x_test=np.array(cercle_x[i:])
cercle_y_test=np.array(cercle_y[i:])
cercle_y_test_onehot= onehot (cercle_y_test, 2)

print (cercle_x_train.shape)
print (cercle_y_train.shape)
print (cercle_y_train_onehot.shape)
print (cercle_x_test.shape)
print (cercle_y_test.shape)
print (cercle_y_test_onehot.shape)


(1100, 3)
880
(880, 2)
(880,)
(880, 2)
(220, 2)
(220,)
(220, 2)


In [12]:
#  SGD training for cercle
import time

# self, input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2, initialization=zeros, mode=',train',
# datapath=None,model_path=None

NN_cercle= NN(2, 2, hidden_dims=(500,300), initialization='glorot')

#training 1 epoch for 1 exemple
start_time = time.time()

epoc=[]
train_losses=[]
train_accuracies=[]
test_losses=[]
test_accuracies=[]


epochs=10
for epoch in range (epochs):
    
    loss=NN_cercle.train_SGD(cercle_x_train, cercle_y_train_onehot, 1100)
    
    loss_train, accuracy_train = NN_cercle.test_SGD(cercle_x_train, cercle_y_train_onehot, cercle_y_train)
    loss_test, accuracy_test = NN_cercle.test_SGD(cercle_x_test, cercle_y_test_onehot, cercle_y_test)
    
    epoc.append(epoch)
    train_losses.append(loss_train)
    train_accuracies.append(accuracy_train)
    test_losses.append(loss_test)
    test_accuracies.append(accuracy_test)
    
    print('epoch ', epoch, 'train loss ', loss_train, 'train accuracy ', accuracy_train)
    print('epoch ', epoch, 'test loss  ', loss_test, 'test accuracy  ', accuracy_test)
    
time_SGD = time.time() - start_time

print('Time with loop implementation: %f seconds\n' % time_SGD)



epoch  0 train loss  0.633445396257121 train accuracy  0.6397727272727273
epoch  0 test loss   0.693030662495672 test accuracy   0.5909090909090909
epoch  1 train loss  0.4570495658904304 train accuracy  0.7181818181818181
epoch  1 test loss   0.5575148827032496 test accuracy   0.6318181818181818
epoch  2 train loss  0.602137287988434 train accuracy  0.8181818181818182
epoch  2 test loss   0.8434372565497008 test accuracy   0.759090909090909
epoch  3 train loss  0.004886710094211944 train accuracy  1.0
epoch  3 test loss   0.005246220795500688 test accuracy   1.0
epoch  4 train loss  0.002201111007805264 train accuracy  1.0
epoch  4 test loss   0.0023664870200501165 test accuracy   1.0
epoch  5 train loss  0.00136439633240216 train accuracy  1.0
epoch  5 test loss   0.0014680274106266004 test accuracy   1.0
epoch  6 train loss  0.000970451978285676 train accuracy  1.0
epoch  6 test loss   0.0010445030897500705 test accuracy   1.0
epoch  7 train loss  0.0007451071328606089 train accurac

In [13]:
#Minibatch training for a small data set

x = np.random.uniform(-1, 1, size=(100, 780))
print('x.shape = ', x.shape)
#print('x = ', x)
print('\n')

y = np.zeros(shape=(100, 10))

for i in range (y.shape[0]):
    if ((random.choice((0, 1)))==0):
        y[i, 0] = 1
    else:
        y[i, 1] = 1 


print('y.shape = ', y.shape)
#print('y = ', y)
print('\n')

# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_mbatch_1= NN(780, 10, hidden_dims=(500,300))

loss_mbatch_1=0
epochs=10
for epoch in range (epochs): 
    #x, y_onehot, mb_size=100, learning_rate=1e-1, weight_decay=0
    loss_mbatch_1=NN_mbatch_1.train_mbatch(x, y, mb_size=20)
    print('epoch ', epoch, ' loss ', loss_mbatch_1)

x.shape =  (100, 780)


y.shape =  (100, 10)


epoch  0  loss  2.1485107443629756
epoch  1  loss  1.9750229937916353
epoch  2  loss  1.822853044864446
epoch  3  loss  1.6911944532682128
epoch  4  loss  1.5784631657804251
epoch  5  loss  1.4825692104183774
epoch  6  loss  1.4012124532787709
epoch  7  loss  1.332129256477942
epoch  8  loss  1.2732551302001884
epoch  9  loss  1.222804341115755


In [21]:
#  minibatch training for cercle

import time

# input_dim, output_dim,hidden_dims=(1024,2048),n_hidden=2,mode=',train',
# datapath=None,model_path=None

NN_cercle_mbatch= NN(2, 2, hidden_dims=(500,300), initialization='glorot')

#training 20 epoch for whole data set
start_time = time.time()

epoc=[]
train_losses_mb=[]
train_accuracies_mb=[]
test_losses_mb=[]
test_accuracies_mb=[]

epochs=20

print('epoch      train loss      train accuracy         test loss         test accuracy  ')

for epoch in range (epochs): 
#x, y, mb_size=100, learning_rate=1e-1, weight_decay=0
    loss=NN_cercle_mbatch.train_mbatch(cercle_x_train, cercle_y_train_onehot, mb_size=50)
    
    loss_train_mb, accuracy_train_mb = NN_cercle_mbatch.test_mbatch(cercle_x_train,
                                                                    cercle_y_train_onehot,
                                                                    cercle_y_train)
    loss_test_mb, accuracy_test_mb = NN_cercle_mbatch.test_mbatch(cercle_x_test, 
                                                                  cercle_y_test_onehot, 
                                                                  cercle_y_test)
    epoc.append(epoch)
    train_losses_mb.append(loss_train_mb)
    train_accuracies_mb.append(accuracy_train_mb)
    test_losses_mb.append(loss_test_mb)
    test_accuracies_mb.append(accuracy_test_mb)
    
    print(epoch, '  ', loss_train_mb, '  ', accuracy_train_mb , '  ', loss_test_mb,
          '  ', accuracy_test_mb)
    
    
time_mb = time.time() - start_time

print('Time with minibatch gradient decent implementation: %f seconds\n' % time_mb)

epoch      train loss      train accuracy      test loss      test accuracy  
0    0.6890395786978359    0.5329545454545455    0.6936150993436229    0.4727272727272727
1    0.6838755116446754    0.5988636363636364    0.6919646771876107    0.4954545454545455
2    0.6787082721195912    0.6284090909090909    0.6890124046101908    0.5045454545454545
3    0.6731339969930591    0.65    0.6848215310532373    0.5363636363636364
4    0.6668647762073656    0.6772727272727272    0.6794058411875483    0.5636363636363636
5    0.6600804090635161    0.7022727272727273    0.673517537291046    0.6090909090909091
6    0.6525806992636509    0.7375    0.6664865509067868    0.6454545454545455
7    0.643925190452585    0.7636363636363637    0.6580789687610444    0.6954545454545454
8    0.6343847016016778    0.7920454545454545    0.6491075181474436    0.7181818181818181
9    0.6234484569258721    0.8181818181818182    0.6386584494519819    0.7545454545454545
10    0.6106265612811751    0.85    0.626355302533

In [28]:


from mnist import MNIST
mndata = MNIST('C:/Users/Geo/Documents/Bioinformatica/maitrise/representationLearning/devoir 1')


In [29]:
# Presumably tells the loader to read the gzip-compressed MNIST files —
# TODO confirm against the python-mnist documentation.
mndata.gz = True

In [None]:
# Load the MNIST training split. Judging by the output of the following cell,
# `labels` holds 60000 entries.
images, labels = mndata.load_training()

In [38]:
# Sanity check: number of training labels and the first ten of them.
print (len(labels))
print(labels[:10])


60000
array('B', [5, 0, 4, 1, 9, 2, 1, 3, 1, 4])


In [21]:
# Report the shapes of the train / test arrays.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in any
# cell visible above (the loader cell produces `images` and `labels`), so
# this cell fails on a fresh kernel unless they are built elsewhere — verify.
print('X_train.shape = ', X_train.shape)
print('y_train.shape = ', y_train.shape)
print('X_test.shape = ', X_test.shape)
print('y_test.shape = ', y_test.shape)

X_train.shape =  (60000, 784)
y_train.shape =  (60000,)
X_test.shape =  (10000, 784)
y_test.shape =  (10000,)


In [22]:
# Fix NumPy's global random seed so the cells below are reproducible;
# comment this line out to get non-deterministic results.
# NOTE(review): this only seeds np.random — the stdlib `random` module used
# to draw labels in an earlier cell is unaffected, and every cell above this
# one runs unseeded.
np.random.seed(2)

Question 1.