# Homework 3

In [1]:
# Authors: Jiechen Wu, David Alfonso

## 3

In [2]:
%pylab inline
import numpy as np

Populating the interactive namespace from numpy and matplotlib


In [3]:
# Circles data: the last column is the class label, the other columns are the
# input features (see the slicing below).
# The path is passed directly so that NumPy opens *and closes* the file itself;
# wrapping it in a bare open() left the file handle dangling.
circles_data = np.loadtxt('circles.txt')
np.random.seed(0)  # fixed seed so the shuffled train/test split is reproducible
indices = np.random.permutation(len(circles_data))

# Separate into train/test as usual: the first 1000 shuffled rows are used for
# training, the remaining rows for testing.
circles_train_x = circles_data[indices[:1000],:-1]
circles_test_x = circles_data[indices[1000:],:-1]
circles_train_y = circles_data[indices[:1000],-1]
circles_test_y = circles_data[indices[1000:],-1]
# Remap y classes to int 1,2 (NeuralNetwork indexes the class with y-1)
circles_train_y = (circles_train_y+1).astype(int)
circles_test_y = (circles_test_y+1).astype(int)


# Fashion MNIST, loaded through the project-local reader
import utils.mnist_reader as mnist_reader
X_train, y_train = mnist_reader.load_mnist('data/fashion', kind='train')
X_test, y_test = mnist_reader.load_mnist('data/fashion', kind='t10k')

In [61]:
def rect(x):
    """Rectifier (ReLU): element-wise ``max(0, x)``.

    Parameters
    ----------
    x : array_like
        Scalar or array of any shape.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Same shape as ``x``. The result is float64 for integer or float input
        because it is compared against a float64 zero array.
    """
    # Build the zero array from the full shape of x rather than len(x), so that
    # scalars and non-square N-d arrays work too (len(x) only suited 1-D input).
    return np.maximum(np.zeros(np.shape(x)), x)

def softmax(x_v):
    """Numerically stable softmax over all elements of ``x_v``.

    Parameters
    ----------
    x_v : array_like
        Vector of activations (logits).

    Returns
    -------
    numpy.ndarray
        ``exp(x_v) / sum(exp(x_v))``, with the same shape as ``x_v``.
    """
    values = np.asarray(x_v)
    if values.size == 0:
        # Nothing to normalise; keep returning an empty float array.
        return np.exp(values)
    # Softmax is invariant to adding a constant to every entry. Subtracting the
    # maximum keeps every exponent <= 0, so np.exp cannot overflow to inf
    # (which previously produced inf/inf = nan for large activations).
    exps = np.exp(values - np.max(values))
    return exps / np.sum(exps)

class NeuralNetwork:
    """One-hidden-layer classifier (ReLU hidden layer, softmax output) for one example.

    The instance holds a single input vector ``x`` with its label ``y`` and
    provides a forward pass (``fprop``), an analytic gradient (``bprop``), a
    finite-difference gradient (``cal_finit_difference``) and a comparison of
    the two (``gradient_check``).

    Parameters are stored twice: as ``W1``, ``b1``, ``W2``, ``b2`` and as one
    flat vector ``theta`` laid out as ``[W1.ravel(), b1, W2.ravel(), b2]``.

    Relies on the module-level helpers ``rect`` and ``softmax``.
    """

    def __init__(self,x,y,dh,m):
        """Store the example and randomly initialise the parameters.

        Parameters
        ----------
        x : 1-D array of input features; its length defines the input size ``d``.
        y : integer class label; it is used as index ``y-1``, so labels are 1-based.
        dh : number of hidden layer neurons.
        m : number of classes (size of the output layer).
        """
        self.x = x
        self.y = y
        self.d = len(x)
        self.dh = dh # number of hidden layer neurons
        self.m = m # m classes

        # Parameter initialization: weights are uniform in +/- 1/sqrt(fan_in),
        # biases start at zero.
        bound_W1 = 1/self.d**0.5
        self.W1 = np.random.uniform(-bound_W1,bound_W1,(self.dh,self.d))
        self.b1 = np.zeros(self.dh)
        bound_W2 = 1/self.dh**0.5
        self.W2 = np.random.uniform(-bound_W2,bound_W2,(self.m,self.dh))
        self.b2 = np.zeros(self.m)
        self.wb2theta()
        self.gradFinitDiff = np.zeros(len(self.theta))

    def bprop(self):
        """Compute the gradient of the loss by backpropagation.

        Must be called after ``fprop()``, because it reads the cached ``ha``,
        ``hs`` and ``os``. Stores the per-parameter gradients in ``grad_W1``,
        ``grad_b1``, ``grad_W2``, ``grad_b2`` and their flat concatenation in
        ``gradBprop`` (same layout as ``theta``).
        """
        # Re-derive W1/b1/W2/b2 from this instance's own theta. (This used to
        # go through a notebook-level variable instead of ``self``.)
        self.theta2wb()
        # Gradient w.r.t. the output pre-activation: softmax output - one-hot(y).
        self.grad_oa = self.os - np.eye(self.m)[self.y-1]
        self.grad_hs = self.W2.T @ self.grad_oa
        # ReLU derivative: 1 where the pre-activation was positive, else 0.
        self.grad_ha = (self.ha>0).astype(int) * self.grad_hs
        self.grad_x = self.W1.T @ self.grad_ha
        self.grad_b2 = self.grad_oa
        self.grad_W2 = np.outer(self.grad_oa, self.hs)
        self.grad_b1 = self.grad_ha
        self.grad_W1 = np.outer(self.grad_ha, self.x)
        self.serial_grad()

    def fprop(self,ischeck = False, check_theta = None):
        """Run the forward pass and return the loss ``-log(os[y-1])``.

        Parameters
        ----------
        ischeck : when falsy (default) the instance's ``W1, b1, W2, b2`` are
            used; when truthy the parameters are taken from ``check_theta``.
        check_theta : flat parameter vector (same layout as ``theta``), only
            read when ``ischeck`` is truthy.

        In both modes the intermediate values are cached on the instance
        (``ha``, ``hs``, ``oa``, ``os``, ``L``).
        """
        if ischeck:
            W1,b1,W2,b2 = self.unserial_param(check_theta)
        else:
            W1,b1,W2,b2 = self.W1,self.b1,self.W2,self.b2

        self.ha = W1@self.x+b1
        self.hs = rect(self.ha)
        self.oa = W2@self.hs+b2
        self.os = softmax(self.oa)
        self.L = -np.log(self.os[self.y-1])
        return self.L

    def serial_param(self,W1,b1,W2,b2):
        """Flatten the four parameter arrays into one vector ``[W1, b1, W2, b2]``."""
        return np.concatenate([np.ravel(W1),
                               np.ravel(b1),
                               np.ravel(W2),
                               np.ravel(b2)])

    def unserial_param(self,theta):
        """Split a flat vector into ``(W1, b1, W2, b2)`` (inverse of ``serial_param``).

        The pieces are slices/reshapes of ``theta``; no explicit copy is made.
        """
        # Use the sizes stored on the instance; the class size m used to be
        # read from a notebook-level variable.
        dh, d, m = self.dh, self.d, self.m
        end_W1 = dh*d
        end_b1 = end_W1 + dh
        end_W2 = end_b1 + m*dh
        W1 = theta[:end_W1].reshape((dh,d))
        b1 = theta[end_W1:end_b1]
        W2 = theta[end_b1:end_W2].reshape((m,dh))
        b2 = theta[end_W2:end_W2+m]
        return W1,b1,W2,b2

    def wb2theta(self):
        """Rebuild ``self.theta`` from the current ``W1, b1, W2, b2``."""
        self.theta = self.serial_param(self.W1,self.b1,self.W2,self.b2)

    def theta2wb(self):
        """Rebuild ``W1, b1, W2, b2`` from the current ``self.theta``."""
        self.W1,self.b1,self.W2,self.b2 = self.unserial_param(self.theta)

    def serial_grad(self):
        """Flatten the backprop gradients into ``self.gradBprop`` (layout of ``theta``)."""
        self.gradBprop = np.concatenate([np.ravel(self.grad_W1),
                                         np.ravel(self.grad_b1),
                                         np.ravel(self.grad_W2),
                                         np.ravel(self.grad_b2)])

    def cal_finit_difference(self):
        """Estimate the gradient by forward finite differences.

        For every entry ``i`` of ``theta`` this stores
        ``(L(theta + epsilon*e_i) - L(theta)) / epsilon`` in ``gradFinitDiff[i]``.
        """
        epsilon = 10**-5
        perturbed_loss = np.empty(len(self.theta))
        for ind in range(len(self.theta)):
            thetaPlus = np.copy(self.theta)
            thetaPlus[ind] += epsilon
            perturbed_loss[ind] = self.fprop(True,thetaPlus)
        # The unperturbed loss does not depend on the loop index, so evaluate it
        # once. Doing it last also leaves the cached activations (ha, hs, oa,
        # os, L) in their unperturbed state, as before.
        base_loss = self.fprop()
        self.gradFinitDiff[:] = (perturbed_loss - base_loss)/epsilon

    def gradient_check(self):
        """Print whether ``gradFinitDiff`` and ``gradBprop`` agree within 1%.

        Requires ``bprop()`` and ``cal_finit_difference()`` to have been run.
        """
        # Smallest positive float: added to both sides so that entries where
        # both gradients are exactly zero give a ratio of 1 instead of 0/0.
        sys_eps = np.nextafter(0, 1)
        ratio = (self.gradFinitDiff+sys_eps)/(self.gradBprop+sys_eps)
        if np.any(ratio>1.01) or np.any(ratio<0.99):
            print("Gradient check failed.",ratio)
        else:
            print("Gradient check succeeded.")
        print("Gradient Backprop:",self.gradBprop)
        print("Gradient Finit Differenct:",self.gradFinitDiff)

### 3.1

In [74]:
# Tiny network for the gradient check: 2 hidden units, 2 classes, evaluated on
# the first training example only.
dh, m = 2, 2
model = NeuralNetwork(circles_train_x[0,:],circles_train_y[0],dh,m)
model.fprop()  # forward pass; caches the activations and the loss on the model
for label, weights in (('W1=', model.W1), ('W2=', model.W2)):
    print(label, weights)
model.wb2theta()  # refresh the flat parameter vector before displaying it
print('theta=',model.theta)
model.bprop()  # analytic gradient, stored in model.gradBprop

W1= [[-0.58679104 -0.06685149]
 [ 0.61188146  0.37217152]]
W2= [[-0.26659896  0.5495853 ]
 [ 0.27898798 -0.05183939]]
theta= [-0.58679104 -0.06685149  0.61188146  0.37217152  0.          0.
 -0.26659896  0.5495853   0.27898798 -0.05183939  0.          0.        ]


In [75]:
# Fill model.gradFinitDiff with a forward finite-difference estimate of the
# loss gradient (one perturbed forward pass per entry of model.theta).
model.cal_finit_difference()

In [76]:
# Compare the finite-difference gradient with the backprop gradient: the check
# passes when every element-wise ratio lies in [0.99, 1.01]. Both vectors are
# printed either way.
model.gradient_check()

Gradient check succeeded.
Gradient Backprop: [ 0.17131098  0.08667191 -0.         -0.         -0.23998526  0.
  0.19486884  0.         -0.19486884 -0.          0.43986621 -0.43986621]
Gradient Finit Differenct: [ 0.17131116  0.08667196  0.          0.         -0.23998489  0.
  0.19486908  0.         -0.1948686   0.          0.43986744 -0.43986497]


### 3.2