In [32]:
import numpy as np
import pandas as pd
from scipy.io import loadmat#load .mat files
import scipy.optimize #using fmincg to training
import warnings
warnings.filterwarnings('ignore')

# NOTE: not sure whether the two imports below are actually needed
import itertools
from scipy.special import expit

## import data

In [33]:
# Read the training set from ex4data1.mat.
data = loadmat('ex4data1.mat')
y = data['y']
# Prepend a column of ones (the bias input) to the raw feature matrix,
# so X ends up with one more column than data['X'].
X = np.c_[np.ones((data['X'].shape[0], 1)), data['X']]
print('X:', X.shape)
print('y:', y.shape)
print('label y:', np.unique(y))

X: (5000, 401)
y: (5000, 1)
label y: [ 1  2  3  4  5  6  7  8  9 10]


## import weights/params/thetas

In [34]:
# Read the two weight matrices stored in ex4weights.mat.
weights = loadmat('ex4weights.mat')
Theta1 = weights['Theta1']  # input -> hidden layer weights
Theta2 = weights['Theta2']  # hidden -> output layer weights
print('Theta1:',Theta1.shape)
print('Theta2:',Theta2.shape)

Theta1: (25, 401)
Theta2: (10, 26)


In [35]:
# Layer sizes, bias units excluded. X carries one extra (bias) column on top
# of the input_layer_size feature columns.
input_layer_size, hidden_layer_size, output_layer_size = 400, 25, 10
num_labels = output_layer_size
# m = number of rows in X, n = number of columns in X (bias column included).
m, n = X.shape
n_training_samples = m

# Functions

In [36]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise.

    ``z`` may be a scalar or a NumPy array; the result has the same shape.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def sigmoidGradient(z):
    """Derivative of the logistic function at ``z``: s * (1 - s) with s = sigmoid(z).

    Works element-wise on scalars and NumPy arrays.
    """
    activation = sigmoid(z)
    return activation * (1 - activation)

def computeCost(nn_params, input_layer_size, hidden_layer_size, num_labels, features, classes, reg):
    """Regularised cross-entropy cost of a network with one hidden layer.

    Parameters
    ----------
    nn_params : 1-D ndarray
        All weights unrolled into one vector: the first
        ``hidden_layer_size * (input_layer_size + 1)`` entries are theta1
        (row-major), the remainder is theta2.
    input_layer_size, hidden_layer_size, num_labels : int
        Layer sizes, excluding bias units.
    features : ndarray, shape (m, input_layer_size + 1)
        Inputs that already contain the bias column; no bias is added here.
    classes : ndarray
        One label per row of ``features``. Labels are one-hot encoded with
        ``pd.get_dummies`` (columns follow the sorted distinct labels), so
        every one of the ``num_labels`` classes has to occur in ``classes``.
    reg : float
        Regularisation strength. The bias weights (first column of each
        theta) are excluded from the penalty.

    Returns
    -------
    float
        The scalar cost J.
    """
    # Roll the flat parameter vector back into the two weight matrices.
    split = hidden_layer_size * (input_layer_size + 1)
    theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1)
    theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1)

    m = features.shape[0]
    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on every
    # pandas version. Cast to float because newer pandas returns bool dummies.
    y_matrix = pd.get_dummies(classes.ravel()).values.astype(float)  # m x num_labels

    # Forward pass, one example per row.
    hidden = sigmoid(features.dot(theta1.T))        # m x hidden_layer_size
    a2 = np.c_[np.ones((m, 1)), hidden]             # add the hidden bias unit
    a3 = sigmoid(a2.dot(theta2.T))                  # m x num_labels

    data_term = -np.sum(y_matrix * np.log(a3) + (1 - y_matrix) * np.log(1 - a3)) / m
    penalty = (reg / (2 * m)) * (np.sum(np.square(theta1[:, 1:])) + np.sum(np.square(theta2[:, 1:])))

    return data_term + penalty

# Sanity check: cost of the weights loaded from ex4weights.mat, with
# regularisation strength 1. The bare call on the last line is the cell output.
reg = 1
nn_params = np.r_[Theta1.ravel(), Theta2.ravel()]  # unroll both matrices into one vector
computeCost(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, reg)

0.3837698590909236

In [37]:
def backPropagate(nn_params, input_layer_size, hidden_layer_size, num_labels, features, classes, reg):
    """Gradient of the regularised cross-entropy cost, via backpropagation.

    Takes the same arguments as ``computeCost`` so that both can be handed to
    an optimiser with one shared ``args`` tuple.

    Parameters
    ----------
    nn_params : 1-D ndarray
        theta1 (``hidden_layer_size x (input_layer_size + 1)``) followed by
        theta2 (``num_labels x (hidden_layer_size + 1)``), both unrolled
        row-major.
    input_layer_size, hidden_layer_size, num_labels : int
        Layer sizes, excluding bias units.
    features : ndarray, shape (m, input_layer_size + 1)
        Inputs that already contain the bias column.
    classes : ndarray
        One label per row of ``features``; one-hot encoded with
        ``pd.get_dummies``.
    reg : float
        Regularisation strength. Bias weights are not regularised.

    Returns
    -------
    1-D ndarray
        Gradients for theta1 and theta2, unrolled in the same order as
        ``nn_params``.
    """
    # Roll the flat parameter vector back into the two weight matrices.
    split = hidden_layer_size * (input_layer_size + 1)
    theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1)
    theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1)

    m = features.shape[0]
    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on every
    # pandas version. Cast to float because newer pandas returns bool dummies.
    y_matrix = pd.get_dummies(classes.ravel()).values.astype(float)  # m x num_labels

    # Forward pass
    a1 = features                                   # m x (input + 1)
    z2 = theta1.dot(a1.T)                           # hidden x m
    a2 = np.c_[np.ones((m, 1)), sigmoid(z2.T)]      # m x (hidden + 1)
    z3 = theta2.dot(a2.T)                           # num_labels x m
    a3 = sigmoid(z3)                                # num_labels x m

    # Backward pass
    d3 = a3.T - y_matrix                                        # m x num_labels
    # The bias column of theta2 is dropped: no error flows back into a bias unit.
    d2 = theta2[:, 1:].T.dot(d3.T) * sigmoidGradient(z2)        # hidden x m

    delta1 = d2.dot(a1)                             # hidden x (input + 1)
    delta2 = d3.T.dot(a2)                           # num_labels x (hidden + 1)

    # Regularisation must skip the bias weights, matching the cost function.
    # The mask therefore carries ZEROS in the first column (ones here would
    # add a spurious reg/m to every bias gradient).
    theta1_ = np.c_[np.zeros((theta1.shape[0], 1)), theta1[:, 1:]]
    theta2_ = np.c_[np.zeros((theta2.shape[0], 1)), theta2[:, 1:]]

    theta1_grad = delta1 / m + (theta1_ * reg) / m
    theta2_grad = delta2 / m + (theta2_ * reg) / m

    return np.r_[theta1_grad.ravel(), theta2_grad.ravel()]

# INITIAL PARAMS

In [38]:
def createRandomParam(Lin, Lout, eps=0.12):
    """Random initial weights for a layer with ``Lin`` inputs and ``Lout`` outputs.

    Parameters
    ----------
    Lin : int
        Number of incoming units, bias excluded.
    Lout : int
        Number of outgoing units.
    eps : float, optional
        Half-width of the sampling interval (default 0.12).

    Returns
    -------
    ndarray, shape (Lout, Lin + 1)
        Values drawn uniformly from [-eps, eps); the extra column holds the
        bias weights. Sampling uses NumPy's global RNG (``np.random.rand``),
        so call ``np.random.seed`` beforehand for reproducible results.
    """
    # rand() is uniform on [0, 1); stretch and shift it onto [-eps, eps).
    return np.random.rand(Lout, Lin + 1) * 2 * eps - eps

In [39]:
# Seed NumPy's global RNG so that Restart & Run All always starts the
# optimisation from the same random weights (createRandomParam draws from
# np.random).
np.random.seed(0)
iTheta1 = createRandomParam(input_layer_size, hidden_layer_size)
iTheta2 = createRandomParam(hidden_layer_size, num_labels)
# Unroll both matrices into the single flat vector the optimiser works on.
initialParams = np.r_[iTheta1.ravel(), iTheta2.ravel()]

# Training
Train the network to obtain the optimal weights/params.

In [40]:
def trainNN(initialParams,mylambda=0.):
    """Minimise ``computeCost`` with conjugate gradient and return the best weights.

    Starts from the flat vector ``initialParams`` and runs at most 50
    iterations of ``scipy.optimize.fmin_cg``, using ``backPropagate`` as the
    gradient. The layer sizes and the training data (``X``, ``y``) are read
    from the notebook's globals; ``mylambda`` is the regularisation strength
    passed through to both functions.

    Returns the optimised parameters as a flat vector.
    """
    extra_args = (input_layer_size, hidden_layer_size, num_labels, X, y, mylambda)
    outcome = scipy.optimize.fmin_cg(
        computeCost,
        x0=initialParams,
        fprime=backPropagate,
        args=extra_args,
        maxiter=50,
        disp=True,
        full_output=True,
    )
    # With full_output=True a tuple comes back; its first entry is the minimiser.
    best_params = outcome[0]
    return best_params

In [41]:
# Run the optimiser from the random starting point (no regularisation by default).
ThetaOpt = trainNN(initialParams)

# Roll the flat optimum back into the two layer weight matrices.
tOpt1 = ThetaOpt[:hidden_layer_size * (input_layer_size + 1)].reshape(
    hidden_layer_size, input_layer_size + 1)
tOpt2 = ThetaOpt[hidden_layer_size * (input_layer_size + 1):].reshape(
    num_labels, hidden_layer_size + 1)
ThetaOpt.shape

         Current function value: 0.263022
         Iterations: 50
         Function evaluations: 117
         Gradient evaluations: 117


(10285,)

In [42]:
# Shape of the trained hidden-layer weights: (hidden_layer_size, input_layer_size + 1).
tOpt1.shape

(25, 401)

In [43]:
# Display the trained hidden-layer weight matrix.
tOpt1

array([[-0.15631347,  0.04814506,  0.11755215, ..., -0.06562607,
         0.07794564,  0.03725365],
       [-1.11067637,  0.1061638 ,  0.00556913, ...,  0.05376182,
         0.06035015, -0.03610017],
       [ 0.13839467,  0.06340031,  0.04966773, ..., -0.07095192,
         0.00614628, -0.0197774 ],
       ...,
       [-0.53167615,  0.04919119,  0.00637625, ..., -0.10781339,
        -0.11532748, -0.07349469],
       [-0.84517602,  0.01330328, -0.10952662, ...,  0.10582753,
         0.00649984,  0.10107506],
       [ 0.18937779,  0.10031221,  0.10585985, ...,  0.09120847,
        -0.11921614,  0.01017577]])

In [44]:
# Shape of the trained output-layer weights: (num_labels, hidden_layer_size + 1).
tOpt2.shape

(10, 26)

In [45]:
# Display the trained output-layer weight matrix.
tOpt2

array([[-1.23436295, -0.88457366, -4.17172089, -2.14302836,  3.83227527,
        -0.86735733, -1.89854671,  1.07079048,  1.26548936, -0.98257313,
        -2.01625568,  1.57348632,  1.72052287,  1.52379695,  1.01515122,
        -1.46920584, -0.0100641 ,  0.99717459, -1.30057783, -2.75243892,
        -0.78538374, -2.6397266 , -1.35611556, -2.00087964, -1.75039646,
         2.26475355],
       [-1.59290789, -1.39755351,  0.63757806, -1.21508766, -1.77067314,
        -0.37237966,  0.89989333, -1.98103919,  1.05507536, -2.04086235,
        -1.93279077, -0.20435047, -0.29320825,  2.43620006,  0.76008113,
        -3.25838187, -2.84337104,  1.39118386, -3.99549898,  2.77250322,
        -1.16172906, -2.38343301,  0.35043226,  0.2796903 ,  2.82893569,
        -2.10162599],
       [-0.88366247,  0.12268781, -2.84750342, -2.99118973, -1.00230217,
         2.92010933, -0.25979533, -0.75846717, -0.95674307, -0.84447855,
         1.33003543,  0.20819146, -1.45780495, -1.18316446, -3.61959755,
       

In [46]:
def prediction(X,thetaOpt1,thetaOpt2):
    """Predict a class label for every row of ``X`` with a forward pass.

    ``X`` must be 2-D and already contain the bias column. ``thetaOpt1`` maps
    the inputs to the hidden layer and ``thetaOpt2`` maps the hidden layer
    (plus its bias unit) to the output layer.

    Returns an integer column vector of shape (rows of X, 1). Labels are
    1-based: the index of the strongest output unit plus one.
    """
    n_rows, _ = X.shape  # unpacking also enforces that X is 2-D

    hidden = sigmoid(np.dot(X, thetaOpt1.T))            # rows x hidden units
    hidden_with_bias = np.c_[np.ones((n_rows, 1)), hidden]
    hypo = sigmoid(np.dot(hidden_with_bias, thetaOpt2.T))  # rows x output units

    # argmax is 0-based while the labels start at 1.
    labels = hypo.argmax(axis=1) + 1
    return labels.reshape(-1, 1)

In [47]:
# Accuracy on the training set of the weights optimised in this notebook
# (tOpt1, tOpt2). prediction() returns an (m, 1) column, the same shape as y,
# so the element-wise comparison lines up row by row.
pred = prediction(X, tOpt1, tOpt2)
accuracy = 100 * np.mean(pred == y)
print('accuracy NN={}%'.format(accuracy))

accuracy NN=96.89999999999999%


<h2>
    kalo mau dilakukan perbandingan, <b>lakukan reshape!!</b><br>
    
    contoh:<br> 
        pred = (5000,)<br>
        y = (5000,1)<br>
        
        ERRORRR!! (<code>pred == y</code> di-broadcast menjadi (5000, 5000), sehingga akurasinya salah)<br>
        
        DO IT: pred.reshape(-1,1)
        
</h2>