## Assignment 3

In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import random
np.random.seed(1)

#### load the data

In [2]:
train_data = pd.read_csv('./ex3_train.csv')
test_data = pd.read_csv('./ex3_test.csv')

#### split the data into X and y

In [279]:
X_train = train_data.iloc[:, train_data.columns != 'y']
y_train = train_data["y"]
X_test = test_data.iloc[:, test_data.columns != 'y']
y_test = test_data["y"]

X_train_array = np.asarray(X_train)
y_train_array = np.asarray(y_train)
X_test_array = np.asarray(X_test)
y_test_array = np.asarray(y_test)

print ("X_train: ", X_train_array.shape)
print ("y_train: ", y_train_array.shape)

X_train:  (3500, 400)
y_train:  (3500,)


#### One-hot encoding 

In [280]:
def one_hot_encoding(y, numOfClasses):
    encoded = np.zeros((y.shape[0],numOfClasses))
    for i in range(y.shape[0]):
        encoded[i,y[i]] = 1.0
    return encoded

In [281]:
y_train_encoded = one_hot_encoding(y_train_array, 10)
y_train_encoded.shape

(3500, 10)

#### sigmoid function

In [282]:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

In [283]:
input_layer_size = 400
hidden_layer_size = 25
output_layer_size = 10

#### Initialize Parameters

Initialize weights

In [284]:
def initialize_weights(input_layer_size,hidden_layer_size,output_layer_size):
    np.random.seed(1)
    w1 = np.random.rand(hidden_layer_size, input_layer_size) * 0.01
    w2 = np.random.rand(output_layer_size, hidden_layer_size) * 0.01
    return (w1, w2)

In [285]:
w1, w2 = initialize_weights(input_layer_size, hidden_layer_size, output_layer_size)
print (w1.shape)
print (w2.shape)

(25, 400)
(10, 25)


Initializing bias weights to 0

In [287]:
bias1 = np.zeros((hidden_layer_size, 1))
print ("b1: ", bias1.shape)
bias2 = np.zeros((output_layer_size, 1))
print ("b2: ", bias2.shape)

b1:  (25, 1)
b2:  (10, 1)


In [290]:
def forward_propagation(X, w1, w2, b1, b2):
    a0 = X
    z1 = (np.dot(a0,w1.T).T + b1)
    a1 = sigmoid(z1)
    z2 = (np.dot(w2, a1)+ b2)
    a2 = sigmoid(z2).T
    return a0,a1,a2,z1,z2

In [291]:
a0, a1, a2, z1, z2 = forward_propagation(X_train_array, w1, w2, bias1, bias2)

#### cost function

In [308]:
def compute_cost(prediction, actual):
    print (actual.shape[0])
    loss = np.multiply(actual, np.log(prediction)) + np.multiply((1-actual), np.log(1 - prediction))
    cost = (-1/actual.shape[0]) * np.sum(loss)
    return cost

In [309]:
compute_cost(a2, y_train_encoded)

3500


7.2301203322052228

#### Backward Propagation

In [301]:
def backward_propagation(a1,a2,X,y,w1,w2,b1,b2):
    m = y.shape[0]
    dz2 = a2-y
#     print(dz2.shape)
    dw2 = (1/m) * np.dot(dz2.T, a1.T)
#     print (dw2.shape)
    db2 = (1/m) * np.sum(dz2.T, axis=1, keepdims=True)
#     print (db2.shape)
    da1 = np.dot(dz2, w2)
    dz1 = (1/m) * np.multiply(da1.T,np.multiply(a1,(1-a1)))
#     print (dz1.shape)
    dw1 = (1/m) * np.dot(dz1, X)
#     print (dw1.shape)
    db1 = (1/m) * np.sum(dz1, axis=1, keepdims=True)
    return dw1, dw2, db1, db2

In [302]:
dw1, dw2, db1, db2 = backward_propagation(a1, a2, X_train_array, y_train_encoded, w1, w2, bias1, bias2)

In [303]:
def gradient_descent(X, y, w1, w2, b1, b2, alpha,ep):
    m = y.shape[1]
    cost_history = []
    epochs = 0
#     a0, a1, a2, z1, z2 = forward_propagation(X, w1, w2, b1, b2)
#     newCost = compute_cost(a2, y)
#     oldCost = float('inf')
#     cost_history.append(newCost)
    while epochs < ep:
#     while newCost < oldCost and abs(newCost - oldCost) > 1e-5 or epochs < ep:
        a0, a1, a2, z1, z2 = forward_propagation(X, w1, w2, b1, b2)
        newCost = compute_cost(a2, y)
        cost_history.append(newCost)
        dw1, dw2, db1, db2 = backward_propagation(a1, a2, X, y, w1, w2, b1, b2)
    
        # update weights
        w1 = w1 - (alpha * dw1)
        w2 = w2 - (alpha * dw2)
        b1 = b1 - (alpha * db1)
        b2 = b2 - (alpha * db2)
        epochs = epochs + 1
        
    min_cost_iter = cost_history.index(min(cost_history))
    print ("Minimum cost at iteration: ", min_cost_iter)
    print ("Minimum cost achieved: ", min(cost_history))
    print(cost_history[-1])
    return cost_history, w1, w2, b1, b2     

In [304]:
def softmax(x):
    mat = np.exp(x)
    return mat / mat.sum(0)

def predict(w1, w2, b1, b2, X):
    m = X.shape[0]
    num_labels = w2.shape[0]
    p = np.zeros((m,1))
    h1 = sigmoid((np.dot(X,w1.T).T + b1).T)
    h2 = sigmoid((np.dot(h1,w2.T).T + b2).T)
    p = softmax(h2)
#     print (p)
    actual_pred = []
    for item in p:
        actual_pred.append(np.argmax(item))
    return actual_pred

def accuracy(actual, predicted):
    count = 0
    for x,y in zip(actual, predicted):
        if x == y:
            count = count + 1
    return count/actual.shape[0]

In [307]:
cost_history01, wl_result01, w2_result01, b1_res01, b2_res01 = gradient_descent(X_train_array,y_train_encoded,w1,w2,\
                                                                                bias1, bias2, 1, 30000)

Minimum cost at iteration:  29999
Minimum cost achieved:  0.594816505733
0.594816505733


In [306]:
predictions01 = predict(wl_result01, w2_result01, b1_res01, b2_res01, X_test_array)
accuracy01 = accuracy(y_test_array, predictions01)
print (accuracy01)

0.5166666666666667


In [None]:
cost_history001, wl_result001, w2_result001, b1_res001, b2_res001 = gradient_descent(X_train_array, y_train_encoded, w1, w2, bias1,\
                                                                      bias2, 0.001, 300000)

In [None]:
cost_history1, wl_result1, w2_result1, b1_res1, b2_res1 = gradient_descent(X_train_array,y_train_encoded,w1,w2,\
                                                                                bias1,bias2,0.1,1000)