In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.cross_validation import train_test_split



In [2]:
iris = datasets.load_iris()
data = iris.data
target = iris.target

In [3]:
# We are keeping 20% of data samples as test set
train_X, test_X, train_y, test_y = train_test_split(data, target, test_size = 0.20)

In [4]:
len(train_X), len(test_X), len(train_y), len(test_y)

(120, 30, 120, 30)

In [22]:
idim = train_X[0].shape[0] # size of input layer - "4"
hdim = 100 # size of hidden layers(100 nodes)
odim = len(np.unique(train_y)) # size of output layer - "3"

alpha = 0.001 # Learning rate
reg_lambda = 0.01

In [23]:
np.random.seed(123)
model = {'W1': None, 'b1': None, 'W2': None, 'b2': None}

In [24]:
def forward_prop(model, x):
    
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores/np.sum(exp_scores, axis=1, keepdims=True)
    
    return probs

In [25]:
def get_loss(model, x):
    
    probs = forward_prop(model, x)
    
    targets = -np.log(probs[range(len(train_X)), train_y])
    loss = np.sum(targets)
    
    loss += reg_lambda/2 * (np.sum(np.square(model['W1'])) + np.sum(np.square(model['W2'])))
    return 1./len(train_X) * loss

In [26]:
def predict(model, x):
    probs = forward_prop(model, x)
    return np.argmax(probs, axis=1)

def get_accuracy(model, x, y):    
    predictions = predict(model, x)
    accuracy = np.sum(y == predictions)/len(x)
    return accuracy

In [27]:
def trainer(hdim, epochs):
    
    W1 = np.random.rand(idim, hdim)/np.sqrt(idim)
    b1 = np.zeros((1, hdim))
    W2 = np.random.randn(hdim, odim)/np.sqrt(hdim)
    b2 = np.zeros((1, odim))
    
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    
    # For whole batch
    for epoch in range(epochs):
        
        z1 = train_X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores/np.sum(exp_scores, axis=1, keepdims=True)
        
        # Backpropagation
        delta3 = probs
        delta3[range(len(train_X)), train_y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(train_X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        
        # Add regularization terms
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1
        
        # Gradient descent
        W1 += -alpha * dW1
        b1 += -alpha * db1
        W2 += -alpha * dW2
        b2 += -alpha * db2
        
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        print("Loss after iteration %d: %f"%(epoch, get_loss(model, train_X)))
        
        if epoch % 1 == 0:
            print("Accuracy after iteration %d: %f"%(epoch, get_accuracy(model, test_X, test_y)))
            
    return model

In [28]:
model = trainer(8, epochs=100)

Loss after iteration 0: 1.730792
Accuracy after iteration 0: 0.500000
Loss after iteration 1: 1.439260
Accuracy after iteration 1: 0.500000
Loss after iteration 2: 1.279237
Accuracy after iteration 2: 0.500000
Loss after iteration 3: 1.185159
Accuracy after iteration 3: 0.566667
Loss after iteration 4: 1.128534
Accuracy after iteration 4: 0.700000
Loss after iteration 5: 1.094806
Accuracy after iteration 5: 0.266667
Loss after iteration 6: 1.073356
Accuracy after iteration 6: 0.266667
Loss after iteration 7: 1.056066
Accuracy after iteration 7: 0.266667
Loss after iteration 8: 1.036318
Accuracy after iteration 8: 0.333333
Loss after iteration 9: 1.007945
Accuracy after iteration 9: 0.500000
Loss after iteration 10: 0.969829
Accuracy after iteration 10: 0.500000
Loss after iteration 11: 0.936348
Accuracy after iteration 11: 0.500000
Loss after iteration 12: 0.912907
Accuracy after iteration 12: 0.500000
Loss after iteration 13: 0.891245
Accuracy after iteration 13: 0.500000
Loss after i