In [13]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Define the necessary functions

In [17]:
def one_hot_encode(y, n):
    one_hot = np.zeros((y.shape[0], n))
    one_hot[np.arange(y.shape[0]), y] = 1
    return one_hot

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def softmax(x):
    temp = np.exp(x)
    sums = temp.sum(axis=1, keepdims=True)
    softout = temp/sums
    return(softout)

def cross_entropy(pred, labels):
    N = labels.shape[0]
    e = 1e-6
    loss = 1/N * (labels * np.log(pred + e)).sum()
    return loss

def accuracy(pred, labels):
    return np.mean(pred.argmax(axis=1).reshape(-1, 1) == labels)

# Load the dataset

In [3]:
X, y = load_iris(return_X_y=True)
y = y.reshape(-1, 1)

# Shuffling the dataset and splitting the data into test and train set

In [4]:
X, y = shuffle(X, y) # Shuffle the data
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # Split the data into train, test sets
x_train.shape, x_test.shape

((120, 4), (30, 4))

# We need to encode to labels into one-hot vectors

In [5]:
Y = one_hot_encode(y_train, 3)

# Define the model parameters

In [6]:
learning_rate = 0.001
INPUT_LAYERS = 4
HIDDEN_LAYERS = 12
OUTPUT_LAYERS = 3
W1 = np.random.randn(INPUT_LAYERS,HIDDEN_LAYERS)
B1 = np.zeros((1, HIDDEN_LAYERS))
W2 = np.random.randn(HIDDEN_LAYERS, OUTPUT_LAYERS)
B2 = np.zeros((1, OUTPUT_LAYERS))

# Forward Propagation

In [7]:
def forward(X, W1, B1, W2, B2):
    Z1 = X.dot(W1) + B1
    A1 = sigmoid(Z1)
    Z2 = A1.dot(W2) + B2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

# Backward Propagation

In [8]:
def backward(Z1, A1, Z2, B2):
    d2 = A2 - Y
    dLW2 = A1.T.dot(d2)
    dLB2 = d2.sum(axis=0, keepdims=True)
    dLW1 = x_train.T.dot(A1*(1-A1)*d2.dot(W2.T))
    dLB1 = d2.dot(W2.T).sum(axis=0, keepdims=True)
    return dLW2, dLB2, dLW1, dLB1

In [9]:
for i in range(3000):
    Z1, A1, Z2, A2 = forward(x_train, W1, B1, W2, B2)
    
    if i%300 == 0:
        print(f'Loss: {cross_entropy(A2, Y)}')
        
    dLW2, dLB2, dLW1, dLB1 = backward(Z1, A1, Z2, A2)
    
    W1 = W1 - learning_rate * dLW1
    W2 = W2 - learning_rate * dLW2
    B1 = B1 - learning_rate * dLB1
    B2 = B2 - learning_rate * dLB2

Loss: -1.5656266708204611
Loss: -0.33859836705395596
Loss: -0.19156033836582034
Loss: -0.14329326006155968
Loss: -0.1232344693925788
Loss: -0.11287961404582912
Loss: -0.10669205274032262
Loss: -0.10260308882096733
Loss: -0.09969946086539319
Loss: -0.09752529269597439


# The train set accuracy

In [10]:
accuracy(A2, y_train)

0.9666666666666667

# The test set accuracy

In [12]:
pred = forward(x_test, W1, B1, W2, B2)[3]
accuracy(pred, y_test)

0.9666666666666667