In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [4]:
# function for data normalization
def normalize(data):
    """Mean-centre each column of ``data`` and scale it by that column's range.

    Every column ``c`` is mapped to ``(c - mean(c)) / (max(c) - min(c))``.

    Unlike a purely in-place implementation, this works on a float copy, so:
      * the caller's array is left untouched (re-running a cell is idempotent),
      * integer input is accepted (in-place ``-=`` with a float mean would
        raise a casting error on an integer array),
      * constant columns (range 0) become all zeros instead of NaN/inf.

    Parameters
    ----------
    data : array-like
        Intended for a 2-D array with one sample per row and one feature
        per column.

    Returns
    -------
    numpy.ndarray
        A new float array of the same shape holding the normalised values.
    """
    values = np.array(data, dtype=float)  # np.array copies, protecting the input

    col_mean = values.mean(axis=0)
    col_range = values.max(axis=0) - values.min(axis=0)
    # A constant column has zero range; dividing by 1 leaves its centred
    # values (all zeros) unchanged and avoids a division by zero.
    col_range = np.where(col_range == 0, 1.0, col_range)

    return (values - col_mean) / col_range

In [211]:
# activation functions
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Computed as ``exp(-log(1 + exp(-z)))`` via ``np.logaddexp`` so that large
    negative inputs do not overflow inside ``np.exp`` (the naive form emits an
    overflow RuntimeWarning there).

    Parameters
    ----------
    z : array-like or scalar
        Pre-activation values.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Values in the interval [0, 1], same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), evaluated stably.
    return np.exp(-np.logaddexp(0.0, -z))

def sigmoid_backprop(z):
    """Element-wise derivative of the sigmoid evaluated at ``z``.

    Returns ``s * (1 - s)`` with ``s = sigmoid(z)``; ``z`` must expose a
    ``.shape`` attribute (i.e. be a NumPy array).
    """
    s = sigmoid(z)
    complement = np.ones(z.shape) - s
    return s * complement

def softmax(z):
    """Row-wise softmax of ``z``, normalised along axis 1.

    The maximum of each row is subtracted before exponentiating. This leaves
    the result mathematically unchanged (softmax is shift-invariant) but keeps
    ``np.exp`` from overflowing on large logits, which would otherwise yield
    ``inf / inf = nan``.

    Parameters
    ----------
    z : numpy.ndarray
        Array with at least two dimensions; each slice along axis 1 is
        treated as one set of logits.

    Returns
    -------
    numpy.ndarray
        Same shape as ``z``; entries along axis 1 sum to 1.
    """
    shifted = z - np.max(z, axis=1, keepdims=True)
    ez = np.exp(shifted)
    return ez / ez.sum(axis=1, keepdims=True)

def softmax_backprop(z):
    """Element-wise ``p * (1 - p)`` with ``p = softmax(z)``.

    This is only the diagonal of the softmax Jacobian; cross terms between
    different outputs are not included.
    """
    p = softmax(z)
    complement = np.ones(z.shape) - p
    return p * complement

# cost function
def cross_entropy(y_hat, y, eps=1e-12):
    """Element-wise binary cross-entropy, summed and divided by the sample count.

    Computes ``-(1/n) * sum(y*log(y_hat) + (1-y)*log(1-y_hat))`` where ``n`` is
    ``y.shape[0]``.

    Predictions are clipped to ``[eps, 1 - eps]`` first. Without the clip a
    saturated prediction (exactly 0 or 1) produces ``0 * log(0) = nan`` and
    poisons the whole loss.

    Parameters
    ----------
    y_hat : array-like
        Predicted probabilities, same shape as ``y``.
    y : array-like
        Target values (e.g. one-hot rows), first axis indexing samples.
    eps : float, optional
        Clipping margin that keeps the logarithms finite.

    Returns
    -------
    float
        The scalar loss.
    """
    y = np.asarray(y, dtype=float)
    y_hat = np.clip(np.asarray(y_hat, dtype=float), eps, 1.0 - eps)
    n = y.shape[0]

    log_likelihood = y * np.log(y_hat) + (1.0 - y) * np.log(1.0 - y_hat)

    return -log_likelihood.sum() / n

# function to get a layer's output
def predict(x, w, b, activation='sigmoid'):
    """Apply one affine transform followed by an activation.

    Parameters
    ----------
    x : input passed as the left operand of ``np.dot``.
    w : weights passed as the right operand of ``np.dot``.
    b : bias added to the product.
    activation : str
        ``'softmax'`` selects softmax; any other value selects sigmoid.

    Returns
    -------
    tuple
        ``(z, a)`` where ``z = np.dot(x, w) + b`` and ``a`` is the activated ``z``.
    """
    pre_activation = np.dot(x, w) + b
    squash = softmax if activation == 'softmax' else sigmoid
    return pre_activation, squash(pre_activation)
    
# example: layers = [4, 2, 3]
def model(layers):
    """Create the parameter dictionary for a fully connected network.

    For each consecutive pair ``(layers[k], layers[k + 1])`` a weight matrix
    ``'W<k>'`` of that shape is drawn from ``np.random.rand`` (uniform on
    [0, 1)) and a bias row ``'B<k>'`` of shape ``(1, layers[k + 1])`` is filled
    with ones.

    Returns
    -------
    tuple
        ``(parameters, network_depth)`` where ``network_depth`` is
        ``len(layers) - 1``.
    """
    parameters = {}

    # pair each layer size with the size of the layer that follows it
    for idx, (fan_in, fan_out) in enumerate(zip(layers[:-1], layers[1:])):
        parameters['W%s' % idx] = np.random.rand(fan_in, fan_out)
        parameters['B%s' % idx] = np.ones((1, fan_out))

    return parameters, len(layers) - 1

# forward propagation for neural network
def forward_prop(x, parameters, network_depth):
    """Run ``x`` through every layer and record what backprop needs.

    Layers ``0 .. network_depth - 2`` use sigmoid; the final layer uses
    softmax. For each layer a cache entry
    ``((layer_input, W, B), (activation_output, activation_name))`` is stored.
    Note that the second tuple holds the activation *output*, not the
    pre-activation value.

    Returns
    -------
    tuple
        ``(output, caches)``: the last layer's activation and the list of
        per-layer cache entries in forward order.
    """
    caches = []
    signal = x
    output_layer = network_depth - 1

    for layer in range(network_depth):
        weights = parameters['W%s' % layer]
        bias = parameters['B%s' % layer]
        kind = 'softmax' if layer == output_layer else 'sigmoid'

        _, activated = predict(signal, weights, bias, activation=kind)
        caches.append(((signal, weights, bias), (activated, kind)))

        # this layer's output is the next layer's input
        signal = activated

    return signal, caches


def linear_backprop(dz, linear_cache):
    """Backpropagate through the affine step ``z = a_prev . w + b``.

    Parameters
    ----------
    dz : numpy.ndarray
        Gradient with respect to the layer's pre-activation output.
    linear_cache : tuple
        ``(a_prev, w, b)`` as stored during the forward pass; ``b`` is not
        needed for the computation.

    Returns
    -------
    tuple
        ``(da_prev, dw, db)``. ``dw`` and ``db`` are divided by
        ``a_prev.shape[0]``; ``da_prev`` is not.
    """
    layer_input, weights, _bias = linear_cache
    batch_size = layer_input.shape[0]

    grad_input = np.dot(dz, weights.T)
    grad_w = np.dot(layer_input.T, dz) / batch_size
    grad_b = np.sum(dz, axis=0, keepdims=True) / batch_size

    return grad_input, grad_w, grad_b

def linear_activation_backprop(da, cache):
    """Backpropagate through one layer's activation and its affine step.

    ``cache`` is ``(linear_cache, (activation_output, activation_name))`` as
    built by ``forward_prop``. The second element stores the activation
    *output* ``a = f(z)``, not ``z``, so the element-wise derivative is taken
    directly as ``a * (1 - a)``. Passing ``a`` to ``sigmoid_backprop`` or
    ``softmax_backprop`` would apply the activation a second time and give a
    wrong gradient.

    For softmax, ``a * (1 - a)`` is only the diagonal of the Jacobian, which is
    the same simplification ``softmax_backprop`` makes.

    Parameters
    ----------
    da : numpy.ndarray
        Gradient with respect to this layer's activation output.
    cache : tuple
        The layer's entry from the forward-pass cache list.

    Returns
    -------
    tuple
        ``(da_prev, dw, db)`` from ``linear_backprop``.

    Raises
    ------
    ValueError
        If the cached activation name is neither ``'sigmoid'`` nor ``'softmax'``.
    """
    linear_cache, (activation, kind) = cache

    # Fail loudly instead of falling through to an unbound-variable error.
    if kind not in ('sigmoid', 'softmax'):
        raise ValueError("Unsupported activation: {!r}".format(kind))

    local_grad = activation * (1.0 - activation)
    dz = np.multiply(da, local_grad)

    return linear_backprop(dz, linear_cache)

def backprop(y_hat, y, caches, parameters, learning_rate=0.05):
    """Run one backward pass and apply a gradient-descent step to every layer.

    Starts from the derivative of the element-wise binary cross-entropy with
    respect to the predictions, ``-(y / y_hat - (1 - y) / (1 - y_hat))``, then
    walks the layers from last to first. Each layer's gradients are computed
    before that layer's parameters are updated in place.

    ``y_hat`` is clipped away from 0 and 1 first: a saturated prediction would
    otherwise divide by zero and produce ``inf``/``nan`` gradients.

    Parameters
    ----------
    y_hat : numpy.ndarray
        Network output from the forward pass.
    y : numpy.ndarray
        Targets, same shape as ``y_hat``.
    caches : list
        Per-layer cache entries from the forward pass, in forward order.
    parameters : dict
        ``'W<i>'`` / ``'B<i>'`` arrays; updated in place.
    learning_rate : float
        Step size passed to ``update_parameters``.

    Returns
    -------
    dict
        The same ``parameters`` object.
    """
    eps = 1e-12
    y_hat = np.clip(y_hat, eps, 1.0 - eps)

    # derivative of cross entropy
    da = -(np.divide(y, y_hat) - np.divide(1.0 - y, 1.0 - y_hat))

    for i in reversed(range(len(caches))):
        da, dw, db = linear_activation_backprop(da, caches[i])
        update_parameters(dw, db, parameters['W%s' % i], parameters['B%s' % i], learning_rate)
    return parameters
    
# update parameters function
def update_parameters(dw, db, weights, bias, learning_rate=0.05):
    """Apply one gradient-descent step to ``weights`` and ``bias`` in place.

    Each array has ``gradient * learning_rate`` subtracted from it using the
    in-place operator, so callers holding a reference see the new values.
    Nothing is returned.
    """
    for target, gradient in ((weights, dw), (bias, db)):
        target -= gradient * learning_rate

# function to train the neural network
def train(x, y, iterations, learning_rate=0.05, layers=None):
    """Train a fully connected network with full-batch gradient descent.

    Every iteration runs a forward pass followed by a backward pass that
    updates the parameters. The loss (computed from the predictions made
    *before* that iteration's update) is printed every 10 iterations. Every
    100 iterations the predictions for sample rows 0, 50 and 149 are printed;
    rows that do not exist in the data are skipped rather than raising an
    ``IndexError``.

    Parameters
    ----------
    x : numpy.ndarray
        Input samples, one per row.
    y : numpy.ndarray
        Targets, one row per sample.
    iterations : int
        Number of gradient-descent steps.
    learning_rate : float
        Step size for the parameter updates.
    layers : list of int, optional
        Layer sizes passed to ``model``. Defaults to ``[4, 12, 3]``.

    Returns
    -------
    dict
        The trained parameters, so the fitted network can be reused.
    """
    if layers is None:
        layers = [4, 12, 3]
    parameters, network_depth = model(layers)

    # rows whose predictions are echoed as a progress check
    sample_rows = [row for row in (0, 50, 149) if row < x.shape[0]]

    for i in range(iterations):
        y_hat, caches = forward_prop(x, parameters, network_depth)
        parameters = backprop(y_hat, y, caches, parameters, learning_rate)
        if i % 10 == 0:
            print("Loss: {}".format(cross_entropy(y_hat, y)))

        if i % 100 == 0:
            for row in sample_rows:
                print(y_hat[row, :])

    return parameters

In [None]:
iris_dataset = pd.read_csv('resources/datasets/iris.csv')
species = iris_dataset[['species']].values

x = normalize(iris_dataset[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values)

# One-hot encode the species labels. A dict lookup raises KeyError on an
# unexpected label instead of silently dropping the row, which would leave
# x and y with different lengths.
one_hot_by_species = {
    'setosa': [1, 0, 0],
    'versicolor': [0, 1, 0],
    'virginica': [0, 0, 1],
}
# Build y from the encoded labels; the previous code converted an undefined
# name (`one_hot`), which fails with NameError on a fresh kernel.
y = np.array([one_hot_by_species[label] for label in species[:, 0]])
assert len(x) == len(y), "features and labels must have the same number of rows"

# capture train()'s return value rather than letting the cell echo it
trained_parameters = train(x, y, 5000, learning_rate=0.05)