In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# function for data normalization
def normalize(data):
    for row in data.T:
        r_mean = np.mean(row)
        r_range = np.amax(row) - np.amin(row)
        
        row -= r_mean
        row /= r_range
    return data

In [10]:
# activation functions
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_backprop(z):
    return sigmoid(z) * (np.ones(z.shape) - sigmoid(z))

def softmax(z):
    ez = np.exp(z)
    return ez / ez.sum(axis=1, keepdims=True)

def softmax_backprop(z):
    return softmax(z) * (np.ones(z.shape) - softmax(z))

# cost function
def cross_entropy(y_hat, y):
    n = y.shape[0]
    
    cost = np.multiply(y, np.log(y_hat))
    cost += np.multiply((np.ones(y.shape) - y), np.log(np.ones(y.shape) - y_hat))
    cost *= -1/n
    
    return cost.sum()

# function to get a neuron output
def predict(x, w, b, activation='sigmoid'):
    z = np.dot(x, w) + b
    if activation == 'softmax':
        return z, softmax(z)
    else:
        return z, sigmoid(z)
    
# examaple layers = [4, 2, 3]
def model(layers):
    parameters = {}
    
    network_depth = len(layers) - 1

    # generate weights for each layer
    for i in range(1, network_depth + 1):
        parameters['W%s' % (i - 1)] = np.random.rand(layers[i - 1], layers[i])
        parameters['B%s' % (i - 1)] = np.ones((1, layers[i]))
        
    return parameters, network_depth

# forward propagation for neural network
def forward_prop(x, parameters, network_depth):
    feed = x
    caches = []
    for i in range(0, network_depth):
        linear_cache = (feed, parameters['W%s' % i], parameters['B%s' % i])
        if i == network_depth - 1:
            linear, activation = predict(feed, parameters['W%s' % i], parameters['B%s' % i], activation='softmax')
            activation_cache = (activation, 'softmax')
        else:
            linear, activation = predict(feed, parameters['W%s' % i], parameters['B%s' % i])
            activation_cache = (activation, 'sigmoid')
        
        caches.append((linear_cache, activation_cache))
        
        feed = activation
    return feed, caches


def linear_backprop(dz, linear_cache):
    a_prev, w, b = linear_cache
    m = a_prev.shape[0]
    dw = np.dot(a_prev.T, dz) / m
    db = dz.sum(axis=0, keepdims=True) / m
    da_prev = np.dot(dz, w.T)
    
    return da_prev, dw, db

def linear_activation_backprop(da, cache):
    linear_cache, activation_cache = cache
    if activation_cache[1] == 'sigmoid':
        dz = np.multiply(da, sigmoid_backprop(activation_cache[0]))
        da_prev, dw, db = linear_backprop(dz, linear_cache)
    elif activation_cache[1] == 'softmax':
        dz = np.multiply(da, softmax_backprop(activation_cache[0]))
        da_prev, dw, db = linear_backprop(dz, linear_cache)
    
    return da_prev, dw, db

def backprop(y_hat, y, caches, parameters, learning_rate=0.05):    
    # derivative of cross entropy
    n_layers = len(caches)
    da = -(np.divide(y, y_hat) - np.divide(np.ones(y.shape) - y, np.ones(y.shape) - y_hat))
    
    for i in list(reversed(range(0, n_layers))):
        da_prev, dw, db = linear_activation_backprop(da, caches[i])
        update_parameters(dw, db, parameters['W%s' % i], parameters['B%s' % i], learning_rate)
        da = da_prev
    return parameters
    
# update parameters function
def update_parameters(dw, db, weights, bias, learning_rate=0.05):
    weights -= dw * learning_rate
    bias -= db * learning_rate

# function to train the neural network
def train(x, y, iterations, learning_rate=0.05):
    parameters, network_depth = model([4,10,3])

    for i in range(iterations):
        y_hat, caches = forward_prop(x, parameters, network_depth)
        parameters = backprop(y_hat, y, caches, parameters, learning_rate)
        if i % 10 == 0:
            print("Loss: {}".format(cross_entropy(y_hat, y)))
            
        if i % 100 == 0:
            print(y_hat[0, :])
            print(y_hat[75, :])
            print(y_hat[149, :])

In [11]:
iris_dataset = pd.read_csv('resources/datasets/iris.csv')
species = iris_dataset[['species']].values

x = normalize(iris_dataset[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].values)
y = []
for s in species.T[0]:
    if s == 'setosa':
        y.append([1, 0, 0])
    elif s == 'versicolor':
        y.append([0, 1, 0])
    elif s == 'virginica':
        y.append([0, 0, 1])
y = np.array(y)

train(x, y, 5000, learning_rate=0.05)

Loss: 1.9271477170800648
[0.32270382 0.2909261  0.38637008]
[0.32654871 0.28880388 0.38464741]
[0.33084708 0.2866993  0.38245362]
Loss: 1.917063838490444
Loss: 1.9135074286005715
Loss: 1.910752863214463
Loss: 1.9081019158925185
Loss: 1.9054711032152531
Loss: 1.9028513221311019
Loss: 1.900242161173177
Loss: 1.8976439081146803
Loss: 1.8950567102049463
Loss: 1.8924805825989228
[0.30508434 0.33449605 0.36041961]
[0.2905294  0.34137577 0.36809483]
[0.29330011 0.33959918 0.36710071]
Loss: 1.889915471906844
Loss: 1.8873612942287008
Loss: 1.8848179533616114
Loss: 1.8822853488250262
Loss: 1.8797633792453445
Loss: 1.8772519437392634
Loss: 1.8747509424610458
Loss: 1.8722602768076582
Loss: 1.8697798494859994
Loss: 1.8673095645252569
[0.31274407 0.33506282 0.3521931 ]
[0.28456024 0.34364071 0.37179906]
[0.2862884  0.34191115 0.37180045]
Loss: 1.8648493272677586
Loss: 1.8623990443516218
Loss: 1.8599586236904542
Loss: 1.8575279744521875
Loss: 1.8551070070378366
Loss: 1.8526956330605122
Loss: 1.850293

Loss: 1.4663335937097517
Loss: 1.4649831689975077
Loss: 1.4636352867468414
Loss: 1.4622899373617675
Loss: 1.4609471113172234
Loss: 1.4596067991583772
[0.47120376 0.32349543 0.20530081]
[0.1785204 0.3852256 0.436254 ]
[0.16540794 0.37979578 0.45479629]
Loss: 1.4582689914999416
Loss: 1.4569336790254943
Loss: 1.45560085248681
Loss: 1.4542705027031961
Loss: 1.4529426205608391
Loss: 1.4516171970121579
Loss: 1.4502942230751614
Loss: 1.4489736898328212
Loss: 1.447655588432442
Loss: 1.4463399100850474
[0.47758571 0.32221732 0.20019697]
[0.17474794 0.38685616 0.43839589]
[0.16127862 0.38110506 0.45761632]
Loss: 1.4450266460647634
Loss: 1.443715787708224
Loss: 1.4424073264139659
Loss: 1.4411012536418444
Loss: 1.4397975609124454
Loss: 1.438496239806513
Loss: 1.4371972819643777
Loss: 1.435900679085393
Loss: 1.434606422927379
Loss: 1.433314505306071
[0.48393461 0.32088541 0.19517997]
[0.17102704 0.38847949 0.44049347]
[0.15722043 0.38239577 0.4603838 ]
Loss: 1.4320249180945754
Loss: 1.4307376532228

Loss: 1.1998611592727522
Loss: 1.1989473876926637
Loss: 1.198034977135316
Loss: 1.1971239252313985
Loss: 1.1962142296231137
Loss: 1.1953058879641127
[0.61489447 0.27953171 0.10557382]
[0.10154189 0.42201889 0.47643922]
[0.08469092 0.40613867 0.5091704 ]
Loss: 1.1943988979194333
Loss: 1.1934932571654417
Loss: 1.192588963389771
Loss: 1.1916860142912598
Loss: 1.1907844075798981
Loss: 1.189884140976764
Loss: 1.188985212213967
Loss: 1.18808761903459
Loss: 1.1871913591926337
