In [78]:
# Import Libraries
import numpy as np
import random
import urllib.request

Download iris dataset


In [79]:
urllib.request.urlretrieve(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", 
    "iris-data.txt")

('iris-data.txt', <http.client.HTTPMessage at 0x1a2879f1c30>)

Pre-process data


In [80]:
random.seed(0)
np.random.seed(0)

In [81]:
# Ratio of data allocated to training and testing.
train_test_ratio = 0.8

# Temporary list to append lines of text from `iris-data.txt`
tmp_list_iris = []

# Temporary set to append labels to (set only keeps one of each label).
# i.e. "Iris-versicolor", "Iris-verginica", "Iris-setosa"
tmp_set_label = set()

# Appends features from same line.
features = []

# Labels from the same line.
labels = []

In [82]:
# Opens file and adds all lines that are not spaces to list as string.
with open("iris-data.txt") as f:
    for line in f.readlines():
        if not line.isspace():
            tmp_list_iris.append(line)

    random.shuffle(tmp_list_iris)

for line in tmp_list_iris:
    # Splits line by commas into a list
    # i.e. ['5.7', '3.8', '1.7', '0.3', 'Iris-setosa']
    split_line = line.strip().split(',')

    # Expected to be 5.
    length_line = len(split_line)

    for i in range(length_line - 1):
        # Converts string feature values into floats.
        split_line[i] = float(split_line[i])

    # The label is the last element of the `split_line` array.
    # i.e. 'Iris-setosa'
    label = split_line[length_line - 1]

    # Adds label to temporary set for use later.
    tmp_set_label.add(label)

    # Appends the array of float values to the features array.
    features.append(split_line[:length_line - 1])

    # Appends the label to the labels array.
    labels.append(label)

Scale data

In [83]:
# Determines the minimum and maximum values for the entire features array.
# i.e. max_val = 7.9, min_val = 0.0
max_val = max([item for i in features for item in i])
min_val = min([item for i in features for item in i])

for i in range(len(features)):
    for j in range(len(features[0])):
        # Updates features array to scale between 0.0 and 1.0
        features[i][j] = (features[i][j] - min_val) / (max_val - min_val)

One-hot encoding

In [84]:
# Creates a list of all used labels from set (values are all unique).
tmp_list_one_hot_encoding = list(tmp_set_label)

for i in range(len(labels)):
    # Turns the string label value into the corresponding index value.
    labels[i] = tmp_list_one_hot_encoding.index(labels[i])

# Turns the labels array (now a list of indexes) into a numpy array.
label_idx = np.array(labels)

# Creates a len(labels) x len(tmp_list_one_hot_encoding) matrix of zeros.
# i.e. 150 x 3
labels = np.zeros((len(labels), len(tmp_list_one_hot_encoding)))

# For each list in array it sets the corresponding label_idx value to 1.
# i.e. [[0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1], ...]
labels[np.arange(len(labels)), label_idx] = 1

split into train-test set

In [85]:
# Creates training and test sets for features and labels.
features_train = np.array(features[:int(train_test_ratio * len(features))])
features_test = np.array(features[int(train_test_ratio * len(features)):])

labels_train = labels[:int(train_test_ratio * len(labels))]
labels_test = labels[int(train_test_ratio * len(labels)):]

Neural Network
hyper-parameters

In [86]:
# Sets input layers to the number of features (4).
n_input_layers = len(features_test[0])

n_hidden_layers = 5

# Sets the number of output layers to the number of label types (3).
n_output_layers = len(tmp_list_one_hot_encoding)

learning_rate = 0.01
momentum = 0.9

n_epoch = 100

In [87]:
# Activation functions
activation_f = {
    'identity': lambda x: x,
    'sigmoid': lambda x: 1.0 / (1.0 + np.exp(-x)),
    'tanh': lambda x: np.tanh(x),
    'relu': lambda x: x * (x > 0),
}

# Derivatives of the previous lambda functions.
activation_f_prime = {
    'identity': lambda x: 1,
    'sigmoid': lambda x: x * (1.0 - x),
    'tanh': lambda x: 1 - x**2,
    'relu': lambda x: 1.0 * (x > 0),
}

In [88]:
f1 = 'tanh'
f2 = 'sigmoid'

act_f1 = activation_f[f1]
act_f2 = activation_f[f2]

act_f1_prime = activation_f_prime[f1]
act_f2_prime = activation_f_prime[f2]

Training Function


In [89]:
def train(input_features, output_label, i_h_weights, h_o_weights):
    input_features = input_features.reshape(1, -1)

    # forward prop
    h_inter = input_features @ i_h_weights
    h_result = act_f1(h_inter)
    o_inter = h_result @ h_o_weights
    o_result = act_f2(o_inter)

    error = np.mean(0.5 * np.square(o_result - output_label))

    # back prop
    del_h_o = -np.multiply(output_label - o_result, act_f2_prime(o_result))
    change_h_o = h_result.T @ del_h_o
    del_i_h = np.dot(del_h_o, h_o_weights.T) * act_f1_prime(h_result)
    change_i_h = input_features.T @ del_i_h

    return error, change_i_h, change_h_o

Predict Function

In [90]:
# uses just forward prop
def predict(input_features, i_h_weights, h_o_weights):
    h_inter = input_features @ i_h_weights
    h_result = act_f1(h_inter)
    o_inter = h_result @ h_o_weights
    o_result = act_f2(o_inter)
    return (o_result >= max(o_result)).astype(int)

Train Neural Network

In [91]:
print("*********** Train ***********")

# Initial Random Weights
V = np.random.normal(scale=0.1, size=(n_input_layers, n_hidden_layers))
W = np.random.normal(scale=0.1, size=(n_hidden_layers, n_output_layers))

# Training-set
X = features_train
T = labels_train

# Epoch-training
for epoch in range(n_epoch):
    tr_err = []

    for i in range(X.shape[0]):
        loss, grad_V, grad_W = train(X[i], T[i], V, W)

        # Adjust Weights
        V -= learning_rate * grad_V + momentum * grad_V
        W -= learning_rate * grad_W + momentum * grad_W

        tr_err.append(loss)
        
    if epoch % 10 == 0:
        val_err = []
        
        # use test set as validiation set
        for i in range(features_test.shape[0]):
            loss, _, _ = train(features_test[i], labels_test[i], V, W)
            val_err.append(loss)
            
        train_error = sum(tr_err) / len(tr_err)
        valid_error = sum(val_err) / len(val_err)
        
        print("Epoch:", epoch, " Train-error:", train_error, " Validation-error:", valid_error)

*********** Train ***********
Epoch: 0  Train-error: 0.10989753036080718  Validation-error: 0.08803014139909461
Epoch: 10  Train-error: 0.037157827031363104  Validation-error: 0.03274245777332146
Epoch: 20  Train-error: 0.02123807603893122  Validation-error: 0.019283277257004537
Epoch: 30  Train-error: 0.01666350436779533  Validation-error: 0.015397931897714021
Epoch: 40  Train-error: 0.014211889231652569  Validation-error: 0.014806383025108992
Epoch: 50  Train-error: 0.01193071677410847  Validation-error: 0.008617168512610938
Epoch: 60  Train-error: 0.00948568533990954  Validation-error: 0.009958701905327935
Epoch: 70  Train-error: 0.008640393057259436  Validation-error: 0.007859707776487382
Epoch: 80  Train-error: 0.008499162764819592  Validation-error: 0.00805293813794774
Epoch: 90  Train-error: 0.008237914044169091  Validation-error: 0.008302529059961214


Test Neural Network


In [92]:
print("*********** Test ***********")

success = 0
for i in range(len(features_test)):
    a = predict(features_test[i], V, W)
    b = labels_test[i]
    if np.array_equal(a, b):
        success += 1

print("Total = %d Success = %d Accuracy = %f" %
      (len(features_test), success, success * 100 / len(features_test)))

*********** Test ***********
Total = 30 Success = 29 Accuracy = 96.666667
