In [51]:
# NOTE:
# reg_param stands for the regularization parameter.
# Mathematically this value is usually written as lambda, but `lambda` is a reserved keyword in Python,
# so reg_param is used to represent it instead.

In [34]:
# imports
import pandas as pd
import numpy as np

In [35]:
# load the iris measurements; the file carries no header row, so columns are named here
column_names = ["sepal length", "sepal width", "petal length", "petal width", "class"]
data = pd.read_csv('iris.data.txt', header=None)
data.columns = column_names

# distinct iris classes, in order of first appearance in the file
labels = data['class'].unique().tolist()

In [53]:
# to test algorithm with different sets, run this cell
# (no random seed is set on purpose: every run draws a fresh shuffle)

# some useful values for the set
training_set_proportion = 0.7
set_size = data.shape[0]
training_set_size = int(training_set_proportion*set_size)
test_set_size = set_size - training_set_size

# shuffle data and input bias unit
shuffled = data.sample(frac=1)
shuffled.insert(0, 'bias', np.ones(set_size))
# NOTE: to view data, call "data" for organized data or "shuffled" for randomized data with bias unit

# integer row positions into the shuffled frame: the first training_set_size rows
# train the network and the remaining rows are held out for testing
# (np.arange yields integer positions directly, so take() needs no float-to-int casting)
training_indices = np.arange(training_set_size)
test_indices = np.arange(training_set_size, set_size)
training_set = shuffled.take(training_indices) # DataFrame with (training_set_size) random examples
test_set = shuffled.take(test_indices) # DataFrame with (test_set_size) random examples unique from training_set

# the two splits must partition the shuffled rows exactly
assert len(training_set) + len(test_set) == set_size

# create pandas Series for training labels and test labels with string datatype
# (pop also removes the 'class' column from each feature frame)
training_labels_strings = training_set.pop('class')
test_labels_strings = test_set.pop('class')

In [37]:
# network dimensions
# m = number of training examples, num_input_nodes = feature columns (bias column included)
m, num_input_nodes = training_set.shape
num_hidden_nodes = 3
# one output unit per iris class
num_output_nodes = len(labels)

In [38]:
# create matrices with binary values for class labels instead of strings
def _one_hot(label_strings, classes):
    """Return an (examples x classes) int matrix with a 1 where a label equals a class.

    label_strings: sequence (e.g. pandas Series) of class names, one per example.
    classes: sequence of the distinct class names; column k corresponds to classes[k].
    A label that matches none of the classes yields an all-zero row.
    Works for any number of classes, not only three.
    """
    # object dtype keeps the comparison an element-wise Python equality check
    observed = np.asarray(label_strings, dtype=object).reshape(-1, 1)
    known = np.asarray(classes, dtype=object).reshape(1, -1)
    return (observed == known).astype(int)

training_labels = _one_hot(training_labels_strings, labels)
test_labels = _one_hot(test_labels_strings, labels)

In [39]:
# NumPy copies of both feature frames, under shorter names for the code below
train, test = (np.array(frame) for frame in (training_set, test_set))

In [40]:
# allocate the weight matrices (filled with zeros here; randomized later by initializeTheta)
# theta1 maps the input layer to the hidden layer,
# theta2 maps the hidden layer (plus one bias unit) to the output layer
theta1_shape = (num_hidden_nodes, num_input_nodes)
theta2_shape = (num_output_nodes, num_hidden_nodes + 1)
theta1, theta2 = (np.zeros(shape) for shape in (theta1_shape, theta2_shape))

In [41]:
# initialize theta using small, random values to avoid symmetry
def initializeTheta(theta1, theta2, initialization_range=1e-3):
    """Overwrite both 2-D weight matrices in place with small random values.

    Every entry is drawn uniformly from
    [-initialization_range / 2, initialization_range / 2) using the global
    NumPy random state; theta1 is filled first, then theta2, row by row.

    Returns the same two array objects as a tuple (theta1, theta2).
    """
    for weights in (theta1, theta2):
        n_rows, n_cols = weights.shape[0], weights.shape[1]
        # a single batched draw is taken from the random stream in row-major order
        weights[...] = (np.random.rand(n_rows, n_cols) - 0.5) * initialization_range
    return (theta1, theta2)

In [42]:
# sigmoid function
def sig(x):
    """Logistic function 1 / (1 + e^(-x)); works on a scalar or element-wise on a NumPy array."""
    return 1/(1+np.exp(-1*x))
def sigmoid(z):
    """Apply the logistic function element-wise to z.

    z: NumPy array or any array-like of numbers.
    Returns float64 values with the same shape as z.

    The work is done by one vectorized call to sig instead of a Python loop
    that re-flattened z for every element.
    """
    values = np.asarray(z, dtype=float)
    return sig(values)

In [43]:
# cost function
def cost(train, training_labels, theta1, theta2, reg_param):
    """Regularized cross-entropy cost of the one-hidden-layer network.

    train: (examples x inputs) array; the caller supplies the bias column.
    training_labels: (examples x outputs) array of 0/1 targets.
    theta1: (hidden x inputs) weights; theta2: (outputs x hidden + 1) weights.
    reg_param: regularization strength.

    Returns (cross_entropy + reg_param / 2 * sum of squared non-bias weights)
    divided by the number of rows in train.

    The example count comes from train itself rather than from the global m,
    so the function is also correct for a data set other than the training set.
    The first column of each theta (the bias weights) is left out of the
    penalty, matching the gradient computed by backprop.

    Raises ValueError when train has no rows.
    """
    inputs = np.asarray(train, dtype=float)
    targets = np.asarray(training_labels, dtype=float)
    n_examples = inputs.shape[0]
    if n_examples == 0:
        raise ValueError("cost() requires at least one training example")

    # forward pass for all examples at once; a column of ones is the hidden bias unit
    hidden = np.insert(sigmoid(np.dot(inputs, theta1.T)), 0, 1, axis=1)
    outputs = sigmoid(np.dot(hidden, theta2.T))

    # keep saturated outputs away from exactly 0 and 1 so 0 * log(0) cannot produce NaN
    tiny = np.finfo(float).eps
    outputs = np.clip(outputs, tiny, 1 - tiny)
    cross_entropy = -np.sum(targets * np.log(outputs) + (1 - targets) * np.log(1 - outputs))

    # regularization over every weight except the bias column
    penalty = reg_param / 2 * (np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2))
    return (cross_entropy + penalty) / n_examples

In [44]:
# backpropagation
def backprop(train, training_labels, theta1, theta2, reg_param):
    """Gradient of the regularized cross-entropy cost with respect to both weight matrices.

    train: (examples x inputs) array; the caller supplies the bias column.
    training_labels: (examples x outputs) array of 0/1 targets.
    theta1: (hidden x inputs) weights; theta2: (outputs x hidden + 1) weights.
    reg_param: regularization strength.

    Returns (grad1, grad2), shaped like theta1 and theta2.

    The example count comes from train itself rather than from the global m.
    The regularization term is reg_param / examples * theta on every column
    except the first (bias) one; the 1 / examples factor is what makes this the
    gradient of a cost whose penalty is reg_param / (2 * examples) * sum(theta ** 2).

    Raises ValueError when train has no rows.
    """
    inputs = np.asarray(train, dtype=float)
    targets = np.asarray(training_labels, dtype=float)
    n_examples = inputs.shape[0]
    if n_examples == 0:
        raise ValueError("backprop() requires at least one training example")

    # forward pass for all examples at once; a column of ones is the hidden bias unit
    hidden = np.insert(sigmoid(np.dot(inputs, theta1.T)), 0, 1, axis=1)
    outputs = sigmoid(np.dot(hidden, theta2.T))

    # output-layer error, then the error pushed back to the hidden units
    # (the bias unit has no incoming weights, so its column is skipped)
    delta3 = outputs - targets
    hidden_units = hidden[:, 1:]
    delta2 = np.dot(delta3, theta2[:, 1:]) * hidden_units * (1 - hidden_units)

    # average the per-example outer products
    grad2 = np.dot(delta3.T, hidden) / n_examples
    grad1 = np.dot(delta2.T, inputs) / n_examples

    # regularization: bias column excluded
    grad1[:, 1:] += reg_param / n_examples * theta1[:, 1:]
    grad2[:, 1:] += reg_param / n_examples * theta2[:, 1:]
    return (grad1, grad2)

In [49]:
# gradient descent
def train_network(train, training_labels, theta1, theta2, reg_param, alpha, iterations=1000, updates=False, update_interval=1000):
    """Train the network with batch gradient descent and return the learned weights.

    train, training_labels: training inputs and 0/1 targets, passed through to backprop.
    theta1, theta2: arrays that only supply the shapes of the weight matrices;
        training always starts from a fresh random initialization, and the
        arrays passed in are left untouched (the work happens on copies).
    reg_param: regularization strength passed to backprop and cost.
    alpha: learning rate (step size).
    iterations: number of gradient steps.
    updates: when True, print the cost every update_interval steps (starting at step 0).
    update_interval: spacing of the progress prints; must be a positive integer.

    Returns the tuple (theta1, theta2) of trained weight matrices.
    """
    # copy first so the random re-initialization does not clobber the caller's arrays
    theta1, theta2 = initializeTheta(np.array(theta1, dtype=float), np.array(theta2, dtype=float))
    for step in range(iterations):
        grad1, grad2 = backprop(train, training_labels, theta1, theta2, reg_param)
        theta1 = theta1 - alpha * grad1
        theta2 = theta2 - alpha * grad2
        if updates and step % update_interval == 0:
            print(cost(train, training_labels, theta1, theta2, reg_param))
    return (theta1, theta2)

In [46]:
# test accuracy of theta
def test_theta(test, test_labels, theta1, theta2):
    total = 0
    correct = 0
    for i in range(len(test)):
        a1 = test[i][:,None] # (5, 1)
        z2 = np.dot(theta1, a1)
        a2 = np.insert(sigmoid(z2.flatten()), 0, 1)[:,None] # (4, 1)
        z3 = np.dot(theta2, a2)
        a3 = sigmoid(z3).flatten()
        if (predict(a3) == predict(test_labels[i])):
            correct += 1
        total += 1
    return correct / total

In [47]:
# predict most likely class
def predict(choices):
    """Return the index of the largest entry of choices as a Python int.

    choices: NumPy array or array-like of scores (network outputs or a
        one-hot label row).
    Ties go to the lowest index. An empty input returns 0.

    The maximum is taken over the actual values, so an input whose entries
    are all zero or negative still yields the index of its largest entry.
    """
    scores = np.asarray(choices).ravel()
    if scores.size == 0:
        return 0
    return int(np.argmax(scores))

In [54]:
# search over regularization strengths and learning rates
# values below have tested best for accuracy
reg_params = [0.001]
alphas = [0.1]
# to test more values, add to above arrays

# train once per (reg_param, alpha) pair and remember the most accurate pair
# NOTE(review): the winner is chosen by accuracy on `test`, the same split that is
# reported as the final score - consider a separate validation split
best_accuracy, best_reg_param, best_alpha = 0, reg_params[0], alphas[0]
for reg_param, alpha in [(r, a) for r in reg_params for a in alphas]:
    theta1, theta2 = train_network(train, training_labels, theta1, theta2, reg_param, alpha)
    accuracy = test_theta(test, test_labels, theta1, theta2)
    J = cost(train, training_labels, theta1, theta2, reg_param)
    if accuracy > best_accuracy:
        best_accuracy, best_reg_param, best_alpha = accuracy, reg_param, alpha
    print("Accuracy = ", round(accuracy, 2), " and Cost = ", round(J, 5), " (", reg_param, ", ", alpha, ")", sep="")
print("Best:", best_accuracy)
print("Regularization:", best_reg_param)
print("Alpha:", best_alpha)

Accuracy = 0.93 and Cost = 0.82958 (0.001, 0.1)
Best: 0.9333333333333333
Regularization: 0.001
Alpha: 0.1
