# Introduction to neural nets

## 2. Multi-layer perceptron
<figure align="middle">
    <img src="images/perceptron_2layers.png" title="2-layer perceptron" width="400px">
    <figcaption>Fig 1. Multi-layer perceptron with 1 hidden layer.</figcaption>
</figure>

Implementation of a 2-layer perceptron: an input layer, one hidden layer, and an output layer.

In [None]:
import numpy as np

class MLPerceptron:
    """A multi-layer perceptron with a single hidden layer.

    The hidden layer uses logistic (sigmoid) units; the output layer is
    selected by ``outtype`` and may be ``'linear'``, ``'logistic'`` or
    ``'softmax'``.  A constant bias input of -1 is appended to both the
    input layer and the hidden layer.  Training is full-batch gradient
    descent with momentum.
    """

    def __init__(self, inputs, targets, nhidden, beta=1, momentum=0.9, outtype='logistic'):
        """Set up the network dimensions and randomly initialise the weights.

        Args:
            inputs: 2-D array, one row per sample and one column per feature.
            targets: 2-D array, one row per sample and one column per output.
            nhidden: number of hidden units (excluding the bias unit).
            beta: steepness of the logistic activation.
            momentum: fraction of the previous weight update that is carried
                over into the next one.
            outtype: output activation, one of 'linear', 'logistic', 'softmax'.
        """
        # Set up network size
        self.nin = np.shape(inputs)[1]
        self.nout = np.shape(targets)[1]
        self.ndata = np.shape(inputs)[0]
        self.nhidden = nhidden

        self.beta = beta
        self.momentum = momentum
        self.outtype = outtype

        # Initialise the weights uniformly in [-1/sqrt(fan_in), 1/sqrt(fan_in));
        # the extra row in each matrix holds the weights of the bias input.
        self.weights1 = (np.random.rand(self.nin + 1, self.nhidden) - 0.5) * 2 / np.sqrt(self.nin)
        self.weights2 = (np.random.rand(self.nhidden + 1, self.nout) - 0.5) * 2 / np.sqrt(self.nhidden)

    @staticmethod
    def _add_bias(inputs):
        """Return ``inputs`` with an extra trailing column of -1 (the bias input)."""
        return np.concatenate((inputs, -np.ones((np.shape(inputs)[0], 1))), axis=1)

    def _unknown_outtype(self):
        """Build the exception raised when ``self.outtype`` is not supported."""
        return ValueError(
            "Unknown outtype {!r}; expected 'linear', 'logistic' or 'softmax'".format(self.outtype)
        )

    def _output_delta(self, outputs, targets, ndata):
        """Return the output-layer error term for the configured output type."""
        if self.outtype == 'linear':
            return (outputs - targets) / ndata
        if self.outtype == 'logistic':
            return self.beta * (outputs - targets) * outputs * (1.0 - outputs)
        if self.outtype == 'softmax':
            return (outputs - targets) * (outputs * (-outputs) + outputs) / ndata
        raise self._unknown_outtype()

    def train(self, inputs, targets, eta, niterations):
        """Train the network with batch backpropagation.

        Runs at most ``niterations`` full-batch updates and stops early as
        soon as the training accuracy reported by ``confusion_matrix`` is 1.0.
        The sum-of-squares error is printed every 50 iterations and the
        number of iterations performed is printed at the end.

        Args:
            inputs: 2-D array of training samples (without the bias column).
            targets: 2-D array of target values, one row per sample.
            eta: learning rate.
            niterations: maximum number of iterations.

        Raises:
            ValueError: if ``self.outtype`` is not a supported output type.
        """
        # Use the size of the batch actually passed in, so training also works
        # on data with a different number of rows than the constructor saw.
        ndata = np.shape(inputs)[0]
        inputs_with_bias = self._add_bias(inputs)

        updatew1 = np.zeros(np.shape(self.weights1))
        updatew2 = np.zeros(np.shape(self.weights2))

        current_iteration = 0
        for n in range(niterations):
            current_iteration += 1
            self.outputs = self.forward(inputs_with_bias)

            error = 0.5 * np.sum((self.outputs - targets) ** 2)

            if n % 50 == 0:
                print("Iteration: ", n+1, " Error: ", error)

            # Error term at the output layer (depends on the output activation)
            deltao = self._output_delta(self.outputs, targets, ndata)

            # Error term at the hidden layer, propagated back through weights2
            deltah = self.hidden * self.beta * (1.0 - self.hidden) * np.dot(deltao, np.transpose(self.weights2))

            # Gradient step with momentum; the last column of deltah belongs
            # to the hidden bias unit, which has no incoming weights.
            updatew1 = eta * np.dot(np.transpose(inputs_with_bias), deltah[:, :-1]) + self.momentum * updatew1
            updatew2 = eta * np.dot(np.transpose(self.hidden), deltao) + self.momentum * updatew2
            self.weights1 -= updatew1
            self.weights2 -= updatew2

            # Stop as soon as every training sample is classified correctly
            cm, accuracy = self.confusion_matrix(inputs, targets)
            if accuracy == 1.0:
                break
        print("Total {} iterations".format(current_iteration))

    def forward(self, inputs):
        """Run the network forward and return the output activations.

        Args:
            inputs: 2-D array of samples that already includes the bias column.

        Returns:
            2-D array with one row per sample and one column per output unit.
            The hidden activations (with bias column) are stored in
            ``self.hidden`` as a side effect.

        Raises:
            ValueError: if ``self.outtype`` is not a supported output type.
        """
        hidden = np.dot(inputs, self.weights1)
        hidden = 1.0 / (1.0 + np.exp(-self.beta * hidden))
        self.hidden = self._add_bias(hidden)

        outputs = np.dot(self.hidden, self.weights2)

        # Different types of output neurons
        if self.outtype == 'linear':
            return outputs
        if self.outtype == 'logistic':
            return 1.0 / (1.0 + np.exp(-self.beta * outputs))
        if self.outtype == 'softmax':
            # Subtracting the row maximum leaves the softmax unchanged but
            # keeps exp() from overflowing for large activations.
            exps = np.exp(outputs - np.max(outputs, axis=1, keepdims=True))
            return exps / np.sum(exps, axis=1, keepdims=True)
        raise self._unknown_outtype()

    def confusion_matrix(self, inputs, targets):
        """Compute the confusion matrix and accuracy on the given data.

        Args:
            inputs: 2-D array of samples (without the bias column).
            targets: 2-D array of targets; a single column is treated as a
                binary 0/1 label, several columns as 1-of-N encoding.

        Returns:
            Tuple ``(cm, accuracy)`` where ``cm[i, j]`` counts the samples
            predicted as class ``i`` whose target class is ``j`` and
            ``accuracy`` is ``trace(cm) / sum(cm)``.
        """
        outputs = self.forward(self._add_bias(inputs))

        nclasses = np.shape(targets)[1]

        if nclasses == 1:
            # Single output unit: threshold at 0.5 to get a binary decision
            nclasses = 2
            outputs = np.where(outputs > 0.5, 1, 0)
        else:
            # 1-of-N encoding
            outputs = np.argmax(outputs, 1)
            targets = np.argmax(targets, 1)

        cm = np.zeros((nclasses, nclasses))
        for i in range(nclasses):
            for j in range(nclasses):
                cm[i, j] = np.sum((outputs == i) & (targets == j))

        return cm, np.trace(cm) / np.sum(cm)

## 2.1. Learning AND
This still learns a linear separator

In [None]:
# Truth table of logical AND: columns 0-1 are the inputs, column 2 is the target.
anddata = np.array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]])
and_inputs, and_targets = anddata[:, 0:2], anddata[:, 2:3]

# Network with 2 hidden units, trained for at most 1001 iterations.
p = MLPerceptron(and_inputs, and_targets, nhidden=2)
p.train(and_inputs, and_targets, eta=0.25, niterations=1001)

# Report how well the trained network reproduces the truth table.
print()
cm, accuracy = p.confusion_matrix(and_inputs, and_targets)
print("Confusion matrix:")
print(cm)
print("Accuracy: ", accuracy)

## 2.2. Learning XOR

In [None]:
# Truth table of logical XOR: columns 0-1 are the inputs, column 2 is the target.
xordata = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]])
xor_inputs, xor_targets = xordata[:, 0:2], xordata[:, 2:3]

# Network with 2 hidden units and a logistic output, at most 5001 iterations.
q = MLPerceptron(xor_inputs, xor_targets, nhidden=2, outtype='logistic')
q.train(xor_inputs, xor_targets, eta=0.25, niterations=5001)

# Report how well the trained network reproduces the truth table.
print()
cm, accuracy = q.confusion_matrix(xor_inputs, xor_targets)
print("Confusion matrix:")
print(cm)
print("Accuracy: ", accuracy)

<figure align="middle">
    <img src="images/mlp_XOR.png" title="Separated XOR by hyperplane" width="400px">
    <figcaption>Fig 2. The linearly non-separable XOR data separated by a hyperplane.</figcaption>
</figure>

## 2.3. Breast cancer diagnosis

The simple 2-layer perceptron implemented above is quite powerful.
We are going to use it to classify breast mass samples as malignant or non-malignant.
The dataset is from [here](https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Diagnostic)).

In [None]:
# Path of the CSV file with the dataset; it is passed to csv_to_array below.
data_file = "wdbc.csv"

In [None]:
import csv

def csv_to_array(file_name):
    """Load a comma-separated file into a 2-D float array.

    The first field of every line is the class label: 'M' (surrounding
    whitespace ignored) is encoded as 1 and anything else as 0.  The
    remaining fields are parsed as floats.  In the result the numeric
    fields come first and the encoded label is the last column.

    Args:
        file_name: path of the CSV file to read.

    Returns:
        Array of shape (number of rows, number of numeric fields + 1).  The
        result is always 2-D, including for a file with a single row; a file
        without any data rows yields an array of shape (0, 0).

    Raises:
        ValueError: if a field cannot be parsed as a float or the rows do
            not all have the same number of fields.
    """
    rows = []
    # newline='' lets the csv module do its own newline handling
    with open(file_name, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # csv.reader yields an empty list for a blank line
                continue
            class_label = 1 if row[0].strip() == 'M' else 0
            rows.append([float(x) for x in row[1:]] + [class_label])

    if not rows:
        return np.empty((0, 0))

    # Build the array once instead of growing it column by column
    return np.array(rows, dtype=float)

In [None]:
# Load the dataset: one row per sample, class label in the last column.
bc_data = csv_to_array(data_file)
n_rows, n_cols = np.shape(bc_data)
print("Dataset contains",n_rows,"rows and", n_cols,"columns")

# Split off the 0/1 target column so that it is left unscaled.
target_a = bc_data[:, -1:]
features = bc_data[:, :-1]

# Centre every feature column on its mean and scale it by its variance.
# NOTE(review): this divides by the variance, not the standard deviation - confirm this is intended.
features = (features - features.mean(axis=0)) / features.var(axis=0)

# Re-attach the target as the last column.
bc_data = np.concatenate((features, target_a), axis=1)

Train the network:

In [None]:
# All columns but the last are features; the last column is the 0/1 target.
bc_inputs = bc_data[:, 0:n_cols-1]
bc_targets = bc_data[:, n_cols-1:n_cols]

# Two hidden units and a linear output unit, trained with a small learning rate.
p = MLPerceptron(bc_inputs, bc_targets, nhidden=2, outtype="linear")
p.train(bc_inputs, bc_targets, eta=0.0005, niterations=1000001)

In [None]:
# Evaluate the trained network on the same data it was trained on.
eval_inputs = bc_data[:, 0:n_cols-1]
eval_targets = bc_data[:, n_cols-1:n_cols]
cm, accuracy = p.confusion_matrix(eval_inputs, eval_targets)

print("Confusion matrix:")
print(cm)
print("Accuracy: ", accuracy)
print("Note that the accuracy is only on a training set (just for demo)")

Copyright &copy; 2020 Marina Barsky. All rights reserved.