# Exercise 1
Add Backpropagation to your MLP and train the model on the ZIP-Dataset.

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from statistics import mean
import random
from numpy import linalg as LA
from sklearn import decomposition
%matplotlib inline

Using MLP class from assignment 8:

- MLP with variable depth $D$ (number of layers) and a variable number of neurons $n_i$ for each layer $i = 1, \dots, D$.

In [1]:
class MLP:
    """
    Feed-forward multi-layer perceptron with Heaviside step activations.

    The network holds one weight matrix and one bias row vector per layer.
    Only the forward pass is implemented; `train` is a placeholder.
    """

    def __init__(self, threshold, depth, layer_width):
        """
        Set random network weights and check that `depth` matches `layer_width`.

        Parameters
        ----------
        threshold
            Stored as `self.threshold`. No method of this class reads it yet.
        depth : int
            Number of weight layers.
        layer_width : sequence of int
            Must have length `depth + 1`. `layer_width[0]` is the input
            dimension; each following entry is the width of one layer.

        Raises
        ------
        ValueError
            If `len(layer_width) != depth + 1`.
        """
        self.threshold = threshold
        self.depth = depth
        if len(layer_width) != depth + 1:
            # ValueError is a subclass of Exception, so existing
            # `except Exception` handlers keep working.
            raise ValueError("'layer_width' needs to be of length 'depth' + 1")
        self.layer_width = layer_width
        self.network_weights = []
        self.network_biases = []
        width_prev = self.layer_width[0]
        for width in self.layer_width[1:]:
            # Standard-normal weights scaled by sqrt(1 / fan_in); biases start at zero.
            self.network_weights.append(
                np.random.randn(width_prev, width) * np.sqrt(1. / width_prev)
            )
            self.network_biases.append(np.zeros((1, width)))
            width_prev = width

    def heaviside(self, X):
        """Heaviside step function: 1 where `X > 0`, otherwise 0 (integer array)."""
        return (X > 0).astype(int)

    def iterate_trough_network(self, X):
        """
        Pass the input `X` through every layer and return the last layer's output.

        Each layer computes `heaviside(X_i @ W_i + b_i)`. `X` itself is not
        modified; the forward pass starts from a copy.
        """
        X_i = X.copy()
        for i in range(self.depth):
            z_i = X_i @ self.network_weights[i] + self.network_biases[i]
            X_i = self.heaviside(z_i)
        return X_i

    def train(self, X):
        """
        Placeholder: the train function will be implemented in Assignment 9.

        Raises
        ------
        NotImplementedError
            Always. (Subclass of Exception, so existing handlers keep working.)
        """
        raise NotImplementedError("The train function will be implemented in Assignment 9!!!")

    def predict(self, X):
        """Run the forward pass on `X` and return the output flattened to 1-D."""
        return (self.iterate_trough_network(X)).ravel()

    def accuracy(self, labels, predictions):
        """
        Return the fraction of positions where `labels` equals `predictions`.

        Both arguments are converted to arrays first, so plain Python lists are
        compared element-wise instead of as a single `list == list` boolean.
        """
        return np.mean(np.asarray(labels) == np.asarray(predictions))

**Load ZIP data set**

In [None]:
path_to_train = 'zip.train'
path_to_test = 'zip.test'

# NOTE(review): the previous code sliced the training file with `1:-1` but the
# test file with `1:`, which suggests the training file parses with an extra,
# empty trailing column (presumably from a trailing separator) — confirm.
# Dropping all-NaN columns handles that case explicitly, so both files can be
# sliced the same way and no real pixel column is lost if the artefact is absent.
training_data = (
    pd.read_csv(path_to_train, sep=' ', header=None)
    .dropna(axis=1, how='all')
    .to_numpy()
)
test_data = (
    pd.read_csv(path_to_test, sep=' ', header=None)
    .dropna(axis=1, how='all')
    .to_numpy()
)

# Column 0 holds the digit label; the remaining columns are the features.
X_train_zip, y_train_zip = training_data[:, 1:], training_data[:, 0]
X_test_zip, y_test_zip = test_data[:, 1:], test_data[:, 0]
assert X_train_zip.shape[1] == X_test_zip.shape[1], "train/test feature counts differ"

# We only want to classify two different digits. You can choose which digits you want to classify yourself.
# Note: the MLP's heaviside output is 0/1 and accuracy compares labels to it directly,
# so choosing digits other than 0 and 1 also requires remapping the labels to 0/1.
train_mask = np.logical_or(y_train_zip == 0, y_train_zip == 1)
X_train_zip = X_train_zip[train_mask]
y_train_zip = y_train_zip[train_mask]

test_mask = np.logical_or(y_test_zip == 0, y_test_zip == 1)
X_test_zip = X_test_zip[test_mask]
y_test_zip = y_test_zip[test_mask]

#### Classify the Zip-Dataset with the random initial weights

In [21]:
# Two weight layers: input features -> 10 hidden units -> 1 output unit.
mlp_network = MLP(
    threshold=0.01,
    depth=2,
    layer_width=[X_train_zip.shape[1], 10, 1],
)

In [22]:
# Show the shapes of both weight matrices first, then both bias vectors,
# one shape per line.
print(
    *(
        params[layer].shape
        for params in (mlp_network.network_weights, mlp_network.network_biases)
        for layer in (0, 1)
    ),
    sep="\n",
)

(256, 10)
(10, 1)
(1, 10)
(1, 1)


In [23]:
# Forward pass of the untrained (randomly initialised) network on the training set.
y_pred_mlp = mlp_network.predict(X_train_zip)

In [24]:
# Fraction of training labels that the random-weight predictions match.
mlp_network.accuracy(y_train_zip, y_pred_mlp)

0.296043656207367

In [25]:
# Distinct predicted values and how often each one occurs.
np.unique(y_pred_mlp, return_counts=True)

(array([0, 1]), array([1622,  577], dtype=int64))

In [26]:
# Distinct true training labels and how often each one occurs (class balance).
np.unique(y_train_zip, return_counts=True)

(array([0., 1.]), array([1194, 1005], dtype=int64))

#### Get a mean accuracy over multiple runs

In [27]:
# Baseline: average training accuracy of freshly initialised (untrained) networks.
n_runs = 100
acc_list_mlp = []
for i in range(n_runs):
    # Each run draws a new set of random weights.
    mlp_network = MLP(
        threshold=0.01,
        depth=2,
        layer_width=[X_train_zip.shape[1], 10, 1],
    )
    y_pred_loop = mlp_network.predict(X_train_zip)
    acc_list_mlp.append(
        mlp_network.accuracy(y_train_zip, y_pred_loop)
    )
print(
    "Mean Acc over",
    n_runs,
    "runs, with random weights is:",
    np.mean(acc_list_mlp),
)

Mean Acc over 100 runs, with random weights is: 0.49954524783992726


### (a) Optimize width (the number of neurons in a hidden layer; it is usually the same for all of them) and depth of the network. Try to find a setting that trains in a reasonable time. Plot the loss.

### (b) Show some digits that are classified incorrectly.

### (c) Plot your first weight layer as a grayscale image.