In [1]:
#
# Hot Topics in Machine Learning, University of Mannheim, 2017
# Author: Rainer Gemulla
# Assignment 4

In [1]:
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import random
from sklearn import svm
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_squared_error

In [2]:
%matplotlib notebook

In [3]:
inNotebook = True  # change this to True if you use a notebook
def nextplot():
    if inNotebook:
        plt.figure()  # this creates a new plot
    else:
        plt.clf()     # and this clears the current one

In [4]:
# create the datasets
%run -i "a04-helper.py"

# 1 Perceptrons

In [6]:
# plot X1 (separable)
nextplot()
plot3(X1, y1)

<IPython.core.display.Javascript object>

In [7]:
# plot X2 (not separable)
nextplot()
plot3(X2, y2)

<IPython.core.display.Javascript object>

In [8]:
def pt_classify(X, w):
    """Classify using a perceptron.

    Parameters
    ----------
    X : ndarray of shape (N,D) or shape (D,)
        Design matrix of test examples
    w : ndarray of shape (D,)
        Weight vector

    Returns
    -------
    ndarray of shape (N,)
        Predicted binary labels (either 0 or 1)"""
    if len(X.shape) == 1:
        X = X.reshape((1, X.shape[0]))
    return (X@w >= 0).astype(np.int)

## 1a+c Learning

In [9]:
def pt_train(X, y, maxepochs=100, pocket=False, w0=None):
    """Train a perceptron.

    Parameters
    ----------
    X : ndarray of shape (N,D)
        Design matrix
    y : ndarray of shape (N,)
        Binary labels (either 0 or 1)
    maxepochs : int
        Maximum number of passes through the training set before the algorithm
        returns
    pocket : bool
       Whether to use the pocket algorithm (True) or the perceptron learning algorithm
       (False)
    w0 : ndarray of shape (D,)
        Initial weight vector

    Returns
    -------
    ndarray of shape (D,)
        Fitted weight vector"""

    N, D = X.shape
    if w0 is None:        # initial weight vector
        w0 = np.zeros(D)
    w = w0                # current weight vector

    ## YOUR CODE HERE
    
    def sign(x,w):
            if np.dot(x,w) < 0:
                return 0
            else:
                return 1
    
    total_test_number = 0
    weight_update_number = 0
    
    
    if pocket:
        w_r = w
        num_correct = 0
        ep = 0
        while ep <= maxepochs:
            num_tmp = 0
            z = random.randint(0, N-1)  # bias term is X[,0]?
            tmp = sign(X[z],w_r)
            total_test_number += 1
            if tmp != y[z]:
                w_r = w_r + (y[z] * 2 -1) * X[z] # Due to y[z] is 0 or 1, so y[z]*2-1 would be -1 or 1 
                y_hat = pt_classify(X,w_r)
                for i in range(N):
                    if y_hat[i] == 1:
                        num_tmp += 1
                total_test_number += N
                if num_tmp > num_correct:
                    w = w_r
                    weight_update_number += 1
                    num_correct = num_tmp
            ep += 1
                        
    else:
        for j in range(maxepochs):
            flag = False
            for i in range(N):
                total_test_number += 1
                tmp = sign(X[i],w)
                if tmp != y[i]:
                    w = w + (y[i] * 2 -1) * X[i]
                    weight_update_number += 1
                    flag = True
            if flag is False:
                break
    print("total test number: ",total_test_number," weight update number: ",weight_update_number)
    return w
    
    

## 1b+d Experimentation

In [12]:
# Train a perceptron using the perceptron learning algorithm and plot decision
# boundary. You should get a perfect classification here. The decision boundary
# should not change if you run this multiple times.
w = pt_train(X1, y1)
nextplot()
plot3(X1,y1)
plot3db(w, label="perceptron")

total test number:  800  weight update number:  5


<IPython.core.display.Javascript object>

In [14]:
# Dataset 1
nextplot()
plot3dbs(X1, y1, n=10, maxepochs=1000, pocket=False)

<IPython.core.display.Javascript object>

total test number:  1800  weight update number:  17
total test number:  2000  weight update number:  19
total test number:  600  weight update number:  3
total test number:  1000  weight update number:  8
total test number:  2000  weight update number:  20
total test number:  1200  weight update number:  11
total test number:  1200  weight update number:  10
total test number:  1400  weight update number:  12
total test number:  1200  weight update number:  9
total test number:  1000  weight update number:  7

Misclassification rates (train)
Perceptron (best result): 0
Linear SVM (C=1)        : 0
Logistic regression     : 0


In [13]:
# Dataset 2
nextplot()
plot3dbs(X2, y2, n=10, maxepochs=1000, pocket=False)

<IPython.core.display.Javascript object>

total test number:  200000  weight update number:  4095
total test number:  200000  weight update number:  4068
total test number:  200000  weight update number:  4096
total test number:  200000  weight update number:  4078
total test number:  200000  weight update number:  4094
total test number:  200000  weight update number:  4090
total test number:  200000  weight update number:  4108
total test number:  200000  weight update number:  4081
total test number:  200000  weight update number:  4089
total test number:  200000  weight update number:  4085

Misclassification rates (train)
Perceptron (best result): 11
Linear SVM (C=1)        : 3
Logistic regression     : 3


In [39]:
# Train a perceptron using the pocket algorithm and plot decision boundary. You
# should get a perfect classification here (with high probability). The decision
# boundary should change if you run this multiple times.
w = pt_train(X1, y1, pocket=True)
nextplot()
plot3(X1,y1)
plot3db(w, label="pocket")

total test number:  1101  weight update number:  2


<IPython.core.display.Javascript object>

In [42]:
# Train 10 perceptrons starting with random weights. Also train logistic
# regression and SVM. Plot all decision boundaries, and print the
# misclassification rates (on training data). Try this with and without the
# pocket algorithm.
nextplot()
plot3dbs(X2, y2, n=10, maxepochs=1000, pocket=True)

<IPython.core.display.Javascript object>

total test number:  6001  weight update number:  1
total test number:  8001  weight update number:  1
total test number:  6401  weight update number:  3
total test number:  7001  weight update number:  1
total test number:  7201  weight update number:  1
total test number:  6201  weight update number:  1
total test number:  6801  weight update number:  1
total test number:  8401  weight update number:  5
total test number:  6801  weight update number:  1
total test number:  7401  weight update number:  1

Misclassification rates (train)
Perceptron (best result): 39
Linear SVM (C=1)        : 3
Logistic regression     : 3


# 2 Multi-Layer Feed-Forward Neural Networks

## 2a Conjecture how an FNN fit will look like

In [62]:
# here is the one-dimensional dataset that we will use
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x116757cf8>

## 2b Train with 2 hidden units

In [16]:
# Train an FNN using scikit-learn. nhidden is an integer, it refers to the
# number of units in the hidden layer.
def train3(nhidden, activation='logistic'):
    print("Training FNN with", nhidden, "hidden units...")
    if type(nhidden) is int:
        nhidden = (nhidden,)
    model = MLPRegressor(activation=activation,
                         hidden_layer_sizes=nhidden,
                         solver='lbfgs',  # better solver for small datasets
                         alpha=0,         # no regularization
                         max_iter=100000, tol=1e-10)
    model.fit(X3,y3.reshape((-1,)))
    return model

In [19]:
# Let's fit the model with 2 units in the hidden layer
model = train3(2)
print("Training error:", mean_squared_error(y3, model.predict(X3)))
print("Test error    :", mean_squared_error(y3test, model.predict(X3test)))

Training FNN with 2 hidden units...
Training error: 0.0796001138975
Test error    : 0.0867189630641


In [20]:
# plot the data and the fit
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model)

<IPython.core.display.Javascript object>

In [66]:
# The weight matrices and bias vectors can be read out as follows. If you want,
# use these parameters to compute the output of the network (on X3) directly and
# compare to model.predict(X3)
weights = model.coefs_      # list of weight matrices (layer by layer)
biases = model.intercepts_  # list of bias vectors (layer by layer)

In [18]:
# now repeat this multiple times
# YOUR CODE HERE

for i in range(1,6):
    model = train3(i)
    print("Training error:", mean_squared_error(y3, model.predict(X3)))
    print("Test error    :", mean_squared_error(y3test, model.predict(X3test)))
    nextplot()
    tit = "i = "+str(i)
    plt.title(tit)
    plot1(X3, y3, label="train")
    plot1(X3test, y3test, label="test")
    plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model)
    

Training FNN with 1 hidden units...
Training error: 0.372918890125
Test error    : 0.374316696119


<IPython.core.display.Javascript object>

Training FNN with 2 hidden units...
Training error: 0.0795767086126
Test error    : 0.0867283369592


<IPython.core.display.Javascript object>

Training FNN with 3 hidden units...
Training error: 0.275699115114
Test error    : 0.284372032888


<IPython.core.display.Javascript object>

Training FNN with 4 hidden units...
Training error: 0.0244473065542
Test error    : 0.0283003955255


<IPython.core.display.Javascript object>

Training FNN with 5 hidden units...
Training error: 0.00730752758384
Test error    : 0.0103492015705


<IPython.core.display.Javascript object>

## 2c Improved training

In [22]:
# Derive a meta-learning algorithm that trains multiple times and returns the
# best fit (on training data). Use train3 in your implementation and pass
# argument "activation" to it as is.
def train3improved(nhidden, repetitions=10, activation='logistic'):
    # YOUR CODE HERE
    model_r = []
    j_r = 10
    for i in range(repetitions):
        model = train3(nhidden)
        j = mean_squared_error(y3, model.predict(X3))
        print(j)
        if j < j_r:
            model_r = model
            j_r = j
    return model_r

## 2d Width

In [23]:
# From now on, only use your improved method. Experiment with different hidden
# layer sizes
# YOUR CODE HERE
c = [1,2,3,10,50,100]
for i in range(6):
    model = train3improved(c[i])
    print("Training error:", mean_squared_error(y3, model.predict(X3)))
    print("Test error    :", mean_squared_error(y3test, model.predict(X3test)))
    nextplot()
    tit = "i = "+str(c[i])
    plt.title(tit)
    plot1(X3, y3, label="train")
    plot1(X3test, y3test, label="test")
    plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model)

Training FNN with 1 hidden units...
0.372918890132
Training FNN with 1 hidden units...
0.372918890129
Training FNN with 1 hidden units...
0.372918890124
Training FNN with 1 hidden units...
0.372918890131
Training FNN with 1 hidden units...
0.372918890141
Training FNN with 1 hidden units...
0.372918890502
Training FNN with 1 hidden units...
0.372918890244
Training FNN with 1 hidden units...
0.372918890129
Training FNN with 1 hidden units...
0.372918890229
Training FNN with 1 hidden units...
0.438548228124
Training error: 0.372918890124
Test error    : 0.374316698193


<IPython.core.display.Javascript object>

Training FNN with 2 hidden units...
0.27776878489
Training FNN with 2 hidden units...
0.302777803389
Training FNN with 2 hidden units...
0.357249510841
Training FNN with 2 hidden units...
0.286915792283
Training FNN with 2 hidden units...
0.0796260000886
Training FNN with 2 hidden units...
0.357249510807
Training FNN with 2 hidden units...
0.0795966039984
Training FNN with 2 hidden units...
0.0795866121575
Training FNN with 2 hidden units...
0.0795929460315
Training FNN with 2 hidden units...
0.372918925919
Training error: 0.0795866121575
Test error    : 0.0867305519728


<IPython.core.display.Javascript object>

Training FNN with 3 hidden units...
0.00859503865141
Training FNN with 3 hidden units...
0.0509846948755
Training FNN with 3 hidden units...
0.00834200354199
Training FNN with 3 hidden units...
0.357222330657
Training FNN with 3 hidden units...
0.0538870726006
Training FNN with 3 hidden units...
0.356208317406
Training FNN with 3 hidden units...
0.0510738361485
Training FNN with 3 hidden units...
0.0518871049059
Training FNN with 3 hidden units...
0.0518311590458
Training FNN with 3 hidden units...
0.0531410547864
Training error: 0.00834200354199
Test error    : 0.0120451475121


<IPython.core.display.Javascript object>

Training FNN with 10 hidden units...
0.00722519999522
Training FNN with 10 hidden units...
0.00704643988918
Training FNN with 10 hidden units...
0.00725232101007
Training FNN with 10 hidden units...
0.00678462683865
Training FNN with 10 hidden units...
0.00668934889555
Training FNN with 10 hidden units...
0.00688158533382
Training FNN with 10 hidden units...
0.00721167704218
Training FNN with 10 hidden units...
0.00750002003813
Training FNN with 10 hidden units...
0.0068292005408
Training FNN with 10 hidden units...
0.00678453965521
Training error: 0.00668934889555
Test error    : 0.0111095894787


<IPython.core.display.Javascript object>

Training FNN with 50 hidden units...
0.0068270905144
Training FNN with 50 hidden units...
0.00750140402998
Training FNN with 50 hidden units...
0.00684876529498
Training FNN with 50 hidden units...
0.00713658026598
Training FNN with 50 hidden units...
0.00649390326428
Training FNN with 50 hidden units...
0.0067240025531
Training FNN with 50 hidden units...
0.00668313137327
Training FNN with 50 hidden units...
0.00693916226034
Training FNN with 50 hidden units...
0.00716432390566
Training FNN with 50 hidden units...
0.00686968777277
Training error: 0.00649390326428
Test error    : 0.0112715633192


<IPython.core.display.Javascript object>

Training FNN with 100 hidden units...
0.0075420939779
Training FNN with 100 hidden units...
0.00733627134019
Training FNN with 100 hidden units...
0.00681544326259
Training FNN with 100 hidden units...
0.00674576734375
Training FNN with 100 hidden units...
0.00689478617878
Training FNN with 100 hidden units...
0.00673263155758
Training FNN with 100 hidden units...
0.00681069861179
Training FNN with 100 hidden units...
0.0069921241965
Training FNN with 100 hidden units...
0.00774469661815
Training FNN with 100 hidden units...
0.00717989660318
Training error: 0.00673263155758
Test error    : 0.0113797150491


<IPython.core.display.Javascript object>

## 2e Distributed representations

In [24]:
# train a model to analyze
model = train3improved(2)

Training FNN with 2 hidden units...
0.0796077516954
Training FNN with 2 hidden units...
0.357249765244
Training FNN with 2 hidden units...
0.357249531779
Training FNN with 2 hidden units...
0.357249855998
Training FNN with 2 hidden units...
0.0796030809375
Training FNN with 2 hidden units...
0.0795888469268
Training FNN with 2 hidden units...
0.354218986415
Training FNN with 2 hidden units...
0.357249530079
Training FNN with 2 hidden units...
0.372705918953
Training FNN with 2 hidden units...
0.0795891523714


In [25]:
# plot the fit as well as the outputs of each neuron in the hidden
# layer (scale for the latter is shown on right y-axis)
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model, hidden=True, scale=False)

<IPython.core.display.Javascript object>

In [26]:
# plot the fit as well as the outputs of each neuron in the hidden layer, scaled
# by its weight for the output neuron (scale for the latter is shown on right
# y-axis)
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model, hidden=True, scale=True)

<IPython.core.display.Javascript object>

In [27]:
model = train3improved(3)
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model, hidden=True, scale=False)
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model, hidden=True, scale=True)

Training FNN with 3 hidden units...
0.05199817042
Training FNN with 3 hidden units...
0.00854035592904
Training FNN with 3 hidden units...
0.0795818834646
Training FNN with 3 hidden units...
0.286919459356
Training FNN with 3 hidden units...
0.00748408016628
Training FNN with 3 hidden units...
0.0634797997672
Training FNN with 3 hidden units...
0.275504853209
Training FNN with 3 hidden units...
0.0795891838554
Training FNN with 3 hidden units...
0.00754997613239
Training FNN with 3 hidden units...
0.0092386323583


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [28]:
model = train3improved(10)
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model, hidden=True, scale=False)
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model, hidden=True, scale=True)

Training FNN with 10 hidden units...
0.00968735711292
Training FNN with 10 hidden units...
0.00739858659571
Training FNN with 10 hidden units...
0.00685150692207
Training FNN with 10 hidden units...
0.006742480638
Training FNN with 10 hidden units...
0.00673196895233
Training FNN with 10 hidden units...
0.00684396164812
Training FNN with 10 hidden units...
0.00681095022536
Training FNN with 10 hidden units...
0.00694904882748
Training FNN with 10 hidden units...
0.00683402919107
Training FNN with 10 hidden units...
0.00693745150011


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## 2f Depth (optional)

In [29]:
# Experiment with different number of layers.
#
# The argument nhidden is a tuple with as many entries as there are hidden
# layers; each entry is the size of the layer. The example code below uses two
# hidden layers; the first hidden layer has 9 neurons, then second one has 10
# neurons.
#
# The argument activation specifies the type of neurons in the hidden layers.
# Try 'logistic', 'relu', and 'tanh'. For 'relu' units and 1 hidden layer, you
# can also plot the outputs of the hidden neurons (using the same code above).
model = train3improved((9,10,), 50, activation='relu')
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model)
print("Training error:", mean_squared_error(y3, model.predict(X3)))
print("Test error    :", mean_squared_error(y3test, model.predict(X3test)))

Training FNN with (9, 10) hidden units...
0.00681737031037
Training FNN with (9, 10) hidden units...
0.00704362155318
Training FNN with (9, 10) hidden units...
0.00705189648836
Training FNN with (9, 10) hidden units...
0.00669421082263
Training FNN with (9, 10) hidden units...
0.00716251673178
Training FNN with (9, 10) hidden units...
0.00691831975085
Training FNN with (9, 10) hidden units...
0.00676748354142
Training FNN with (9, 10) hidden units...
0.00676493726632
Training FNN with (9, 10) hidden units...
0.00674166452688
Training FNN with (9, 10) hidden units...
0.0232709007898
Training FNN with (9, 10) hidden units...
0.00736276703478
Training FNN with (9, 10) hidden units...
0.00691764889249
Training FNN with (9, 10) hidden units...
0.0071194316287
Training FNN with (9, 10) hidden units...
0.00657773653729
Training FNN with (9, 10) hidden units...
0.00688149749735
Training FNN with (9, 10) hidden units...
0.00678683573695
Training FNN with (9, 10) hidden units...
0.35447024938
Tr

<IPython.core.display.Javascript object>

Training error: 0.00643076340882
Test error    : 0.0115872726165


## 2g Keras (optional)

In [5]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras import backend

Using TensorFlow backend.


In [30]:
# train an FNN with one hidden layer and nhidden units using Keras
def train3keras(nhidden_or_model, epochs=2000, lr=None, verbose=True):
    # we force computation on the CPU (much faster for such a small model)
    with tf.device("/cpu:0"):
        if type(nhidden_or_model) is int:
            # our FNN is a sequence of layers
            nhidden = nhidden_or_model
            model = Sequential()

            if nhidden > 0:
                # create the hidden layer (fully connected, logistic units).
                # Here we "help" the fitting method by provided a range of
                # useful (random) choice for the initial biases.
                model.add(Dense(nhidden, activation='sigmoid', input_dim=1,
                                bias_initializer=keras.initializers.RandomUniform(0,4*np.pi)))

                # create the output layer (fully connected, linear unit)
                model.add(Dense(1, activation='linear'))
            else:
                # create just the output layer (fully connected, linear unit)
                model.add(Dense(1, activation='linear', input_dim=1))

            # compile the model
            if lr is None:
                lr = 0.01
            model.compile(loss='mean_squared_error',                # optimize the mean squared error (MSE)
                          optimizer=keras.optimizers.Adam(lr=lr))   # use the Adam optimizer

            if verbose:
                print("Training FNN with {:d} hidden units for {:d} epochs...".format(nhidden, epochs))
        else:
            model = nhidden_or_model
            if lr is not None:
                backend.set_value(model.optimizer.lr, lr)
            if verbose:
                print("Continuing training for another {:d} epochs...".format(epochs))

        # train the model
        history = model.fit(X3, y3, epochs=epochs, verbose=0)

        # print errors
        mse_train = model.evaluate(X3,y3, verbose=0)
        if verbose:
            print("Training error:", mse_train)

        # return result
        return model, history

In [35]:
# Let's fit the model with 2 units in the hidden layer
model, history = train3keras(3)

Training FNN with 3 hidden units for 2000 epochs...
Training error: 0.0447012969851


In [36]:
# Plot the progress training made over time
nextplot()
plt.plot(history.history['loss'])
#plt.ylim([0,0.5])
plt.yscale("log")
plt.xlabel("Epoch")
plt.ylabel("Mean squared error")

<IPython.core.display.Javascript object>

<matplotlib.text.Text at 0x1e8595fec88>

In [37]:
# If the model does not "look" converged, you may want to try more
# epochs. You can resume training as follows:
train3keras(model)

Continuing training for another 2000 epochs...
Training error: 0.0440244069695


(<keras.models.Sequential at 0x1e855c7e710>,
 <keras.callbacks.History at 0x1e85af7bfd0>)

In [38]:
# plot the data and the fit
nextplot()
plot1(X3, y3, label="train")
plot1(X3test, y3test, label="test")
plot1fit(np.linspace(0, 13, 500)[:,np.newaxis], model, hidden=True)

<IPython.core.display.Javascript object>