In [1]:
import numpy as np

In [3]:
# Read the comma-separated data file into plain Python lists.
# For every non-empty line, the first four fields become one row of X
# and the fifth field becomes the matching entry of y.
X = []
y = []

# The handle is deliberately NOT called `f`: the notebook defines a function
# named `f` in a later cell, and re-running this cell would otherwise rebind
# that name to a closed file object.
with open('./perceptron.data', 'r') as data_file:
    for line in data_file:
        line = line.strip()
        if not line:
            # tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue
        info = line.split(',')
        X.append([float(i) for i in info[:4]])
        y.append(float(info[4]))

In [4]:
# Turn the parsed lists into numpy arrays: the feature rows are stacked
# vertically into X, and the targets are wrapped in an array y.
X, y = np.vstack(X), np.array(y)

In [5]:
# parameters
learning_rate = 1.0  # step size multiplied into every weight/bias update
N = len(y)           # number of entries in y

In [6]:
def f(W, b, X):
    """
    Affine score of the linear model: the dot product of W with X, plus b.

    W must provide a ``dot`` method (e.g. a numpy array); the training loops
    in this notebook call it with a single sample as X.
    """
    score = W.dot(X)
    return score + b

In [7]:
def predict(W, b, X):
    """
    Classify a batch of samples with the perceptron.

    X holds several samples (one per row), not a single sample.
    Returns an array with one entry per sample: 1 where the score
    X.dot(W) + b is strictly positive, -1 where it is zero or negative.
    """
    scores = X.dot(W) + b
    # work out both masks first, then overwrite the scores with the labels
    is_positive = scores > 0
    is_nonpositive = scores <= 0
    scores[is_positive] = 1
    scores[is_nonpositive] = -1
    return scores

In [8]:
def accuracy(y_pred, y_truth):
    """
    Compute accuracy: the fraction of positions where prediction and truth agree.

    Both arguments are converted with np.asarray first so the comparison is
    always element-wise. Without the conversion, two plain Python lists would
    be compared as whole objects, yielding a single True/False and therefore
    an accuracy of exactly 1.0 or 0.0.

    Parameters
    ----------
    y_pred : array-like of predicted labels
    y_truth : array-like of true labels

    Returns
    -------
    The mean of the element-wise equality, a value between 0.0 and 1.0.
    """
    y_pred = np.asarray(y_pred)
    y_truth = np.asarray(y_truth)
    return np.mean(y_pred == y_truth)

In [10]:
# start from an all-zero weight vector and a zero bias
W = np.zeros(4)
b = 0

# Perceptron training, at most 10000 passes over the data.
# TODO: how to vectorize the training?
for epoch in range(10000):
    # report the parameters as they stand at the start of this pass
    print('Iteration: ', epoch)
    print(W)
    print(b)
    # evaluate on the full data set; stop once every sample is classified correctly
    pred = predict(W, b, X)
    acc = accuracy(pred, y)
    print('accuracy:', acc)
    if acc == 1.0:
        break
    print()

    # One pass over all samples. W and b are updated immediately after each
    # sample whose margin y*f(x) is not strictly positive, so later samples
    # in the same pass already see the updated parameters.
    for i in range(N):
        margin = y[i] * f(W, b, X[i])
        if margin <= 0:
            step = learning_rate * y[i]
            W += step * X[i]
            b += step


Iteration:  0
[ 0.  0.  0.  0.]
0
accuracy: 0.394

Iteration:  1
[-6.87773525  6.33034122 -3.53400144  6.7111223 ]
0.0
accuracy: 0.971

Iteration:  2
[-8.19317316  6.24753498 -5.43262126  8.21632839]
0.0
accuracy: 0.908

Iteration:  3
[-9.14722627  9.13720801 -5.86813752  9.68346849]
0.0
accuracy: 0.988

Iteration:  4
[ -9.75395133  10.00161735  -6.46331178  10.81516961]
0.0
accuracy: 0.971

Iteration:  5
[-10.72245701   9.91025175  -6.8492348   11.78692398]
0.0
accuracy: 0.986

Iteration:  6
[-10.54273312  10.79807368  -8.05864498  12.36492285]
0.0
accuracy: 0.989

Iteration:  7
[-11.39717975  11.41330608  -7.91834422  12.57238297]
0.0
accuracy: 0.997

Iteration:  8
[-12.32015627  11.7883129   -7.65088944  12.59602624]
0.0
accuracy: 0.988

Iteration:  9
[-11.75404608  11.86012986  -7.75844178  14.38274211]
0.0
accuracy: 0.939

Iteration:  10
[-12.2662465   13.04839573  -8.61199487  13.4836672 ]
0.0
accuracy: 0.979

Iteration:  11
[-13.59750409  12.69369566  -8.24227774  13.90836465]
0

### Stochastic Gradient Descent

In [86]:
# Stochastic gradient descent: a single sample is examined per iteration.
W = np.zeros(4)  # weights start at zero
b = 0  # bias starts at zero

for epoch in range(10000):
    # report the parameters as they stand at the start of this iteration
    print('Iteration: ', epoch)
    print(W)
    print(b)
    # evaluate on the full data set; stop once every sample is classified correctly
    pred = predict(W, b, X)
    acc = accuracy(pred, y)
    print('accuracy:', acc)
    if acc == 1.0:
        break
    print()

    # Samples are visited in cyclic order (epoch modulo N), one per iteration.
    # The parameters change only if that sample's margin y*f(x) is not
    # strictly positive.
    i = epoch % N
    margin = y[i] * f(W, b, X[i])
    if margin <= 0:
        step = learning_rate * y[i]
        W += step * X[i]
        b += step


Iteration:  0
[ 0.  0.  0.  0.]
0
accuracy: 0.53

Iteration:  1
[-3.  2.  4.  3.]
-1.0
accuracy: 0.58

Iteration:  2
[ 1.  1.  4.  6.]
0.0
accuracy: 0.64

Iteration:  3
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  4
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  5
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  6
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  7
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  8
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  9
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  10
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  11
[ 5. -2.  6.  2.]
-1.0
accuracy: 0.93

Iteration:  12
[ 4. -5.  6. -1.]
0.0
accuracy: 0.92

Iteration:  13
[ 4. -5.  6. -1.]
0.0
accuracy: 0.92

Iteration:  14
[ 4. -5.  6. -1.]
0.0
accuracy: 0.92

Iteration:  15
[ 4. -5.  6. -1.]
0.0
accuracy: 0.92

Iteration:  16
[ 4. -5.  6. -1.]
0.0
accuracy: 0.92

Iteration:  17
[ 4. -5.  6. -1.]
0.0
accuracy: 0.92

Iteration:  18
[ 4. -5.  6. -1.]
0.0
accuracy: 0

3.0
accuracy: 0.98

Iteration:  315
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  316
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  317
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  318
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  319
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  320
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  321
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  322
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  323
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  324
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  325
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  326
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  327
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  328
[  9.  -5.  13.  -1.]
3.0
accuracy: 0.98

Iteration:  329
[  7.  -5.  13.  -6.]
4.0
accuracy: 0.94

Iteration:  330
[  7.  -5.  13.  -6.]
4.0
accuracy: 0.94

Iteration:  331
[  7.  -5.  13.  -6.]
4.0
accuracy: 

Iteration:  511
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  512
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  513
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  514
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  515
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  516
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  517
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  518
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  519
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  520
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  521
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  522
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  523
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  524
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  525
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  526
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  527
[  9.  -5.  17.   2.]
3.0
accuracy: 0.95

Iteration:  52

[ 11.  -6.  21.  -2.]
5.0
accuracy: 0.95

Iteration:  820
[ 11.  -6.  21.  -2.]
5.0
accuracy: 0.95

Iteration:  821
[ 11.  -6.  21.  -2.]
5.0
accuracy: 0.95

Iteration:  822
[ 11.  -9.  20.  -1.]
6.0
accuracy: 0.97

Iteration:  823
[ 11.  -9.  20.  -1.]
6.0
accuracy: 0.97

Iteration:  824
[ 11.  -9.  20.  -1.]
6.0
accuracy: 0.97

Iteration:  825
[ 11.  -9.  20.  -1.]
6.0
accuracy: 0.97

Iteration:  826
[ 11.  -9.  20.  -1.]
6.0
accuracy: 0.97

Iteration:  827
[ 11.  -9.  20.  -1.]
6.0
accuracy: 0.97

Iteration:  828
[ 11.  -9.  20.  -1.]
6.0
accuracy: 0.97

Iteration:  829
[  9.  -9.  20.  -6.]
7.0
accuracy: 0.99

Iteration:  830
[  9.  -9.  20.  -6.]
7.0
accuracy: 0.99

Iteration:  831
[  9.  -9.  20.  -6.]
7.0
accuracy: 0.99

Iteration:  832
[  9.  -9.  20.  -6.]
7.0
accuracy: 0.99

Iteration:  833
[  9.  -9.  20.  -6.]
7.0
accuracy: 0.99

Iteration:  834
[  9.  -9.  20.  -6.]
7.0
accuracy: 0.99

Iteration:  835
[  9.  -9.  20.  -6.]
7.0
accuracy: 0.99

Iteration:  836
[  9.  -9.  20

In [90]:
# Compare how many passes the perceptron needs for several step sizes.
# The loop variable is called `step_size` on purpose: iterating with the name
# `learning_rate` would overwrite the notebook-wide setting and leave it at
# the last value of the list for any cell that is (re-)run afterwards.
step_sizes = [10., 1., 0.1, 0.01]
for step_size in step_sizes:
    W = np.zeros(X.shape[1])  # one weight per feature column of X
    b = 0  # initial bias
    print('Learning Rate:', step_size)
    for epoch in range(10000):
        # check accuracy on the full data set before each pass
        pred = predict(W, b, X)
        acc = accuracy(pred, y)
        if acc == 1.0:
            # report only the converged state for this step size
            print('Iteration: ', epoch)
            print(W)
            print(b)
            print('accuracy:', acc)
            print()
            break

        # one pass over the samples; parameters are updated right after each
        # sample whose margin y*f(x) is not strictly positive
        for i in range(N):
            if y[i] * f(W, b, X[i]) <= 0:
                W += step_size * y[i] * X[i]
                b += step_size * y[i]
    else:
        # the epoch budget ran out without a break: say so instead of
        # printing nothing for this step size
        print('No perfect classification within 10000 iterations; last measured accuracy:', acc)
        print()

Learning Rate: 10.0
Iteration:  10
[ 120. -100.  210.  -50.]
40.0
accuracy: 1.0

Learning Rate: 1.0
Iteration:  10
[ 12. -10.  21.  -5.]
4.0
accuracy: 1.0

Learning Rate: 0.1
Iteration:  6
[ 0.9 -0.8  1.6 -0.4]
0.2
accuracy: 1.0

Learning Rate: 0.01
Iteration:  22
[ 0.17 -0.15  0.29 -0.08]
0.04
accuracy: 1.0



### Problem 2

In [91]:
# Problem 2 toy data: four one-dimensional samples and their labels.
# The two arrays hold the same values.
y2 = np.array([-1, 1, -1, 1])  # labels
X2 = np.array([-1, 1, -1, 1])  # feature values

In [106]:
# Perceptron on the one-dimensional Problem 2 data (X2, y2).
W = np.zeros(1)  # a single weight, starting at zero
b = 0  # bias starts at zero
learning_rate = 1.
for epoch in range(100):
    # report the parameters as they stand at the start of this pass
    print('Iteration: ', epoch)
    print(W)
    print(b)
    # score all samples, then map the scores to labels:
    # -1 for zero or negative scores, 1 for strictly positive ones
    pred = W*X2 + b
    is_nonpositive = pred <= 0
    is_positive = pred > 0
    pred[is_nonpositive] = -1
    pred[is_positive] = 1
    acc = accuracy(pred, y2)
    print('accuracy:', acc)
    if acc == 1.0:
        break
    print()

    # one pass over the four samples; parameters are updated right after each
    # sample whose margin y*(W*x + b) is not strictly positive
    for i in range(4):
        margin = y2[i]*(W*X2[i]+b)
        if margin <= 0:
            step = learning_rate * y2[i]
            W += step * X2[i]
            b += step

Iteration:  0
[ 0.]
0
accuracy: 0.5

Iteration:  1
[ 2.]
0.0
accuracy: 1.0
