In [1]:
import numpy as np

**Motivation**

In the Perceptron learning algorithm for solve a classification problem, we should find a hyperplane that separates data points to two class if the data are linearly separable. 

First, we read the data from `Q1data.csv` file and store it in a 2D numpy array called `points`.

In [2]:
points = np.zeros(shape = (500, 4), dtype = 'float')

In [3]:
def dataLoader():
    with open('.\\Q1data.csv', 'r') as f:
        lines = f.readlines()
        n = len(lines)
        for i in range(1, n):
            l = list(map(float, lines[i].split(',')))
            for j in range(len(l)):
                points[i-1, j] = l[j]

In [4]:
dataLoader()

Obviously, in the learning phase we want to learn what the `weights` belong to the hyperplane. 

so `weights` is a array that save weight of each $x_{i}$ in the classifier and $w_{0}$ that has the role of threshold.

and we update it by misclassified data from training data by below formula until all training data are correctly classified

$weights = weights + \eta*y*x$ <br>
which $\eta$ is learning rate

In [5]:
def perceptron(alpha:float):
    weights = np.random.random(size = len(points[0])-1+1)
    iteration = 0
    while True:
        iteration +=1
        misClassFlag = False
        n = 0
        for i in range(len(points)):
            res = weights[-1]
            for j in range(len(weights)-1):
                res += points[i, j] * weights[j]
            
            if res * points[i, -1] < 0: #that means they're different sign => misclassified
                misClassFlag = True
                n+=1
                for j in range(len(weights)-1):   
                    weights[j] += alpha * points[i, -1] * points[i, j]
                break
        
        print(iteration, weights)
        
        if not misClassFlag: break
    
    print(f'Perceptron Classifier: {weights[0]} x + {weights[1]} y + {weights[2]} z + {weights[3]}')

    # printing result of the model and answer show that algorithm works right
    correctness = True
    for i in range(len(points)):
        res = weights[-1]
        for j in range(len(weights)-1):
            res += points[i, j] * weights[j]
            
        print(res, points[i, -1], end="\t")
        if res * points[i, -1] > 0:
            print("correctly classified")
        else:
            print("misclassified")
            correctness = False
            
    print(correctness)

Now we run code for different $\eta$ (learning rate) value to analyze its effect.

In [6]:
perceptron(1)

1 [ 2.21586517e+02  8.14887128e+00 -6.29302666e+02  4.40647812e-01]
2 [ 2.24714371e+02  6.39038202e-01 -5.70243373e+02  4.40647812e-01]
3 [ 2.27842226e+02 -6.87079487e+00 -5.11184079e+02  4.40647812e-01]
4 [ 2.30970080e+02 -1.43806279e+01 -4.52124786e+02  4.40647812e-01]
5 [ 234.09793471  -21.89046102 -393.06549294    0.44064781]
6 [ 237.22578916  -29.40029409 -334.00619969    0.44064781]
7 [ 240.3536436   -36.91012717 -274.94690643    0.44064781]
8 [ 243.48149805  -44.41996024 -215.88761318    0.44064781]
9 [ 246.60935249  -51.92979332 -156.82831993    0.44064781]
10 [249.73720694 -59.43962639 -97.76902667   0.44064781]
11 [252.86506138 -66.94945946 -38.70973342   0.44064781]
12 [255.99291583 -74.45929254  20.34955983   0.44064781]
13 [ 199.26600559 -143.20678952   56.34985266    0.44064781]
14 [ 195.90086943 -204.88542835 -165.73263218    0.44064781]
15 [ 199.02872387 -212.39526143 -106.67333892    0.44064781]
16 [ 202.15657832 -219.9050945   -47.61404567    0.44064781]
17 [ 205.2844

In [7]:
perceptron(.7)

1 [ 155.13231825    5.73814618 -440.47011866    0.53241925]
2 [ 157.32181637    0.48126303 -399.12861338    0.53241925]
3 [ 159.51131448   -4.77562012 -357.78710811    0.53241925]
4 [ 161.70081259  -10.03250328 -316.44560283    0.53241925]
5 [ 163.8903107   -15.28938643 -275.10409755    0.53241925]
6 [ 166.07980881  -20.54626958 -233.76259227    0.53241925]
7 [ 168.26930692  -25.80315273 -192.421087      0.53241925]
8 [ 170.45880504  -31.06003588 -151.07958172    0.53241925]
9 [ 172.64830315  -36.31691904 -109.73807644    0.53241925]
10 [174.83780126 -41.57380219 -68.39657116   0.53241925]
11 [177.02729937 -46.83068534 -27.05506589   0.53241925]
12 [179.21679748 -52.08756849  14.28643939   0.53241925]
13 [ 139.50796032 -100.21081638   39.48664437    0.53241925]
14 [ 137.152365   -143.38586356 -115.97109502    0.53241925]
15 [ 139.34186311 -148.64274671  -74.62958974    0.53241925]
16 [ 141.53136123 -153.89962987  -33.28808446    0.53241925]
17 [ 143.72085934 -159.15651302    8.05342081

In [8]:
perceptron(.5)

1 [ 111.42842237    3.90451481 -314.25579476    0.51241891]
2 [ 1.12992350e+02  1.49598271e-01 -2.84726148e+02  5.12418912e-01]
3 [ 114.55627682   -3.60531827 -255.1965015     0.51241891]
4 [ 116.12020404   -7.3602348  -225.66685488    0.51241891]
5 [ 117.68413126  -11.11515134 -196.13720825    0.51241891]
6 [ 119.24805848  -14.87006788 -166.60756162    0.51241891]
7 [ 120.81198571  -18.62498441 -137.077915      0.51241891]
8 [ 122.37591293  -22.37990095 -107.54826837    0.51241891]
9 [123.93984015 -26.13481749 -78.01862174   0.51241891]
10 [125.50376737 -29.88973402 -48.48897512   0.51241891]
11 [127.0676946  -33.64465056 -18.95932849   0.51241891]
12 [128.63162182 -37.3995671   10.57031813   0.51241891]
13 [100.2681667  -71.77331559  28.57046455   0.51241891]
14 [  98.58559862 -102.61263501  -82.47077787    0.51241891]
15 [ 100.14952584 -106.36755154  -52.94113124    0.51241891]
16 [ 101.71345307 -110.12246808  -23.41148462    0.51241891]
17 [ 103.27738029 -113.87738462    6.11816201

In [9]:
perceptron(.2)

1 [  45.19808409    1.8416132  -125.49281349    0.30957827]
2 [  45.82365498    0.33964658 -113.68095484    0.30957827]
3 [  46.44922586   -1.16232003 -101.86909619    0.30957827]
4 [ 47.07479675  -2.66428665 -90.05723754   0.30957827]
5 [ 47.70036764  -4.16625326 -78.24537889   0.30957827]
6 [ 48.32593853  -5.66821988 -66.43352024   0.30957827]
7 [ 48.95150942  -7.17018649 -54.62166159   0.30957827]
8 [ 49.57708031  -8.67215311 -42.80980294   0.30957827]
9 [ 50.2026512  -10.17411972 -30.99794429   0.30957827]
10 [ 50.82822209 -11.67608634 -19.18608564   0.30957827]
11 [ 51.45379298 -13.17805295  -7.37422699   0.30957827]
12 [ 52.07936387 -14.68001957   4.43763166   0.30957827]
13 [ 40.73398182 -28.42951896  11.63769023   0.30957827]
14 [ 40.06095459 -40.76524673 -32.77880674   0.30957827]
15 [ 40.68652548 -42.26721334 -20.96694809   0.30957827]
16 [ 41.31209636 -43.76917996  -9.15508944   0.30957827]
17 [ 41.93766725 -45.27114657   2.65676921   0.30957827]
18 [ 59.61214247 -16.6927153

In [10]:
perceptron(.005)

1 [ 1.71780116  0.48127828 -2.75277674  0.89300399]
2 [ 1.73344043  0.44372912 -2.45748027  0.89300399]
3 [ 1.7490797   0.40617995 -2.1621838   0.89300399]
4 [ 1.76471897  0.36863078 -1.86688734  0.89300399]
5 [ 1.78035824  0.33108162 -1.57159087  0.89300399]
6 [ 1.79599752  0.29353245 -1.2762944   0.89300399]
7 [ 1.81163679  0.25598329 -0.98099794  0.89300399]
8 [ 1.82727606  0.21843412 -0.68570147  0.89300399]
9 [ 1.84291533  0.18088496 -0.39040501  0.89300399]
10 [ 1.85855461  0.14333579 -0.09510854  0.89300399]
11 [ 1.57492005 -0.20040169  0.08489292  0.89300399]
12 [ 1.2912855  -0.54413918  0.26489439  0.89300399]
13 [ 1.00765095 -0.88787666  0.44489585  0.89300399]
14 [ 2.11316111 -0.85196122 -2.70478107  0.89300399]
15 [ 2.12880038 -0.88951038 -2.4094846   0.89300399]
16 [ 2.14443965 -0.92705955 -2.11418814  0.89300399]
17 [ 2.16007893 -0.96460871 -1.81889167  0.89300399]
18 [ 2.1757182  -1.00215788 -1.52359521  0.89300399]
19 [ 2.19135747 -1.03970705 -1.22829874  0.89300399]
20

**conclusion**

$\eta$ = 1 -> after 277 epoch converged<br>
$\eta$ = .7 -> after 277 epoch converged<br>
$\eta$ = .5 -> after 309 epoch converged<br>
$\eta$ = .2 -> after 339 epoch converged<br>
$\eta$ = .005 -> after 321 epoch converged<br>

we could conclude that more learning rate helps to find hyperplane quickly.