In [3]:
import numpy as np
import pandas as pd
from sklearn import datasets
# Load the Iris dataset through scikit-learn; later cells read iris.data, iris.target and iris.DESCR.
iris = datasets.load_iris()

In [2]:
# Display the raw object returned by load_iris() (the repr below starts with its 'data' array).
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [3]:
# Print the description text that ships with the dataset object.
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [1]:
# Sequential (sample-by-sample) Widrow-Hoff learning algorithm
def widrow(n, data, epochs, a, b, eta, verbose=True):
    """Run the sequential Widrow-Hoff update rule and return the final weight vector.

    In every epoch the first ``n`` rows of ``data`` are visited in order and
    the weights are updated after each one::

        a <- a + eta * (b[i] - a . y_i) * y_i

    Every sample triggers an update, whether or not it is currently
    classified correctly.

    Parameters
    ----------
    n : int
        Number of leading rows of ``data`` used in each epoch.
    data : pandas.DataFrame or 2-D array-like
        One sample per row; rows are accessed by position.
    epochs : int
        Number of passes over the ``n`` samples.
    a : array-like
        Initial weight vector, one entry per column of ``data``. The caller's
        object is never modified; a new array is produced on every update.
    b : sequence
        Margins; ``b[i]`` is paired with row ``i``.
    eta : float
        Learning rate.
    verbose : bool, optional
        When True (the default) print one trace line per sample and a summary
        per epoch. Pass False to train silently.

    Returns
    -------
    The weight vector after the last update (``a`` itself if no update ran).
    """
    # Convert the samples once; the loop then indexes plain array rows instead of
    # building a pandas Series for every sample.
    samples = np.asarray(data)

    for e in range(epochs):
        last_a = a  # keeps the epoch summary well-defined even when n == 0
        for i in range(n):
            last_a = a
            y_i = samples[i]
            # a^T y_i computed with the weights *before* this update; used for the
            # correction term and for the trace line.
            activation = np.dot(a, y_i)
            a = a + eta * (b[i] - activation) * y_i

            if verbose:
                print("iteration #{}: aTyK = {} // a = {}".format(n*e+i+1, activation, a))

        # Epoch summary: "last a" is the vector just before the final update of the epoch
        if verbose:
            print('Epoch #{}: \nlast a: {}\nnew a: {}'.format(e+1, np.array(last_a), np.array(a)))

    return a

In [4]:
# Slide example: five samples, every margin equal to 2
y = pd.DataFrame([[1, 0, 0], [1, 1, 0], [1, 2, 1], [-1, 0, -1], [-1, -1, -2]])
b = np.array([2] * len(y))  # exactly one margin per sample
a = np.array([-1.5, 5, -1])

# Two epochs over every sample with a learning rate of 0.2
w = widrow(n=len(y), data=y, epochs=2, a=a, b=b, eta=0.2)
print(w) # It should be [-1.0394, 2.2247, -1.624]

iteration #1: aTyK = -1.5 // a = [-0.8  5.  -1. ]
iteration #2: aTyK = 4.2 // a = [-1.24  4.56 -1.  ]
iteration #3: aTyK = 6.879999999999999 // a = [-2.216  2.608 -1.976]
iteration #4: aTyK = 4.192 // a = [-1.7776  2.608  -1.5376]
iteration #5: aTyK = 2.2447999999999997 // a = [-1.72864  2.65696 -1.43968]
Epoch #1: 
last a: [-1.7776  2.608  -1.5376]
new a: [-1.72864  2.65696 -1.43968]
iteration #6: aTyK = -1.7286399999999997 // a = [-0.982912  2.65696  -1.43968 ]
iteration #7: aTyK = 1.674048 // a = [-0.9177216  2.7221504 -1.43968  ]
iteration #8: aTyK = 3.0868991999999995 // a = [-1.13510144  2.28739072 -1.65705984]
iteration #9: aTyK = 2.7921612799999997 // a = [-0.97666918  2.28739072 -1.49862758]
iteration #10: aTyK = 1.686533632 // a = [-1.03936246  2.22469745 -1.62401413]
Epoch #2: 
last a: [-0.97666918  2.28739072 -1.49862758]
new a: [-1.03936246  2.22469745 -1.62401413]
[-1.03936246  2.22469745 -1.62401413]


In [6]:
# Labsheet example: six samples, every margin equal to 1
y = pd.DataFrame([
    [1, 0, 2],
    [1, 1, 2],
    [1, 2, 1],
    [-1, 3, -1],
    [-1, 2, 1],
    [-1, 3, 2],
])
b = np.array([1] * 6)
a = np.array([1, 0, 0])

# Two epochs over all six samples with a learning rate of 0.1
w = widrow(n=len(y), data=y, epochs=2, a=a, b=b, eta=0.1)
print("aT: {}".format(w)) # It should be [0.597, 0.6259, -0.1715]

iteration #1: aTyK = 1 // a = [1. 0. 0.]
iteration #2: aTyK = 1.0 // a = [1. 0. 0.]
iteration #3: aTyK = 1.0 // a = [1. 0. 0.]
iteration #4: aTyK = -1.0 // a = [ 0.8  0.6 -0.2]
iteration #5: aTyK = 0.20000000000000012 // a = [ 0.72  0.76 -0.12]
iteration #6: aTyK = 1.32 // a = [ 0.752  0.664 -0.184]
Epoch #1: 
last a: [ 0.72  0.76 -0.12]
new a: [ 0.752  0.664 -0.184]
iteration #7: aTyK = 0.384 // a = [ 0.8136  0.664  -0.0608]
iteration #8: aTyK = 1.3559999999999999 // a = [ 0.778   0.6284 -0.132 ]
iteration #9: aTyK = 1.9028 // a = [ 0.68772  0.44784 -0.22228]
iteration #10: aTyK = 0.8780799999999997 // a = [ 0.675528  0.484416 -0.234472]
iteration #11: aTyK = 0.05883199999999972 // a = [ 0.5814112  0.6726496 -0.1403552]
iteration #12: aTyK = 1.1558271999999998 // a = [ 0.59699392  0.62590144 -0.17152064]
Epoch #2: 
last a: [ 0.5814112  0.6726496 -0.1403552]
new a: [ 0.59699392  0.62590144 -0.17152064]
aT: [ 0.59699392  0.62590144 -0.17152064]


In [7]:
# Coursework 1 exercise 1: same six samples as the labsheet example, but with per-sample margins
y = pd.DataFrame([
    [1, 0, 2],
    [1, 1, 2],
    [1, 2, 1],
    [-1, 3, -1],
    [-1, 2, 1],
    [-1, 3, 2],
])
b = np.array([1, 2.5, 0.5, 2.5, 0.5, 1])
a = np.array([1, 0, 0])

# Two epochs over all six samples with a learning rate of 0.1
w = widrow(n=len(y), data=y, epochs=2, a=a, b=b, eta=0.1)
print("aT: {}".format(w)) # It should be [0.63850576, 0.81717232, -0.43776992]

iteration #1: aTyK = 1 // a = [1. 0. 0.]
iteration #2: aTyK = 1.0 // a = [1.15 0.15 0.3 ]
iteration #3: aTyK = 1.75 // a = [ 1.025 -0.1    0.175]
iteration #4: aTyK = -1.4999999999999998 // a = [ 0.625  1.1   -0.225]
iteration #5: aTyK = 1.35 // a = [ 0.71  0.93 -0.31]
iteration #6: aTyK = 1.46 // a = [ 0.756  0.792 -0.402]
Epoch #1: 
last a: [ 0.71  0.93 -0.31]
new a: [ 0.756  0.792 -0.402]
iteration #7: aTyK = -0.04800000000000004 // a = [ 0.8608  0.792  -0.1924]
iteration #8: aTyK = 1.268 // a = [0.984  0.9152 0.054 ]
iteration #9: aTyK = 2.8684 // a = [ 0.74716  0.44152 -0.18284]
iteration #10: aTyK = 0.7602399999999999 // a = [ 0.573184  0.963448 -0.356816]
iteration #11: aTyK = 0.9968960000000002 // a = [ 0.6228736  0.8640688 -0.4065056]
iteration #12: aTyK = 1.1563216 // a = [ 0.63850576  0.81717232 -0.43776992]
Epoch #2: 
last a: [ 0.6228736  0.8640688 -0.4065056]
new a: [ 0.63850576  0.81717232 -0.43776992]
aT: [ 0.63850576  0.81717232 -0.43776992]


In [8]:
# Coursework 1 exercise 2
#
# Two-class problem on the Iris dataset for the Widrow-Hoff Learning Algorithm: the first 50 samples
# (label 0) form one class and the remaining 100 samples (labels 1 and 2) form the other.
y = pd.DataFrame(iris.data)  # the 150 data points, four features each
target = iris.target  # the 150 original labels (0, 1 or 2)

# Column of ones that gets prepended to every sample. It starts as +1 for all 150 rows; the sample
# normalisation in the next cell turns it into -1 for the second class.
new_target = pd.DataFrame([1 for _ in range(150)])

# Prepend the ones column to the features and number the five resulting columns 0..4
data = pd.concat([new_target, y], axis=1)
data.columns = list(range(5))
data

Unnamed: 0,0,1,2,3,4
0,1,5.1,3.5,1.4,0.2
1,1,4.9,3.0,1.4,0.2
2,1,4.7,3.2,1.3,0.2
3,1,4.6,3.1,1.5,0.2
4,1,5.0,3.6,1.4,0.2
...,...,...,...,...,...
145,1,6.7,3.0,5.2,2.3
146,1,6.3,2.5,5.0,1.9
147,1,6.5,3.0,5.2,2.0
148,1,6.2,3.4,5.4,2.3


In [9]:
# Sample normalisation: flip the sign of every row from position 50 onwards (the second class).
# The leading ones column is negated together with the features, so those rows now start with -1.
# NOTE: this is an in-place sign flip, so running the cell a second time undoes it.
data.iloc[50:] = -1 * data.iloc[50:]
data

Unnamed: 0,0,1,2,3,4
0,1.0,5.1,3.5,1.4,0.2
1,1.0,4.9,3.0,1.4,0.2
2,1.0,4.7,3.2,1.3,0.2
3,1.0,4.6,3.1,1.5,0.2
4,1.0,5.0,3.6,1.4,0.2
...,...,...,...,...,...
145,-1.0,-6.7,-3.0,-5.2,-2.3
146,-1.0,-6.3,-2.5,-5.0,-1.9
147,-1.0,-6.5,-3.0,-5.2,-2.0
148,-1.0,-6.2,-3.4,-5.4,-2.3


In [10]:
# The samples are augmented and normalised, so the Widrow-Hoff Learning Algorithm can be applied.

# Starting point and hyper-parameters
a = np.array([0.5, -1.5, 2.5, -0.5, -0.5])  # initial values of a = (w0, wT)
b = np.array([1 for _ in range(150)])  # every margin equal to 1
eta = 0.01  # learning rate

# Two epochs over all 150 samples
new_a2 = widrow(n=len(b), data=data, epochs=2, a=a, b=b, eta=eta)

print("aT: {}".format(new_a2)) # It should be [0.49750821, -1.04909666, 2.113824, -0.14947402, -0.50274409]

iteration #1: aTyK = 0.8000000000000016 // a = [ 0.502  -1.4898  2.507  -0.4972 -0.4996]
iteration #2: aTyK = -0.07302000000000053 // a = [ 0.5127302  -1.43722202  2.5391906  -0.48217772 -0.49745396]
iteration #3: aTyK = 1.1568747980000005 // a = [ 0.51116145 -1.44459514  2.53417061 -0.48421709 -0.49776771]
iteration #4: aTyK = 0.8960735282506018 // a = [ 0.51220072 -1.43981452  2.53739233 -0.4826582  -0.49755986]
iteration #5: aTyK = 1.6725070604801262 // a = [ 0.50547565 -1.47343987  2.51318207 -0.49207329 -0.49890487]
iteration #6: aTyK = 1.314223879650898 // a = [ 0.50233341 -1.49040796  2.50092734 -0.4974151  -0.50016177]
iteration #7: aTyK = 1.303180081244971 // a = [ 0.49930161 -1.50435424  2.49061922 -0.50165962 -0.50107131]
iteration #8: aTyK = 0.5929320370864462 // a = [ 0.50337229 -1.48400085  2.50445953 -0.4955536  -0.50025717]
iteration #9: aTyK = 0.44287472324944055 // a = [ 0.50894354 -1.45948733  2.52061616 -0.48775385 -0.49914292]
iteration #10: aTyK = 0.38982064376824

iteration #90: aTyK = 1.291288084771959 // a = [ 0.55002248 -1.12389107  2.50586638 -0.29208314 -0.45806241]
iteration #91: aTyK = 0.9509665691517322 // a = [ 0.54953215 -1.12658791  2.50459151 -0.29424061 -0.45865081]
iteration #92: aTyK = 0.8044975595920807 // a = [ 0.54757712 -1.13851356  2.49872644 -0.30323373 -0.46138785]
iteration #93: aTyK = 1.3257131262353763 // a = [ 0.55083425 -1.1196222   2.50719498 -0.2902052  -0.45747929]
iteration #94: aTyK = 0.6958847564955448 // a = [ 0.5477931  -1.13482796  2.50020033 -0.300241   -0.46052044]
iteration #95: aTyK = 0.916391402395603 // a = [ 0.54695702 -1.13951004  2.49794289 -0.30375257 -0.46160735]
iteration #96: aTyK = 0.2841111541768564 // a = [ 0.53979813 -1.18031571  2.47646623 -0.3338199  -0.47019802]
iteration #97: aTyK = 1.0195503413682951 // a = [ 0.53999363 -1.17920134  2.47703319 -0.33299878 -0.46994387]
iteration #98: aTyK = 1.630480215037264 // a = [ 0.54629843 -1.14011157  2.49531711 -0.30588813 -0.46174762]
iteration #99

iteration #181: aTyK = 0.7013698956061605 // a = [ 0.49629681 -1.27133602  2.25228681 -0.29159091 -0.49902125]
iteration #182: aTyK = 0.6518625795404756 // a = [ 0.49977818 -1.2525366   2.26412348 -0.28636885 -0.4976287 ]
iteration #183: aTyK = 2.790177989428461 // a = [ 0.4818764  -1.34562585  2.19072618 -0.31322152 -0.49941888]
iteration #184: aTyK = 1.7435902643884775 // a = [ 0.4744405  -1.38652332  2.15949539 -0.32363178 -0.50090606]
iteration #185: aTyK = -0.21071693820545878 // a = [ 0.48654767 -1.32719819  2.19702762 -0.30547103 -0.49848463]
iteration #186: aTyK = 0.41478293509830877 // a = [ 0.49239984 -1.29793734  2.21575456 -0.29844842 -0.49731419]
iteration #187: aTyK = 0.6214396701369127 // a = [ 0.49618544 -1.27711652  2.22900417 -0.29352714 -0.49655707]
iteration #188: aTyK = 1.8021358287318332 // a = [ 0.48816409 -1.31642117  2.20012728 -0.30475704 -0.49735921]
iteration #189: aTyK = 0.8006367785911745 // a = [ 0.49015772 -1.30764919  2.20610818 -0.30216532 -0.49696048]

iteration #275: aTyK = -0.10794592973255224 // a = [ 0.50401455 -1.02597257  2.16518601 -0.12045527 -0.48499761]
iteration #276: aTyK = 1.550120000600904 // a = [ 0.50951575 -0.98636393  2.18278985 -0.08744807 -0.47509545]
iteration #277: aTyK = 0.7690515464925545 // a = [ 0.50720627 -1.00068273  2.17632329 -0.0985336  -0.47925252]
iteration #278: aTyK = 0.413457656451202 // a = [ 0.50134084 -1.03646181  2.15872702 -0.12727417 -0.48981028]
iteration #279: aTyK = 1.828916037832811 // a = [ 0.50963    -0.98341119  2.18193667 -0.08085487 -0.47240304]
iteration #280: aTyK = 1.2499236516596444 // a = [ 0.51212924 -0.96541668  2.18943438 -0.0663593  -0.46840427]
iteration #281: aTyK = 1.7962977867795331 // a = [ 0.52009222 -0.90649065  2.21173072 -0.01778514 -0.45327461]
iteration #282: aTyK = -0.7430187578954657 // a = [ 0.50266203 -1.04418913  2.14549601 -0.12933834 -0.48813498]
iteration #283: aTyK = 1.9709512200818864 // a = [ 0.51237154 -0.98204825  2.17268264 -0.07496507 -0.46677406]
i

In [11]:
# Exercise 2, second part: report the percentage of correct outputs for both classes.
#
# The trained vector new_a2 = (w0, w) gives the discriminant g(x) = w0 + w.x, computed here as the dot
# product of new_a2 with the augmented sample [1, x]. A sample is assigned to class 1 when g(x) > 0 and
# to class -1 otherwise.

# Undo the sample normalisation of rows 50 onwards. The leading 1 is restored together with the four
# features (taken from iris.data), so the bias w0 enters g(x) with the right sign; restoring only the
# features would leave those rows as [-1, x] and evaluate -w0 + w.x instead. Writing fixed values, rather
# than negating in place, also means that re-running this cell leaves `data` unchanged.
data.iloc[50:, 0] = 1
data.iloc[50:, 1:5] = iris.data[50:]

# g(x) for every sample in one step
outputs = np.dot(np.array(data), new_a2)

# For datapoints from class 1 (first 50 rows): correct when g(x) > 0
score = int((outputs[:50] > 0).sum())
print("Class 1 score: {}".format(score))
total_score = score

# For datapoints from the other class (rows 50..149): correct when g(x) <= 0
score = int((outputs[50:150] <= 0).sum())
print("Class -1 score: {}".format(score))
total_score += score
print("Total correctly predicted instances: {}\nPercentage of correct instances: {}".format(total_score, total_score*100/150))
# It should be 100.0%

Class 1 score: 50
Class -1 score: 100
Total correctly predicted instances: 150
Percentage of correct instances: 100.0


In [12]:
# For comparison, score the vector we had before training: a = (0.5, -1.5, 2.5, -0.5, -0.5).
first_a = np.array([0.5, -1.5, 2.5, -0.5, -0.5])

# Make sure rows 50 onwards hold the un-normalised augmented samples [1, x] before scoring, instead of
# relying on whatever an earlier cell left in `data`. The values are written from iris.data, so this is
# safe to run any number of times and the bias term keeps its sign in g(x) = w0 + w.x.
data.iloc[50:, 0] = 1
data.iloc[50:, 1:5] = iris.data[50:]

# g(x) for every sample in one step
outputs = np.dot(np.array(data), first_a)

# For datapoints from class 1 (first 50 rows): correct when g(x) > 0
score = int((outputs[:50] > 0).sum())
print("Class 1 score: {}".format(score))
total_score = score

# For datapoints from the other class (rows 50..149): correct when g(x) <= 0
score = int((outputs[50:150] <= 0).sum())
print("Class -1 score: {}".format(score))
total_score += score
print("Total correctly predicted instances: {}\nPercentage of correct instances: {}".format(total_score, total_score*100/150))
# It should be 96.0%

Class 1 score: 44
Class -1 score: 100
Total correctly predicted instances: 144
Percentage of correct instances: 96.0


In [13]:
# Coursework 1 exercise 3
#
# This exercise asks for a k-Nearest-Neighbour classifier, with k=1 and with k=5, trained on the Iris
# dataset and applied to the five datapoints below (one per row). The exercise recommends
# scikit-learn, whose "KNeighborsClassifier" implements this model.
new_datapoints = pd.DataFrame([
    [7.0, 2.9, 6.3, 0.1],
    [7.8, 2.1, 1.7, 1.2],
    [6.5, 3.8, 1.1, 0.9],
    [6.4, 3.7, 3.0, 0.4],
    [4.8, 2.9, 5.3, 0.2],
])

In [14]:
# Bring in scikit-learn's nearest-neighbour classifier
from sklearn.neighbors import KNeighborsClassifier

# k=1 model, trained on all the Iris data points (fit hands back the estimator, so the calls are chained)
neigh1 = KNeighborsClassifier(n_neighbors=1, weights='distance').fit(iris.data, iris.target)

# Class assigned to each of the new datapoints
neigh1.predict(new_datapoints)

array([2, 1, 0, 0, 1])

In [15]:
# Same procedure with k=5: build the model and train it on all the Iris data points in one chained call
neigh5 = KNeighborsClassifier(n_neighbors=5, weights='distance').fit(iris.data, iris.target)

# Class assigned to each of the new datapoints
neigh5.predict(new_datapoints)

array([2, 0, 0, 1, 1])