In [58]:
from sklearn.datasets import load_iris
import numpy as np
from sklearn.metrics import euclidean_distances

In [59]:
# Load the Iris dataset, then pull out the feature matrix and the class labels
data = load_iris()
x, y = data['data'], data['target']

In [60]:
# Number of samples (rows) in the feature matrix
x.shape[0]

150

In [61]:
# Min-max scale the features, since samples and prototypes are compared
# with Euclidean distance
from sklearn.preprocessing import MinMaxScaler
minmax = MinMaxScaler()
# Learn the per-feature range first, then apply the scaling to the same data
minmax.fit(x)
x = minmax.transform(x)

In [62]:
# Parameters for LVQ (learning vector quantization)
R = 2  # number of initial prototypes drawn for each class
n_classes = 3  # number of class labels; prototype selection iterates over range(n_classes)
epsilon = 0.9  # starting learning rate handed to every prototype
epsilon_dec_factor = 0.001  # amount subtracted from epsilon on every training iteration

In [63]:
class prototype(object):
    """A labelled prototype vector that is nudged around by input vectors.

    Attributes:
        class_id: label of the class this prototype stands for.
        p_vector: current position of the prototype.
        epsilon: step size applied on each call to ``update``.
    """

    def __init__(self, class_id, p_vector, epsilon):
        self.class_id, self.p_vector, self.epsilon = class_id, p_vector, epsilon

    def update(self, u_vector, increment = True):
        """Shift the prototype relative to ``u_vector``.

        Args:
            u_vector: the input vector driving the update.
            increment: when truthy the prototype is pulled towards
                ``u_vector``; otherwise it is pushed away from it.
        """
        # Offset from the prototype to the input, scaled by the step size
        step = self.epsilon * (u_vector - self.p_vector)
        if not increment:
            # Repel: move the prototype away from the input vector
            self.p_vector = self.p_vector - step
            return
        # Attract: move the prototype closer to the input vector
        self.p_vector = self.p_vector + step

In [64]:
def find_closest(in_vector, proto_vectors):
    """Find the prototype whose vector is nearest to ``in_vector``.

    Distance is the Euclidean norm of ``in_vector - prototype.p_vector``.

    Args:
        in_vector: the query vector.
        proto_vectors: iterable of objects exposing a ``p_vector`` attribute.

    Returns:
        The nearest prototype; the first one wins on ties. ``None`` when
        ``proto_vectors`` is empty or no distance compares below infinity.
    """
    closest = None
    # Start from +inf instead of an arbitrary large constant, so prototypes
    # that are very far away (distance >= 99999) are still considered.
    closest_distance = float("inf")
    for p_v in proto_vectors:
        distance = np.linalg.norm(in_vector - p_v.p_vector)
        if distance < closest_distance:
            closest_distance = distance
            closest = p_v
    return closest

In [65]:
def find_class_id(test_vector, p_vectors):
    """Return the class label of the prototype closest to ``test_vector``."""
    nearest = find_closest(test_vector, p_vectors)
    return nearest.class_id

In [66]:
# Choose R initial prototypes for each class
p_vectors = []
for i in range(n_classes):
    # select class i
    y_subset = np.where(y == i)
    # select the samples that belong to the chosen class
    x_subset = x[y_subset]
    # Draw R random row indices into x_subset. Sampling without replacement
    # keeps a class from getting the same sample twice as duplicate
    # prototypes; replacement is only allowed when the class has fewer than
    # R samples, so that case still yields R prototypes.
    n_candidates = len(x_subset)
    samples = np.random.choice(n_candidates, R, replace=n_candidates < R)
    # the prototypes start out as randomly chosen samples of x
    for sample in samples:
        s = x_subset[sample]
        p = prototype(i, s, epsilon)
        p_vectors.append(p)
print("class id \t Initial prototype vector \n")
for p_v in p_vectors:
    print(p_v.class_id, '\t', p_v.p_vector)

class id 	 Initial prototype vector 

0 	 [0.08333333 0.5        0.06779661 0.04166667]
0 	 [0.22222222 0.75       0.08474576 0.08333333]
1 	 [0.22222222 0.20833333 0.33898305 0.41666667]
1 	 [0.38888889 0.41666667 0.54237288 0.45833333]
2 	 [0.55555556 0.375      0.77966102 0.70833333]
2 	 [0.72222222 0.45833333 0.69491525 0.91666667]


In [67]:
# LVQ training loop: repeatedly draw a random sample, find its closest
# prototype, and pull that prototype towards the sample when the labels agree
# or push it away when they differ.
# The learning rate is decayed in a local variable so the configured
# `epsilon` is left untouched and this cell can be re-run.
learning_rate = epsilon
while learning_rate >= 0.01:
    # randint's upper bound is exclusive; len(x) lets every sample be drawn
    # (a hard-coded 149 would never pick the last one)
    rnd_i = np.random.randint(0, len(x))
    rnd_s = x[rnd_i]
    target_y = y[rnd_i]

    closest_pvector = find_closest(rnd_s, p_vectors)

    # Hand the prototype the current rate *before* it moves; otherwise it
    # would step with whatever rate it was given the last time it was chosen.
    closest_pvector.epsilon = learning_rate
    closest_pvector.update(rnd_s, target_y == closest_pvector.class_id)

    learning_rate = learning_rate - epsilon_dec_factor

print("class id \t Final prototype vector \n")
for p_v in p_vectors:
    print(p_v.class_id, '\t', p_v.p_vector)

class id 	 Final prototype vector 

0 	 [0.18056518 0.54783409 0.08733157 0.05430503]
0 	 [0.27838904 0.73531604 0.08667255 0.06940502]
1 	 [0.48705887 0.32792022 0.53428568 0.48836263]
1 	 [0.59871687 0.53584879 0.13685186 0.08903829]
2 	 [0.51087556 0.32419365 0.72289987 0.75405417]
2 	 [0.75741718 0.4700766  0.8345566  0.89834207]


In [70]:
from sklearn.metrics import classification_report

# Label every sample in x with the class of its nearest prototype
predicted_y = [find_class_id(row, p_vectors) for row in x]

# Per-class precision / recall / f1 of the predictions against the true labels
report = classification_report(
    y,
    predicted_y,
    target_names=['Iris-Setosa','Iris-Versicolour', 'Iris-Virginica'],
)
print(report)

                  precision    recall  f1-score   support

     Iris-Setosa       1.00      1.00      1.00        50
Iris-Versicolour       0.96      0.90      0.93        50
  Iris-Virginica       0.91      0.96      0.93        50

     avg / total       0.95      0.95      0.95       150

