In [63]:
from sklearn.datasets import load_iris
import numpy as np
from sklearn.metrics import euclidean_distances
import pandas as pd

In [64]:
# Load the Iris data set: feature matrix goes to x, class labels to y.
data = load_iris()
x, y = data['data'], data['target']
# Alternative data source kept for reference (CSV: columns 0-5 as features,
# column 6 as the label):
# data = pd.read_csv(r"SD-2X_rocktype.csv")
# x = data.iloc[:, 0:6].values
# y = data.iloc[:, 6].values

In [65]:
# Show the distinct class labels present in the target vector y.
np.unique(y)

array([0, 1, 2])

In [66]:
# Hold out 25% of the samples for testing; random_state pins the split.
# `sklearn.cross_validation` was removed in scikit-learn 0.20, so import from
# `sklearn.model_selection` and only fall back for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=44)

# Use MinMaxScaler because LVQ relies on Euclidean distance: every feature is
# rescaled to a comparable range. The scaler is fitted on the training split
# only and then applied to the test split, so no test statistics leak in.
from sklearn.preprocessing import MinMaxScaler
minmax = MinMaxScaler()
x_train = minmax.fit_transform(x_train)
x_test = minmax.transform(x_test)

In [67]:
# Hyper-parameters for LVQ
R = 2  # number of initial prototypes drawn for each class
n_classes = 3  # number of class labels iterated over when creating prototypes
epsilon = 0.9  # initial learning rate given to every prototype; training loops while it is >= 0.01
epsilon_dec_factor = 0.001  # amount subtracted from epsilon on every training iteration

In [68]:
# Prototype (codebook) vector used by LVQ
class prototype(object):
    """A labelled prototype vector with its own learning rate.

    Attributes:
        class_id: label of the class this prototype represents.
        p_vector: current position of the prototype in feature space.
        epsilon: learning rate applied by `update`.
    """

    def __init__(self, class_id, p_vector, epsilon):
        self.class_id = class_id
        self.p_vector = p_vector
        self.epsilon = epsilon

    def update(self, u_vector, increment = True):
        """Shift the prototype relative to `u_vector` by a fraction `epsilon`.

        Args:
            u_vector: the input vector the prototype reacts to.
            increment: when true the prototype moves towards `u_vector`,
                otherwise it moves away from it.
        """
        # Scaled displacement from the prototype to the input vector.
        step = self.epsilon * (u_vector - self.p_vector)
        # Attract on a match, repel otherwise; a new vector is assigned rather
        # than modifying the old one in place.
        self.p_vector = self.p_vector + step if increment else self.p_vector - step

In [69]:
# function to find the closest prototype vector for a given vector
def find_closest(in_vector, proto_vectors):
    """Return the prototype nearest to `in_vector` (Euclidean distance).

    Args:
        in_vector: the query vector.
        proto_vectors: sequence of objects exposing a `p_vector` attribute.

    Returns:
        A two-element list `[position, closest]`: the index of the nearest
        prototype in `proto_vectors` and the prototype itself. On a tie the
        earliest prototype wins. Both entries are None when `proto_vectors`
        is empty.
    """
    position = None
    closest = None
    # Start from +inf rather than a fixed large number so that a prototype is
    # always found, however far away the data lies (e.g. unscaled features).
    closest_distance = np.inf
    for i, candidate in enumerate(proto_vectors):
        distance = np.linalg.norm(in_vector - candidate.p_vector)
        if distance < closest_distance:
            closest_distance = distance
            closest = candidate
            position = i
    return [position, closest]

In [70]:
def find_class_id(test_vector, p_vectors):
    """Predict a label: the class_id of the prototype nearest to `test_vector`."""
    _, nearest = find_closest(test_vector, p_vectors)
    return nearest.class_id

In [71]:
# Choose R initial prototypes for each class
p_vectors = []
for i in range(n_classes):
    # training rows that belong to class i
    x_subset = x_train[y_train == i]
    # Draw R distinct row indices from this class so that it never starts
    # with duplicated prototypes; sample with replacement only when the class
    # has fewer than R training rows.
    samples = np.random.choice(len(x_subset), R, replace=len(x_subset) < R)
    # each selected training row becomes an initial prototype of class i
    for sample in samples:
        p_vectors.append(prototype(i, x_subset[sample], epsilon))
print("class id \t Initial prototype vector \n")
for p_v in p_vectors:
    print(p_v.class_id, '\t', p_v.p_vector)

class id 	 Initial prototype vector 

0 	 [0.19444444 0.625      0.05263158 0.08333333]
0 	 [0.13888889 0.41666667 0.07017544 0.        ]
1 	 [0.41666667 0.29166667 0.50877193 0.45833333]
1 	 [0.41666667 0.29166667 0.54385965 0.375     ]
2 	 [0.61111111 0.41666667 0.78947368 0.70833333]
2 	 [0.61111111 0.41666667 0.73684211 0.79166667]


In [72]:
# LVQ1 training: repeatedly pick a random training sample, find its nearest
# prototype and pull that prototype towards the sample when the labels agree,
# or push it away when they differ.
#
# The decaying rate lives in a local variable so the configured `epsilon` is
# left untouched; this cell and the prototype-initialisation cell can then be
# re-run without the learning rate already being exhausted.
learning_rate = epsilon
while learning_rate >= 0.01:
    rnd_i = np.random.randint(0, len(x_train))
    rnd_s = x_train[rnd_i]
    target_y = y_train[rnd_i]

    _, closest_pvector = find_closest(rnd_s, p_vectors)

    # Hand the current rate to the prototype BEFORE it moves. Otherwise a
    # prototype selected for the first time late in training would still jump
    # with the large initial rate it was created with.
    closest_pvector.epsilon = learning_rate
    closest_pvector.update(rnd_s, target_y == closest_pvector.class_id)

    # linear decay of the learning rate
    learning_rate -= epsilon_dec_factor

print("class id \t Final prototype vector \n")
for p_v in p_vectors:
    print(p_v.class_id, '\t', p_v.p_vector)

class id 	 Final prototype vector 

0 	 [0.29981465 0.74532058 0.08830452 0.07539919]
0 	 [0.12895255 0.48082831 0.07358992 0.04552514]
1 	 [0.55011726 0.41299603 0.62315608 0.55325563]
1 	 [0.37885878 0.2057407  0.52381255 0.46703129]
2 	 [0.62559542 0.43513666 0.77587503 0.8509124 ]
2 	 [0.86435428 0.57981521 0.90836165 0.84685459]


In [73]:
# predicted_y = [find_class_id(instance, p_vectors) for instance in x]

# from sklearn.metrics import classification_report

# print (classification_report(y, predicted_y, target_names=['Iris-Setosa','Iris-Versicolour', 'Iris-Virginica']))

In [74]:
from sklearn.metrics import classification_report

# Label every held-out sample with the class of its nearest prototype, then
# summarise precision / recall / f1 per class.
class_labels = ['0', '1', '2']
predicted_y = list(map(lambda sample: find_class_id(sample, p_vectors), x_test))
report = classification_report(y_test, predicted_y, target_names=class_labels)
print(report)

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        13
          1       0.80      1.00      0.89        12
          2       1.00      0.77      0.87        13

avg / total       0.94      0.92      0.92        38

