In [1]:
import numpy as np
from scipy.io import loadmat
from scipy.spatial.distance import cdist


In [2]:
# Method for getting dataset from file
def getData_LVQ(path='lvqdata.mat', key='lvqdata'):
    """Load one variable from a MATLAB ``.mat`` file.

    Parameters
    ----------
    path : str
        Location of the ``.mat`` file. Defaults to ``'lvqdata.mat'``, resolved
        relative to the current working directory.
    key : str
        Name of the variable to extract from the file. Defaults to
        ``'lvqdata'``.

    Returns
    -------
    The object stored under ``key`` in the dictionary returned by
    ``scipy.io.loadmat``.

    Raises
    ------
    KeyError
        If the file does not contain a variable named ``key``.
    """
    return loadmat(path)[key]

In [8]:
# The assignment fixes the ground truth: the first 50 samples are class 1,
# the remaining 50 samples are class 2.
actual_labels = np.repeat([1, 2], 50)
print(actual_labels.shape)

data = getData_LVQ()

print(data.shape)

(100,)
(100, 2)


In [36]:
def LVQ1(data: np.ndarray, data_labels: np.ndarray, K: int, learning_rate: float, epochs: int, seed=None):
    """Train prototypes with the LVQ1 (winner-takes-all) update rule.

    Prototypes are initialised on randomly chosen data points, drawn class by
    class so that the K prototypes are spread as evenly as possible over the
    classes (exactly ``K / n_classes`` each when K is a multiple of the number
    of classes). In every epoch the samples are presented in a fresh random
    order; the closest prototype (Euclidean distance) is moved towards the
    sample when their labels agree and away from it otherwise.

    Parameters
    ----------
    data : np.ndarray
        2-D array with one sample per row. It is not modified.
    data_labels : np.ndarray
        One class label per row of ``data``.
    K : int
        Total number of prototypes; must be at least the number of distinct
        labels.
    learning_rate : float
        Step size of the prototype update.
    epochs : int
        Number of sweeps through the data.
    seed : int, optional
        Seed for a private ``np.random.RandomState``. When ``None`` (default)
        the global ``np.random`` state is used, so ``np.random.seed`` still
        controls the run.

    Returns
    -------
    tuple
        ``(N, P, prototypes, training_error)`` where ``N`` is the number of
        features, ``P`` the number of samples, ``prototypes`` a float array of
        shape ``(K, N)`` and ``training_error`` a list with one entry per
        epoch: the fraction of samples whose winning prototype carried the
        wrong label at the moment the sample was presented (i.e. measured
        while the prototypes are still being updated within the epoch).

    Raises
    ------
    ValueError
        If ``data`` is not a non-empty 2-D array, if the number of labels does
        not match the number of samples, if ``K`` is smaller than the number
        of classes, or if a class has fewer samples than the number of
        prototypes assigned to it.
    """
    data = np.asarray(data)
    data_labels = np.asarray(data_labels).ravel()
    if data.ndim != 2 or data.shape[0] == 0:
        raise ValueError("data must be a non-empty 2-D array (samples x features)")

    P, N = data.shape  # number of data points, number of features
    if len(data_labels) != P:
        raise ValueError("data_labels must contain exactly one label per row of data")

    classes = np.unique(data_labels)
    n_classes = len(classes)
    if K < n_classes:
        # Fewer prototypes than classes can never cover every class.
        raise ValueError("K must be at least the number of distinct classes")

    # The legacy module-level generator and RandomState share the same API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Initialize each prototype by random selection of a data point from the
    # corresponding class. K is split evenly over the classes; any remainder
    # goes to randomly picked classes.
    per_class = np.full(n_classes, K // n_classes)
    per_class[rng.permutation(n_classes)[: K % n_classes]] += 1

    picked = []
    for label, count in zip(classes, per_class):
        members = np.flatnonzero(data_labels == label)
        if count > len(members):
            raise ValueError("not enough samples of a class to initialise its prototypes")
        picked.append(rng.choice(members, int(count), replace=False))
    initial_indices = np.concatenate(picked)

    # astype(float) always copies, so `data` is never modified, and integer
    # input does not truncate the fractional prototype updates.
    prototypes = data[initial_indices].astype(float)
    prototypes_labels = data_labels[initial_indices]
    training_error = []

    for _ in range(epochs):
        n_misclassifications = 0
        # Permute indices rather than rows so data and labels stay aligned.
        for p_idx in rng.permutation(P):
            x = data[p_idx]
            # Find the closest prototype (winner).
            distances = cdist(x[np.newaxis, :], prototypes, 'euclidean')[0]
            winner = np.argmin(distances)
            # Winner-takes-all: attract on a correct label, repel otherwise.
            if data_labels[p_idx] == prototypes_labels[winner]:
                direction = 1.0
            else:
                direction = -1.0
                n_misclassifications += 1
            prototypes[winner] += direction * learning_rate * (x - prototypes[winner])

        # Training error of this epoch.
        training_error.append(n_misclassifications / P)

    return (N, P, prototypes, training_error)


In [39]:

# results 5.2
# Both experiments share the same hyper-parameters and differ only in the
# number of prototypes per class.
n_labels = len(np.unique(actual_labels))
learning_rate = 0.002
epochs = 200

# a) one prototype per class, then b) two prototypes per class
for prototypes_per_class in (1, 2):
    K = n_labels * prototypes_per_class
    N, P, prototypes, training_error = LVQ1(data, actual_labels, K, learning_rate, epochs)
    print(prototypes)
    print(training_error)


[[0.82762397 0.46962788]
 [0.20162512 0.40369738]]
[0.32, 0.31, 0.3, 0.29, 0.3, 0.31, 0.31, 0.31, 0.3, 0.29, 0.29, 0.27, 0.27, 0.27, 0.27, 0.25, 0.24, 0.24, 0.25, 0.25, 0.25, 0.25, 0.25, 0.26, 0.26, 0.26, 0.24, 0.23, 0.23, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.21, 0.21, 0.21, 0.21, 0.21, 0.22, 0.21, 0.21, 0.22, 0.2, 0.21, 0.21, 0.22, 0.22, 0.21, 0.21, 0.22, 0.21, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.22, 0.21, 0.21, 0.21, 0.22, 0.2, 0.2, 0.22, 0.21, 0.21, 0.21, 0.2, 0.2, 0.22, 0.21, 0.22, 0.21, 0.22, 0.23, 0.2, 0.2, 0.21, 0.2, 0.2, 0.2, 0.2, 0.22, 0.21, 0.21, 0.2, 0.22, 0.22, 0.2, 0.2, 0.22, 0.2, 0.22, 0.2, 0.21, 0.2, 0.21, 0.21, 0.2, 0.21, 0.21, 0.2, 0.21, 0.2, 0.22, 0.21, 0.21, 0.2, 0.21, 0.2, 0.2, 0.2, 0.21, 0.2, 0.22, 0.21, 0.23, 0.21, 0.2, 0.21, 0.2, 0.21, 0.2, 0.21, 0.21, 0.22, 0.2, 0.21, 0.21, 0.22, 0.2, 0.22, 0.21, 0.21, 0.2, 0.21, 0.22, 0.2, 0.2, 0.21, 0.21, 0.21, 0.21, 0.2, 0.21, 0.22, 0.22, 0.21, 0

KeyboardInterrupt: 