In [1]:
import numpy as np
from scipy.io import loadmat
from scipy.spatial.distance import cdist


In [2]:
def getData_LVQ(path='lvqdata.mat'):
    """Load the LVQ dataset from a MATLAB ``.mat`` file.

    Parameters
    ----------
    path : str, optional
        Location of the ``.mat`` file. Defaults to ``'lvqdata.mat'`` in the
        current working directory, which is the file the notebook was
        originally written against.

    Returns
    -------
    The object stored under the ``'lvqdata'`` key of the loaded file.

    Raises
    ------
    KeyError
        If the file does not contain a variable named ``'lvqdata'``.
    """
    # A regular def (instead of a lambda bound to a name) gives the loader a
    # proper __name__, a docstring and readable tracebacks.
    return loadmat(path)['lvqdata']

In [8]:
# Ground-truth labels are fixed by the assignment:
# samples 0-49 belong to class 1, samples 50-99 belong to class 2.
actual_labels = np.repeat([1, 2], 50)
print(actual_labels.shape)

data = getData_LVQ()

print(data.shape)

(100,)
(100, 2)


In [20]:
def LVQ1(data:np.ndarray, data_labels:np.ndarray, K:int, learning_rate:float, epochs:int):
    """Train a set of prototypes with the LVQ1 (winner-takes-all) rule.

    Parameters
    ----------
    data : np.ndarray
        2-D array with one sample per row (P rows, N columns).
    data_labels : np.ndarray
        1-D array with the class label of each row of ``data``.
    K : int
        Total number of prototypes. They are split as evenly as possible over
        the classes found in ``data_labels``; if ``K`` is not a multiple of the
        number of classes, the first classes (in sorted label order) receive
        one extra prototype each.
    learning_rate : float
        Step size of every prototype update.
    epochs : int
        Number of sweeps through the (reshuffled) dataset.

    Returns
    -------
    tuple
        ``(N, P, prototypes, training_error)`` where ``N`` is the number of
        features, ``P`` the number of samples, ``prototypes`` a ``(K, N)``
        float array whose rows are grouped by class in sorted label order, and
        ``training_error`` a list with one entry per epoch: the fraction of
        samples whose winning prototype had the wrong label at the moment the
        sample was presented (i.e. counted online, during the epoch).

    Raises
    ------
    ValueError
        If ``data`` is not a non-empty 2-D array, if ``data_labels`` does not
        have one entry per sample, if ``K`` is smaller than 1, or if a class
        has fewer samples than the number of prototypes requested for it.
    """
    data = np.asarray(data)
    data_labels = np.asarray(data_labels)
    if data.ndim != 2 or data.shape[0] == 0:
        raise ValueError("data must be a non-empty 2-D array (samples x features)")

    P, N = data.shape  # number of data points, number of features
    if data_labels.shape != (P,):
        raise ValueError("data_labels must contain exactly one label per row of data")
    if K < 1:
        raise ValueError("K must be at least 1")

    # Initialize each prototype by random selection of a data point from the
    # corresponding class, so that every class is represented (drawing K points
    # from the whole dataset could give all prototypes the same label).
    classes = np.unique(data_labels)
    per_class, remainder = divmod(K, len(classes))
    picked = []
    for class_pos, class_label in enumerate(classes):
        n_for_class = per_class + (1 if class_pos < remainder else 0)
        if n_for_class == 0:
            continue
        members = np.flatnonzero(data_labels == class_label)
        picked.append(np.random.choice(members, n_for_class, replace=False))
    random_indices = np.concatenate(picked)

    # Fancy indexing returns a copy, so the caller's data is never modified.
    prototypes = data[random_indices]
    if not np.issubdtype(prototypes.dtype, np.floating):
        # Integer prototypes would silently truncate every fractional update.
        prototypes = prototypes.astype(float)
    prototypes_labels = data_labels[random_indices]
    training_error = []

    for _ in range(epochs):
        n_misclassifications = 0
        # Present the samples in a fresh random order every epoch.
        for p_idx in np.random.permutation(P):
            x = data[p_idx]

            # find the closest prototype (winner) by Euclidean distance
            distances = np.linalg.norm(prototypes - x, axis=1)
            winner = np.argmin(distances)

            # winner-takes-all update: attract on a correct label, repel otherwise
            step = learning_rate * (x - prototypes[winner])
            if data_labels[p_idx] == prototypes_labels[winner]:
                prototypes[winner] += step
            else:
                prototypes[winner] -= step
                n_misclassifications += 1

        # Report the training error of this epoch
        training_error.append(n_misclassifications / P)

    return (N, P, prototypes, training_error)


In [23]:
# LVQ1 draws its initial prototypes and its per-epoch sample order from NumPy's
# global random state; fixing the seed makes this cell reproducible on re-run.
np.random.seed(42)

# Hyper-parameters of the run
n_labels = len(np.unique(actual_labels))  # number of distinct classes
prototypes_per_class = 1
K = n_labels * prototypes_per_class  # total number of prototypes
learning_rate =  0.002
epochs = 4

N,P,prototypes, training_error = LVQ1(data, actual_labels, K, learning_rate, epochs)

# Final prototype positions and the per-epoch training error
print(prototypes)
print(training_error)

63
94
42
98
17
0
48
96
89
35
27
74
52
28
14
97
65
54
68
11
26
70
72
58
62
53
49
99
66
59
51
13
87
33
83
73
80
2
90
38
56
82
75
24
45
64
15
30
20
44
1
61
12
21
39
57
85
86
40
9
25
88
69
3
7
36
29
18
19
34
84
91
60
71
5
79
23
50
8
81
67
78
31
32
16
37
92
55
22
95
6
47
10
93
43
4
46
77
76
41
94
84
50
75
77
87
90
45
16
76
66
69
17
79
97
13
96
58
21
41
51
2
9
34
63
7
36
47
67
20
12
46
54
65
26
3
98
92
89
74
38
85
15
49
37
5
72
0
61
88
83
82
78
68
99
55
60
43
25
95
73
48
35
39
24
56
18
33
40
27
32
8
23
14
52
86
81
59
42
30
4
93
10
22
6
64
19
91
53
1
80
31
70
11
28
71
62
57
44
29
26
45
19
13
51
6
67
12
83
29
61
21
34
47
80
1
95
99
92
75
97
98
22
74
50
24
71
25
64
73
23
56
65
91
72
7
28
70
58
89
31
86
79
69
93
14
38
66
0
77
17
4
63
9
5
78
32
8
46
35
48
10
85
43
90
62
81
88
53
41
37
39
15
87
52
82
84
60
68
2
33
11
16
3
27
59
96
36
55
44
30
18
20
40
42
76
94
57
54
49
68
0
35
52
23
85
57
61
47
49
89
5
22
56
75
51
45
17
12
99
19
72
39
95
55
27
1
74
11
41
92
26
77
90
24
82
14
66
32
63
7
48
94
58
97