In [1]:
import numpy as np
from scipy.io import loadmat
from scipy.spatial.distance import cdist


In [2]:
# Method for getting dataset from file
getData_LVQ = lambda: loadmat('lvqdata.mat')['lvqdata']

In [8]:
# given the assignment we know the actual labels
# first 50 are class 1, other 50 are class 2
actual_labels = np.array([1]*50 + [2]*50)
print(np.shape(actual_labels))

data = getData_LVQ()

print(np.shape(data))

(100,)
(100, 2)


In [18]:
def LVQ1(data:np.array, data_labels:np.array, K:int, learning_rate:float, epochs:int):
    N = len(data[0]) # number of features, dimensionality of data
    P = len(data) # number of data points

    # Initialize each prototype by random selection of a data point from the corre-sponding class
    random_indices = np.random.choice(P, K, replace=False)
    prototypes = data[random_indices]
    prototypes_labels = data_labels[random_indices]
    training_error = []

    for t in range(epochs):
        # random permutation is done on indexes instead of datapoints because its easier to handle
        random_indexes = np.random.permutation(range(P)) # random permutation of indexes
        n_missclassifications = 0
        for p_idx in random_indexes:
            print(p_idx)
            x = data[p_idx]

            # find the closest prototype (winner)
            distances = cdist(data[[p_idx]] , prototypes, 'euclidean')[0]
            closest_prototype_index = np.argmin(distances)

            # update the winner according to winner-takes-all
            if data_labels[p_idx] == prototypes_labels[closest_prototype_index]:
                prototypes[closest_prototype_index] = prototypes[closest_prototype_index] + learning_rate * (x - prototypes[closest_prototype_index])
            else:
                prototypes[closest_prototype_index] = prototypes[closest_prototype_index] - learning_rate * (x - prototypes[closest_prototype_index])
                n_missclassifications = n_missclassifications + 1

        #Reporting traning_error
        training_error.append(n_missclassifications/len(data))

    return (N, P, prototypes, training_error)


In [19]:
n_labels = len(np.unique(actual_labels))
prototypes_per_class = 1
K = n_labels * prototypes_per_class
learning_rate =  0.002
epochs = 4

N,P,prototypes, training_error = LVQ1(data, K, learning_rate, epochs)

print(prototypes)
print(training_error)

4
38
79
31
28
77
68
81
9
21
97
50
42
58
40
75
85
66
87
36
76
29
19
37
43
5
3
55
95
72
94
80
54
88
49
14
70
83
64
32
86
10
17
89
51
41
59
82
46
16
44
47
34
23
33
39
71
24
69
57
1
52
63
27
99
61
26
56
22
13
53
6
90
96
65
35
18
73
15
7
84
78
25
30
45
8
20
60
98
92
67
62
2
11
48
12
0
93
74
91
70
86
87
84
36
47
60
89
34
44
45
6
65
23
35
8
10
53
26
5
21
67
76
11
32
55
69
83
29
3
42
41
19
92
40
74
12
51
88
22
99
94
15
82
75
50
63
43
78
62
27
73
81
54
9
28
1
14
0
93
56
61
7
52
49
30
97
57
96
64
33
58
79
25
85
95
98
37
91
24
46
71
72
13
17
16
77
48
90
2
80
20
4
39
18
59
31
68
38
66
37
9
35
23
76
24
71
99
93
34
57
17
2
48
38
22
47
1
50
77
56
51
55
0
14
26
92
6
44
20
98
75
86
78
94
12
7
61
85
54
95
73
89
21
15
27
30
81
91
67
62
41
16
88
84
5
8
11
66
43
96
58
29
33
49
74
40
69
83
3
28
68
46
10
70
59
72
39
25
52
45
64
60
90
36
97
63
4
80
13
19
65
87
32
18
53
42
31
79
82
14
10
86
75
47
93
54
48
40
20
46
31
85
63
89
74
83
2
7
67
25
38
79
58
8
62
65
95
56
13
70
18
53
34
15
52
84
50
33
39
35
97
37
3
55