In [600]:
import numpy as np
%matplotlib inline

In [601]:
def is_vowel(c: str) -> bool:
    """Return True if ``c`` is a Ukrainian vowel letter, ignoring case.

    The recognised vowels are а, е, є, и, і, ї, о, у, ю, я.

    Args:
        c: The string to test; it is lower-cased and compared as a whole
            against each vowel.

    Returns:
        True when the lower-cased ``c`` equals one of the vowels, else False.
    """
    vowels = (
        'а',
        '\u0435',  # Cyrillic 'е'; escaped so it cannot be confused with 'є' or Latin 'e'
        'є', 'и', 'і', 'ї', 'о', 'у', 'ю', 'я',
    )
    return c.lower() in vowels


def word_to_bits(word1):
    """Encode ``word1`` as a list of 0/1 flags, one per character.

    A position holds 1 when ``is_vowel`` accepts the character there and
    0 otherwise.
    """
    bits = []
    for letter in word1:
        bits.append(int(bool(is_vowel(letter))))
    return bits


def fill_zeros_to_size(array, size):
    """Pad ``array`` in place with trailing zeros until it has ``size`` items.

    Args:
        array: Mutable list to extend; it is modified in place.
        size: Target length.

    Returns:
        None. If ``array`` already has ``size`` or more items it is left
        untouched (an ``!=`` loop condition would never terminate for an
        over-long input).
    """
    missing = size - len(array)
    if missing > 0:
        array.extend([0] * missing)


def inverse_bit(bits, i):
    """Return a copy of ``bits`` with the element at index ``i`` inverted.

    A truthy element becomes 0 and a falsy element becomes 1. The input
    sequence itself is not modified.
    """
    flipped = bits.copy()
    flipped[i] = 0 if flipped[i] else 1
    return flipped


def get_fullname_vectors(fullname_vector):
    """Build nine equal-length 0/1 vectors from a three-word name.

    The words are ordered longest first and each is encoded with
    ``word_to_bits``; the two shorter encodings are zero-padded to the
    length of the longest one. For every encoded word two variants are
    derived, one with the bit at index 2 inverted and one with the bit at
    index 3 inverted.

    Args:
        fullname_vector: Iterable of exactly three words.

    Returns:
        A 9-tuple ``(v1, ..., v9)``: ``v1`` is the longest word and
        ``v2``/``v3`` its variants; ``v4`` and ``v5`` are the second and
        third words; ``v6``/``v7`` are the variants of ``v4`` and
        ``v8``/``v9`` the variants of ``v5``.
    """
    longest, middle, shortest = sorted(fullname_vector, key=len, reverse=True)

    base_long = word_to_bits(longest)
    base_mid = word_to_bits(middle)
    base_short = word_to_bits(shortest)

    # Bring the shorter encodings up to the width of the longest one.
    width = len(base_long)
    for shorter in (base_mid, base_short):
        fill_zeros_to_size(shorter, width)

    def variants(bits):
        # The two perturbed copies: bit 2 inverted, then bit 3 inverted.
        return inverse_bit(bits, 2), inverse_bit(bits, 3)

    return (
        base_long, *variants(base_long),
        base_mid, base_short,
        *variants(base_mid),
        *variants(base_short),
    )

def is_end_condition(current_weights, previous_weights, tolerance=None):
    """Tell whether no weight moved by more than the tolerance.

    Args:
        current_weights: Weight array after an update.
        previous_weights: Weight array before the update (same shape).
        tolerance: Largest absolute per-element change still counted as
            "unchanged". When omitted, the module-level ``threshold`` is
            looked up at call time, which is what existing two-argument
            callers rely on.

    Returns:
        The result of ``np.all`` over the element-wise comparison: true
        only if every absolute difference is ``<= tolerance``.
    """
    if tolerance is None:
        tolerance = threshold
    return np.all(np.abs(current_weights - previous_weights) <= tolerance)

def change_weights(_weights, s_i, learning_rate=None):
    """Move a weight vector towards an input vector.

    Computes ``_weights + learning_rate * (s_i - _weights)`` and returns
    the result as a new array; ``_weights`` is not modified.

    Args:
        _weights: Current weight vector.
        s_i: Input vector the weights are pulled towards.
        learning_rate: Step size. When omitted, the module-level ``lr`` is
            read at call time, so callers that rebind ``lr`` between calls
            keep their existing behaviour.

    Returns:
        The updated weight vector.
    """
    if learning_rate is None:
        learning_rate = lr
    return _weights + (learning_rate * (s_i - _weights))

def get_distances(_weights, vector):
    """Return the squared Euclidean distance from ``vector`` to each column.

    ``_weights`` is transposed so that every column becomes a row, ``vector``
    is subtracted from each of those rows, and the squared differences are
    summed along axis 1.
    """
    deltas = _weights.T - vector
    return (deltas * deltas).sum(axis=1)

def get_winner_index(_weights, vector):
    """Return the index of the weight column nearest to ``vector``.

    "Nearest" means the smallest value reported by ``get_distances``.
    """
    distances = get_distances(_weights, vector)
    return distances.argmin()

In [602]:
# The three name parts that are later encoded as vowel/consonant bit vectors.
fullname = np.array(['Іванків', 'Ростислав', 'Богданович'])
print(fullname)
# Pass the iterable straight to max(); unpacking it with * raises TypeError
# when there is only a single element.
print(f'Max len: {max(map(len, fullname))}')
print(f'Max word: {max(fullname, key=len)}')

['Іванків' 'Ростислав' 'Богданович']
Max len: 10
Max word: Богданович


In [603]:
# Stack the bit vectors derived from the name into a single 2-D array,
# one vector per row.
fullname_vectors = np.array(get_fullname_vectors(fullname_vector=fullname))
print(fullname_vectors)

[[0 1 0 0 1 0 1 0 1 0]
 [0 1 1 0 1 0 1 0 1 0]
 [0 1 0 1 1 0 1 0 1 0]
 [0 1 0 0 1 0 0 1 0 0]
 [1 0 1 0 0 1 0 0 0 0]
 [0 1 1 0 1 0 0 1 0 0]
 [0 1 0 1 1 0 0 1 0 0]
 [1 0 0 0 0 1 0 0 0 0]
 [1 0 1 1 0 1 0 0 0 0]]


In [604]:
# Initial weight matrix: one row per vector component, one column per class.
weights = np.array(
    [
        [1.00, 0.6, 0.10],
        [0.00, 0.2, 0.10],
        [0.00, 0.2, 0.10],
        [0.10, 0.3, 0.34],
        [0.88, 0.3, 0.32],
        [0.44, 0.2, 0.22],
        [0.01, 0.6, 0.60],
        [0.20, 0.6, 0.50],
        [0.30, 0.3, 0.30],
        [0.10, 0.2, 1.00],
    ],
    dtype=float,
)
print(weights)

[[1.   0.6  0.1 ]
 [0.   0.2  0.1 ]
 [0.   0.2  0.1 ]
 [0.1  0.3  0.34]
 [0.88 0.3  0.32]
 [0.44 0.2  0.22]
 [0.01 0.6  0.6 ]
 [0.2  0.6  0.5 ]
 [0.3  0.3  0.3 ]
 [0.1  0.2  1.  ]]


In [605]:
# Training configuration.
num_of_vectors, vectors_dimension = fullname_vectors.shape
num_of_classes = 3
lr = 0.6            # learning rate
k = 0.5             # factor the learning rate is multiplied by after each pass
threshold = 0.0005  # largest per-weight change still treated as "no change"

# Sanity checks: the weight matrix must be (vector length x classes) and
# every initial weight must lie in [0, 1].
assert weights.shape == (vectors_dimension, num_of_classes)
assert ((weights >= 0) & (weights <= 1)).all()

In [606]:
# Training loop. Each iteration of the outer loop is one full pass over
# fullname_vectors; passes repeat until `flag` has been cleared.
#
# NOTE(review): this cell updates `weights` in place and rebinds the
# module-level `lr`, so re-running it continues from the already-updated
# weights and the already-decayed learning rate. Re-run the cells that define
# `weights` and `lr` first to reproduce the output below.
flag = True
while flag:
    for v in fullname_vectors:
        # Winner = the weight column with the smallest squared distance to v.
        winner_index = get_winner_index(weights, v)
        # Snapshot so the size of this single update can be measured.
        w_before = weights.copy()
        # Only the winning column is moved towards v.
        weights[:,winner_index] = change_weights(weights[:,winner_index], v)
        # The check runs after every single update and there is no `break`:
        # once any one update changes every weight by at most `threshold`,
        # the rest of the current pass still runs and then the loop ends.
        # NOTE(review): confirm per-update (rather than per-pass) convergence
        # is what is intended.
        if is_end_condition(weights, w_before):
            flag = False
    # Decay the learning rate after each pass; change_weights reads the
    # module-level `lr`, so the next pass takes smaller steps.
    lr = lr * k

# Final weights: full precision first, then rounded to two decimals.
print(weights)
print(np.round(weights, decimals=2))

[[1.61984027e-02 5.11528507e-03 9.80817681e-01]
 [9.83801597e-01 9.93179620e-01 2.13136878e-03]
 [3.29351292e-01 3.18268174e-01 6.64254602e-01]
 [4.20244439e-01 4.22461062e-01 4.27150074e-01]
 [9.86802565e-01 9.94032167e-01 6.82038009e-03]
 [1.42886963e-02 1.70509502e-03 9.83375324e-01]
 [3.41019004e-05 9.96589810e-01 1.27882127e-02]
 [9.84483635e-01 5.11528507e-03 1.06568439e-02]
 [1.02305701e-03 9.94032167e-01 6.39410633e-03]
 [3.41019004e-04 1.70509502e-03 2.13136878e-02]]
[[0.02 0.01 0.98]
 [0.98 0.99 0.  ]
 [0.33 0.32 0.66]
 [0.42 0.42 0.43]
 [0.99 0.99 0.01]
 [0.01 0.   0.98]
 [0.   1.   0.01]
 [0.98 0.01 0.01]
 [0.   0.99 0.01]
 [0.   0.   0.02]]


In [607]:
# Report, for every vector, the nearest weight column using 1-based numbering.
for v in fullname_vectors:
    cluster = get_winner_index(_weights=weights, vector=v)
    print(f'The {v} related to {cluster + 1} cluster')

The [0 1 0 0 1 0 1 0 1 0] related to 2 cluster
The [0 1 1 0 1 0 1 0 1 0] related to 2 cluster
The [0 1 0 1 1 0 1 0 1 0] related to 2 cluster
The [0 1 0 0 1 0 0 1 0 0] related to 1 cluster
The [1 0 1 0 0 1 0 0 0 0] related to 3 cluster
The [0 1 1 0 1 0 0 1 0 0] related to 1 cluster
The [0 1 0 1 1 0 0 1 0 0] related to 1 cluster
The [1 0 0 0 0 1 0 0 0 0] related to 3 cluster
The [1 0 1 1 0 1 0 0 0 0] related to 3 cluster
