In [1]:
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

In [2]:
def generate_dataset(n_samples, rng=None):
    """Generate a toy two-class dataset of empirical probability vectors.

    Every sample is first assigned a class with a Bernoulli draw (second
    class with probability 0.2, first class otherwise). The sample itself is
    the vector of relative frequencies observed in 10000 multinomial trials
    under the class probabilities:

    * first class:  [1/3, 1/3, 1/3]
    * second class: [0, 1/10, 9/10]

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global legacy random
        state is used, so existing callers (and ``np.random.seed``) behave
        exactly as before.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, 3)``. Each row sums to 1 (up to
        floating-point rounding). An empty request yields shape ``(0, 3)``.
    """
    p_1 = [1/3, 1/3, 1/3]
    p_2 = [0/10, 1/10, 9/10]
    repeat_nb = 10000

    # The np.random module exposes the same binomial/multinomial call
    # signatures as Generator/RandomState, so it can serve as the default.
    if rng is None:
        rng = np.random

    # 0 -> first class, 1 -> second class.
    X = rng.binomial(n=1, p=0.2, size=n_samples)

    # One multinomial draw per sample, in sample order, converted from
    # counts to relative frequencies.
    data = [
        rng.multinomial(repeat_nb, p_2 if label else p_1) / repeat_nb
        for label in X
    ]

    # reshape keeps the column dimension even when no samples were requested.
    return np.array(data, dtype=float).reshape(-1, len(p_1))

In [4]:
class VQ:
    """Online winner-take-all vector quantizer based on the Hellinger distance.

    A fixed number of prototype vectors ("neurons") is initialised from
    randomly chosen training samples. During training, each sample pulls its
    closest prototype towards itself.
    """

    def __init__(self, neurons_nb=2):
        """Create an untrained quantizer.

        Parameters
        ----------
        neurons_nb : int, default 2
            Number of prototype vectors to learn.

        Raises
        ------
        ValueError
            If ``neurons_nb`` is smaller than 1.
        """
        if neurons_nb < 1:
            raise ValueError("neurons_nb must be at least 1")

        # Prototype vectors, one per row; stays None until fit() is called.
        self.neurons_ = None
        # Number of prototype vectors.
        self.neurons_nb_ = neurons_nb


    def fit(self, X:np.ndarray, epochs, learning_rate=0.9):
        """Learn the prototype vectors from ``X`` and print them.

        Parameters
        ----------
        X : numpy.ndarray
            Training samples, one per row. Values must be non-negative
            because the Hellinger distance takes their square root.
        epochs : int
            Number of full passes over ``X``.
        learning_rate : float, default 0.9
            Fraction of the gap between the winning prototype and the sample
            that is closed on each update. Values in (0, 1] keep the
            prototypes inside the range spanned by the data.

        Raises
        ------
        ValueError
            If ``X`` contains negative or NaN entries, or (raised by
            ``np.random.choice``) if ``X`` has fewer rows than prototypes.
        """
        # Work in floating point: with an integer X the in-place update below
        # would fail, since a float increment cannot be cast into int storage.
        X = np.asarray(X, dtype=float)

        # Negative/NaN entries would yield NaN distances and no valid winner.
        if not np.all(X >= 0):
            raise ValueError(
                "X must contain only non-negative, non-NaN values "
                "for the Hellinger distance"
            )

        # Initialise prototypes from distinct random samples. Fancy indexing
        # returns a copy, so the updates never write back into X.
        indexes = np.random.choice(X.shape[0], self.neurons_nb_, replace=False)
        self.neurons_ = X[indexes]

        for _ in range(epochs):
            for x in X:
                # Determine "winner" neuron; argmin returns the first minimum,
                # so ties go to the lowest index.
                distances = [self.hellinger_d(x, neuron) for neuron in self.neurons_]
                winner_idx = int(np.argmin(distances))

                # Updating "winner" neuron
                self.neurons_[winner_idx] += learning_rate * (x - self.neurons_[winner_idx])

        # Kept for the notebook: show the learned prototypes after training.
        print(self.neurons_)


    @staticmethod
    def hellinger_d(v1:np.ndarray, v2:np.ndarray):
        """Return the Hellinger distance between two non-negative vectors.

        Computed as ``sqrt(sum((sqrt(v1) - sqrt(v2))**2)) / sqrt(2)``.
        """
        return np.sqrt(np.sum((np.sqrt(v1) - np.sqrt(v2))**2)) / np.sqrt(2)


In [5]:
# Draw 100 samples from the two-class generator defined above.
# NOTE(review): despite the name, `df` is a NumPy array, not a DataFrame.
df = generate_dataset(100)

In [6]:
# Show the generated samples (one row per sample, one column per category).
# NOTE(review): this renders the whole array; a slice such as df[:5] would keep the output short.
df

array([[0.3282, 0.336 , 0.3358],
       [0.3283, 0.3352, 0.3365],
       [0.3287, 0.3377, 0.3336],
       [0.    , 0.0983, 0.9017],
       [0.3343, 0.3316, 0.3341],
       [0.3308, 0.3344, 0.3348],
       [0.3315, 0.3371, 0.3314],
       [0.3274, 0.3399, 0.3327],
       [0.3294, 0.3317, 0.3389],
       [0.3385, 0.3314, 0.3301],
       [0.334 , 0.3331, 0.3329],
       [0.33  , 0.33  , 0.34  ],
       [0.3364, 0.3309, 0.3327],
       [0.3295, 0.3348, 0.3357],
       [0.    , 0.1032, 0.8968],
       [0.    , 0.0977, 0.9023],
       [0.335 , 0.3294, 0.3356],
       [0.3244, 0.3382, 0.3374],
       [0.3379, 0.3297, 0.3324],
       [0.    , 0.104 , 0.896 ],
       [0.    , 0.0983, 0.9017],
       [0.3309, 0.3345, 0.3346],
       [0.3388, 0.3296, 0.3316],
       [0.3291, 0.3403, 0.3306],
       [0.    , 0.1036, 0.8964],
       [0.34  , 0.3267, 0.3333],
       [0.3342, 0.3344, 0.3314],
       [0.3287, 0.3419, 0.3294],
       [0.3378, 0.3396, 0.3226],
       [0.3333, 0.3374, 0.3293],
       [0.

In [7]:
# Create an untrained quantizer; its prototypes are None until fit() is called.
vq_model = VQ()

In [8]:
# Train for 100 passes over the data; fit() prints the prototypes when it finishes.
# NOTE(review): no random seed is set, so the result differs between runs.
vq_model.fit(df, epochs=100)

[[0.32196222 0.3419643  0.33607348]
 [0.         0.10143599 0.89856401]]


In [9]:
# Inspect the learned prototype vectors (one row per neuron).
vq_model.neurons_

array([[0.32196222, 0.3419643 , 0.33607348],
       [0.        , 0.10143599, 0.89856401]])