In [1]:
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

In [2]:
def generate_dataset(n_samples, random_state=None):
    """Generate a toy two-class dataset of empirical frequency vectors.

    Each sample is first assigned to a class by a Bernoulli draw (probability
    0.2 of the second class). Its row is then a multinomial draw of
    ``repeat_nb`` trials over three categories, divided by ``repeat_nb``, so
    every row is non-negative and sums to 1.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    random_state : int or None, optional
        Seed for a private ``np.random.RandomState``. When ``None`` (the
        default) the global NumPy random state is used, exactly as before.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_samples, 3)``. The shape is ``(0, 3)`` when
        ``n_samples`` is 0, so downstream code can always read ``shape[1]``.
    """
    p_1 = [1/3, 1/3, 1/3]
    p_2 = [0/10, 0/10, 10/10]
    repeat_nb = 10000

    # Fall back to the module-level generator so np.random.seed() still works.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # 0 -> first class (uniform), 1 -> second class (all mass on last category).
    X = rng.binomial(size=n_samples, n=1, p=0.2)

    # One multinomial draw per sample, in sample order, so the sequence of
    # random calls matches the original loop.
    data = [
        rng.multinomial(repeat_nb, p_1 if label == 0 else p_2) / repeat_nb
        for label in X
    ]

    # Explicit reshape keeps the result 2-D even when no rows were generated.
    return np.asarray(data, dtype=float).reshape(len(data), len(p_1))

In [3]:
class VQ:
    """Online winner-take-all vector quantisation under the Hellinger distance.

    Each neuron is a prototype vector. During training, every sample pulls
    its closest neuron (smallest Hellinger distance) towards itself.

    Parameters
    ----------
    neurons_nb : int, optional
        Number of prototype vectors to learn (default 2).
    """

    def __init__(self, neurons_nb=2):
        if neurons_nb < 1:
            raise ValueError("neurons_nb must be a positive integer")
        # Prototype matrix of shape (neurons_nb_, n_features); set by fit().
        self.neurons_ = None
        self.neurons_nb_ = neurons_nb


    def fit(self, X:np.ndarray, epochs, learning_rate=0.9, random_state=None, verbose=True):
        """Learn the prototype vectors from ``X``.

        Neurons are initialised from a flat Dirichlet distribution, so each
        starts as a non-negative vector summing to 1. Samples are then
        presented one at a time for ``epochs`` passes; the closest neuron is
        moved by ``learning_rate * (x - neuron)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Non-negative, finite samples (the Hellinger distance takes square
            roots of the components).
        epochs : int
            Number of full passes over ``X``.
        learning_rate : float, optional
            Step size of the winner update. Values in ``(0, 1]`` keep the
            neurons inside the convex hull of the data and the initial
            neurons, and therefore non-negative.
        random_state : int or None, optional
            Seed for a private ``np.random.RandomState`` used for the
            initialisation. ``None`` uses the global NumPy random state.
        verbose : bool, optional
            When true (default) the final neurons are printed.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or contains negative or non-finite values.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        # Negative or NaN components would turn every distance into NaN and
        # make the winner selection meaningless.
        if not np.all(np.isfinite(X)) or np.any(X < 0):
            raise ValueError("X must contain only finite, non-negative values")

        rng = np.random if random_state is None else np.random.RandomState(random_state)
        self.neurons_ = rng.dirichlet(np.ones(X.shape[1]), size=self.neurons_nb_)

        for _ in range(epochs):
            for x in X:
                # Determine "winner" neuron: distances to all neurons at once;
                # argmin keeps the first neuron on ties.
                winner_idx = np.argmin(self.hellinger_d(x, self.neurons_))

                # Updating "winner" neuron
                self.neurons_[winner_idx] += learning_rate * (x - self.neurons_[winner_idx])

        if verbose:
            print(self.neurons_)


    @staticmethod
    def hellinger_d(v1:np.ndarray, v2:np.ndarray):
        """Return the Hellinger distance between ``v1`` and ``v2``.

        The reduction runs over the last axis, so the inputs may be two 1-D
        vectors (scalar result) or any broadcastable pair such as one vector
        against a matrix of row vectors (one distance per row).
        """
        return np.sqrt(np.sum((np.sqrt(v1) - np.sqrt(v2))**2, axis=-1)) / np.sqrt(2)


In [4]:
# Build the toy dataset: 100 rows, each a 3-component frequency vector.
df = generate_dataset(100)

In [5]:
# Display the generated array as the cell output.
df

array([[0.3319, 0.3355, 0.3326],
       [0.3459, 0.327 , 0.3271],
       [0.3331, 0.343 , 0.3239],
       [0.3299, 0.3309, 0.3392],
       [0.333 , 0.3323, 0.3347],
       [0.3373, 0.3244, 0.3383],
       [0.3344, 0.3284, 0.3372],
       [0.3329, 0.3332, 0.3339],
       [0.3187, 0.3411, 0.3402],
       [0.3459, 0.3205, 0.3336],
       [0.3301, 0.3333, 0.3366],
       [0.3317, 0.3337, 0.3346],
       [0.    , 0.    , 1.    ],
       [0.3361, 0.3288, 0.3351],
       [0.3314, 0.3353, 0.3333],
       [0.3344, 0.3321, 0.3335],
       [0.3348, 0.338 , 0.3272],
       [0.336 , 0.3293, 0.3347],
       [0.3318, 0.3337, 0.3345],
       [0.3235, 0.3408, 0.3357],
       [0.3426, 0.329 , 0.3284],
       [0.3389, 0.3264, 0.3347],
       [0.3328, 0.3319, 0.3353],
       [0.    , 0.    , 1.    ],
       [0.    , 0.    , 1.    ],
       [0.    , 0.    , 1.    ],
       [0.3372, 0.3236, 0.3392],
       [0.3321, 0.3298, 0.3381],
       [0.3328, 0.3344, 0.3328],
       [0.3395, 0.3335, 0.327 ],
       [0.

In [6]:
# Create the quantiser; its neurons stay unset until fit() is called.
vq_model = VQ()

In [7]:
# Train for 100 passes over the data with the default learning rate;
# fit() prints the resulting neurons.
vq_model.fit(df, epochs=100)

[[0.         0.         1.        ]
 [0.33225361 0.33202249 0.3357239 ]]


In [8]:
# Inspect the learned prototype vectors (one row per neuron).
vq_model.neurons_

array([[0.        , 0.        , 1.        ],
       [0.33225361, 0.33202249, 0.3357239 ]])

In [9]:
# Sum each neuron's components row-wise.
np.sum(vq_model.neurons_, axis=1)

array([1., 1.])