In [1]:
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from itertools import chain

In [2]:
class Neuron:
    """One unit of the map: a weight vector together with its lattice position."""

    def __init__(self, weight, coord):
        """Store the unit's state.

        Args:
            weight: weight (prototype) vector of the unit; stored as-is.
            coord: position of the unit on the lattice; stored as-is.
        """
        self.weight_, self.coord_ = weight, coord

class SOM:
    """Self-organising map on a square lattice, trained with the Hellinger distance.

    Attributes:
        neurons_nb_: total number of neurons (a positive perfect square).
        neurons_: list of ``Neuron`` objects in row-major lattice order; their
            weights are ``None`` until ``fit`` is called.
    """

    def __init__(self, neurons_nb=9):
        """Create the map and lay out its lattice.

        Args:
            neurons_nb: number of neurons; must be a positive perfect square so
                the neurons fill a square lattice. Defaults to 9 (3 x 3).

        Raises:
            ValueError: if ``neurons_nb`` is not a positive perfect square.
        """
        self.neurons_ = None
        self.neurons_nb_ = neurons_nb
        self.generate_lattice()

    def generate_lattice(self):
        """(Re)build ``neurons_`` as a side x side grid of weightless neurons.

        Raises:
            ValueError: if ``neurons_nb_`` is not a positive perfect square.
        """
        if self.neurons_nb_ < 1:
            raise ValueError(
                f"neurons_nb must be a positive perfect square, got {self.neurons_nb_}"
            )

        # Side of the square lattice; validated below so that exactly
        # neurons_nb_ neurons are created.
        side = int(round(np.sqrt(self.neurons_nb_)))
        if side * side != self.neurons_nb_:
            raise ValueError(
                f"neurons_nb must be a positive perfect square, got {self.neurons_nb_}"
            )

        self.neurons_ = [
            Neuron(None, np.array([i, j]))
            for i in range(side)
            for j in range(side)
        ]

    def fit(self, X: np.ndarray, epochs, h0=1):
        """Train the map on the rows of ``X``.

        Weights are initialised from ``neurons_nb_`` distinct rows of ``X``
        drawn with NumPy's global random state. For every sample the best
        matching unit (BMU) is the neuron with the smallest Hellinger distance,
        and every neuron is moved towards the sample by the factor returned by
        ``neighbourhood_func``. The neighbourhood width is ``1 / (epoch + 1)``.

        Note: the BMU itself receives the factor ``h0``, so with the default
        ``h0=1`` its weight is moved all the way onto the current sample.

        Args:
            X: samples, one per row, with non-negative entries (the Hellinger
                distance takes their square roots). ``X`` is never modified.
            epochs: number of passes over ``X``.
            h0: neighbourhood amplitude, i.e. the update factor applied to the
                BMU. Defaults to 1.

        Raises:
            ValueError: if ``X`` has fewer rows than the map has neurons.
        """
        X = np.asarray(X)
        if X.shape[0] < self.neurons_nb_:
            raise ValueError(
                f"X has {X.shape[0]} rows but {self.neurons_nb_} are needed "
                "to initialise the neurons"
            )

        chosen_rows = np.sort(
            np.random.choice(X.shape[0], self.neurons_nb_, replace=False)
        )
        for neuron, row in zip(self.neurons_, chosen_rows):
            # Take a float *copy*: X[row] alone is a view, and the in-place
            # updates below would otherwise overwrite the training data (and
            # fail outright for integer-typed X).
            neuron.weight_ = X[row].astype(float)

        for ep in range(epochs):
            # The width only depends on the epoch, so compute it once per pass.
            sigma = 1 / (ep + 1)

            for x in X:
                # Find BMU (argmin keeps the first neuron on ties).
                distances = [
                    self.hellinger_d(x, neuron.weight_) for neuron in self.neurons_
                ]
                winner = self.neurons_[int(np.argmin(distances))]

                # Updating all neurons
                for neuron in self.neurons_:
                    rate = self.neighbourhood_func(
                        neuron.coord_, winner.coord_, h0, sigma
                    )
                    neuron.weight_ += rate * (x - neuron.weight_)

    @staticmethod
    def neighbourhood_func(r_i, r_c, h0, sigma):
        """Return the update factor for a neuron at ``r_i`` when the BMU is at ``r_c``.

        The factor is a Gaussian of the squared lattice distance, truncated so
        that only the BMU and neurons at squared distance 1 are updated.

        Args:
            r_i: lattice coordinates of the neuron being updated.
            r_c: lattice coordinates of the BMU.
            h0: amplitude of the Gaussian.
            sigma: width of the Gaussian (must be non-zero).

        Returns:
            ``h0 * exp(-d2 / (2 * sigma**2))`` where ``d2`` is the squared
            distance between ``r_i`` and ``r_c``, or 0 when ``d2 > 1``.
        """
        distance = np.sum(np.square(r_i - r_c))  # squared Euclidean distance
        if distance > 1:
            return 0

        return h0 * np.exp(-distance / (2 * sigma**2))

    @staticmethod
    def hellinger_d(v1: np.ndarray, v2: np.ndarray):
        """Return the Hellinger distance ``||sqrt(v1) - sqrt(v2)||_2 / sqrt(2)``.

        Both vectors are expected to be non-negative; ``np.sqrt`` yields NaN
        for negative entries.
        """
        return np.sqrt(np.sum((np.sqrt(v1) - np.sqrt(v2)) ** 2)) / np.sqrt(2)



In [3]:
def generate_dataset(n_samples, p, repeat_nb=10000, rng=None):
    """Generate samples made of two empirical categorical distributions each.

    For every sample two distinct rows of ``p`` are picked at random; for each
    of them ``repeat_nb`` multinomial trials are drawn and turned into relative
    frequencies. The two frequency vectors are concatenated into one row.

    Args:
        n_samples: number of rows to generate (may be 0).
        p: 2-D array-like with one probability distribution per row; at least
            two rows are required.
        repeat_nb: number of multinomial trials behind each frequency vector.
        rng: object providing ``choice`` and ``multinomial`` (for example
            ``np.random.default_rng(seed)``). Defaults to NumPy's global random
            state, i.e. the ``np.random`` module.

    Returns:
        A tuple ``(data, indexes)`` where ``data`` is a float array of shape
        ``(n_samples, 2 * p.shape[1])`` and ``indexes`` is a list holding, for
        every sample, the array of the two row indices of ``p`` that were used.

    Raises:
        ValueError: if ``p`` is not 2-D, has fewer than two rows, or
            ``repeat_nb`` is smaller than 1.
    """
    p = np.asarray(p, dtype=float)
    if p.ndim != 2 or p.shape[0] < 2:
        raise ValueError("p must be a 2-D array with at least two distributions")
    if repeat_nb < 1:
        raise ValueError("repeat_nb must be at least 1")
    if rng is None:
        rng = np.random

    distributions_nb, outcomes_nb = p.shape

    # Pre-sizing the output keeps the shape well defined even for n_samples == 0
    # (reshaping an empty list with -1 is ambiguous and raises).
    data = np.empty((n_samples, 2 * outcomes_nb))
    indexes_res = []

    for row in data:
        indexes = rng.choice(distributions_nb, replace=False, size=2).astype(np.uint)
        row[:] = np.concatenate(
            [rng.multinomial(repeat_nb, p[k]) / repeat_nb for k in indexes]
        )
        indexes_res.append(indexes)

    return data, indexes_res

In [4]:
# Four categorical distributions over three outcomes, one per row
# (every row sums to 1). generate_dataset pairs up rows of this matrix.
p = np.array([[1/3, 1/3, 1/3],
              [1/10, 1/10, 8/10],
              [1/4, 1/4, 1/2],
              [2/5, 1/5, 2/5]])

In [5]:
# Seed NumPy's global RNG so that the dataset drawn here, and the later
# np.random calls made while fitting the SOM, are reproducible on
# Restart & Run All.
np.random.seed(42)
X, indexes = generate_dataset(1000, p)

In [6]:
# Preview the samples: each row is two concatenated 3-outcome frequency vectors.
X

array([[0.2493, 0.2474, 0.5033, 0.4007, 0.2041, 0.3952],
       [0.244 , 0.2501, 0.5059, 0.3985, 0.2016, 0.3999],
       [0.3285, 0.3364, 0.3351, 0.4006, 0.2002, 0.3992],
       ...,
       [0.3351, 0.337 , 0.3279, 0.1009, 0.0948, 0.8043],
       [0.249 , 0.2566, 0.4944, 0.1011, 0.1023, 0.7966],
       [0.3992, 0.2016, 0.3992, 0.1028, 0.0973, 0.7999]])

In [7]:
# Instantiate the map with its default 3 x 3 lattice (9 neurons).
som = SOM()

In [8]:
# Train the map on the generated samples for 100 epochs.
som.fit(X, 100)

In [9]:
# Show the learned weight vector of every neuron, each followed by a blank line.
for neuron in som.neurons_:
    print(neuron.weight_, end="\n\n")

[0.1048 0.0998 0.7954 0.2482 0.2481 0.5037]

[0.1073 0.1047 0.788  0.4081 0.1911 0.4008]

[0.3353 0.3323 0.3324 0.3988 0.2011 0.4001]

[0.3992 0.2016 0.3992 0.1028 0.0973 0.7999]

[0.3959 0.1939 0.4102 0.25   0.2445 0.5055]

[0.2548 0.2545 0.4907 0.3992 0.1954 0.4054]

[0.3351 0.337  0.3279 0.1009 0.0948 0.8043]

[0.256  0.2516 0.4924 0.3359 0.3377 0.3264]

[0.397  0.199  0.404  0.3317 0.3362 0.3321]

