In [1]:
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from abc import ABC, abstractmethod

In [2]:
def hellinger_d(v1:np.ndarray, v2:np.ndarray):
    """Return the Hellinger distance between two arrays.

    The distance is the Euclidean norm of the difference of the element-wise
    square roots, divided by sqrt(2).

    Parameters
    ----------
    v1, v2 : np.ndarray
        Arrays to compare. Presumably non-negative probability vectors of the
        same length (square roots are taken element-wise) -- verify at call sites.

    Returns
    -------
    The distance as a NumPy floating-point scalar.
    """
    root_gap = np.sqrt(v1) - np.sqrt(v2)
    squared_total = np.sum(root_gap**2)
    return np.sqrt(squared_total) / np.sqrt(2)

In [3]:
class Neuron:
    """One node of a lattice: a weight vector together with its lattice coordinate."""

    def __init__(self, weight, coord):
        # Both values are stored as given; no copy or validation is performed.
        self.coord_ = coord
        self.weight_ = weight

In [4]:
class Lattice(ABC):
    """Abstract base for neuron lattices.

    The constructor stores the requested size and immediately calls
    ``generate()``, so a lattice is populated as soon as it is created.
    """

    def __init__(self, size):
        self.size_ = size
        # Placeholders only; the concrete generate() is expected to fill both in.
        self.neurons_ = self.neurons_nb_ = None

        self.generate()

    @abstractmethod
    def generate(self):
        """Create the neurons of the lattice (must be provided by subclasses)."""

    @abstractmethod
    def distance_map(self):
        """Return the distance map of the lattice (must be provided by subclasses)."""

class RectangularLattice(Lattice):
    """Lattice whose neurons sit on an integer (row, column) grid."""

    def generate(self):
        """Create ``size_[0] * size_[1]`` neurons with ``None`` weights.

        Neurons are stored in row-major order: the neuron at grid position
        (i, j) lives at index ``i * width + j``. ``distance_map`` relies on
        that layout.
        """
        height, width = self.size_[0], self.size_[1]
        self.neurons_nb_ = height * width
        self.neurons_ = [
            Neuron(None, np.array([i, j]))
            for i in range(height)
            for j in range(width)
        ]

    def distance_map(self):
        """Returns the distance map of the weights.
        Each cell is the normalized sum of the distances between
        a neuron and its neighbours.

        The neighbours of a cell are the cells directly above, below, left and
        right of it (where they exist). Distances are Hellinger distances
        between weight vectors, and the map is divided by its maximum.

        Every neuron must already have a weight (``generate`` creates them with
        ``None``). If the lattice is empty or every summed distance is zero
        (for instance a 1x1 lattice or identical weights), the unnormalised map
        is returned instead of dividing by zero.
        """
        height, width = self.size_[0], self.size_[1]
        u_matrix = np.zeros(self.size_)

        for i in range(height):
            for j in range(width):
                current_weight = self.neurons_[i * width + j].weight_
                distance = 0

                # Up, down, left, right; positions outside the grid are skipped.
                for ni, nj in ((i - 1, j), (i + 1, j), (i, j - 1), (i, j + 1)):
                    if 0 <= ni < height and 0 <= nj < width:
                        distance += hellinger_d(current_weight, self.neurons_[ni * width + nj].weight_)

                u_matrix[i, j] = distance

        # np.max raises on an empty array, so bail out before calling it.
        if u_matrix.size == 0:
            return u_matrix

        peak = np.max(u_matrix)
        # Dividing by a zero maximum would fill the map with NaN.
        if peak == 0:
            return u_matrix

        return u_matrix / peak



class HexagonalLattice(Lattice):
    """Lattice whose neurons sit on a hexagonal grid.

    Columns are 1.5 apart, rows are sqrt(3) apart, and odd columns are shifted
    by -sqrt(3)/2, so every pair of adjacent neurons is sqrt(3) apart.
    """

    def generate(self):
        """Create ``size_[0] * size_[1]`` neurons with ``None`` weights.

        Neurons are stored in row-major order: the neuron of row i and column j
        lives at index ``i * width + j`` and has coordinate (x, y) as described
        in the class docstring.
        """
        self.neurons_ = []
        self.neurons_nb_ = self.size_[0] * self.size_[1]

        height = self.size_[0]
        width = self.size_[1]

        for i in range(height):
            for j in range(width):
                x = j * 1.5
                y = i * np.sqrt(3)
                if j % 2 == 1:
                    # Odd columns are offset by half a row.
                    y -= np.sqrt(3) / 2

                self.neurons_.append(Neuron(None, np.array([x, y])))

    def distance_map(self):
        """Returns the distance map of the weights.
        Each cell is the normalized sum of the distances between
        a neuron and its neighbors.

        A neuron has up to six neighbours: the two in its own column (rows
        i-1 and i+1) and two in each adjacent column. Because odd columns are
        shifted by half a row, the adjacent-column neighbours are rows i and
        i-1 for an odd column, and rows i and i+1 for an even column.

        Every neuron must already have a weight (``generate`` creates them with
        ``None``). If the lattice is empty or every summed distance is zero,
        the unnormalised map is returned instead of dividing by zero.
        """
        height = self.size_[0]
        width = self.size_[1]
        u_matrix = np.zeros(self.size_)

        for i in range(height):
            for j in range(width):
                current_weight = self.neurons_[i * width + j].weight_
                # Row offset of the second neighbour in each adjacent column.
                row_shift = -1 if j % 2 == 1 else 1
                neighbours = (
                    (i - 1, j), (i + 1, j),
                    (i, j - 1), (i, j + 1),
                    (i + row_shift, j - 1), (i + row_shift, j + 1),
                )

                distance = 0
                for ni, nj in neighbours:
                    if 0 <= ni < height and 0 <= nj < width:
                        distance += hellinger_d(current_weight, self.neurons_[ni * width + nj].weight_)

                u_matrix[i, j] = distance

        # np.max raises on an empty array, so bail out before calling it.
        if u_matrix.size == 0:
            return u_matrix

        peak = np.max(u_matrix)
        # Dividing by a zero maximum would fill the map with NaN.
        if peak == 0:
            return u_matrix

        return u_matrix / peak


In [5]:
class SOM:
    """Self-organising map trained on the neurons of a lattice.

    Best-matching units are found with the Hellinger distance between a sample
    and the neuron weights; neighbourhood strength is a Gaussian of the
    distance between lattice coordinates.
    """

    def __init__(self, lattice:"Lattice"):
        self.lattice_ = lattice

    def fit(self, X:np.ndarray, epochs):
        """Train the neurons of the lattice on ``X``.

        Parameters
        ----------
        X : np.ndarray
            Training samples, one per row. ``X`` is not modified.
        epochs : int
            Number of full passes over ``X``.

        Raises
        ------
        ValueError
            Raised by ``np.random.choice`` when the lattice has more neurons
            than ``X`` has rows (initial weights are drawn without replacement).
        """
        neurons_nb = self.lattice_.neurons_nb_
        neurons = self.lattice_.neurons_

        indexes = np.random.choice(X.shape[0], neurons_nb, replace=False)

        for i in range(0, neurons_nb):
            # X[k] is a view into X; copy it so the in-place weight updates
            # below do not overwrite the training data.
            neurons[i].weight_ = X[indexes[i]].copy()

        for ep in range(epochs):
            # Both schedules depend only on the epoch, so compute them once per
            # epoch rather than once per sample.
            h0 = np.exp(-ep*0.001)      # peak learning rate
            sigma = np.exp(-ep*0.01)    # neighbourhood width

            for x in X:
                # Find BMU (the first neuron with the smallest distance wins ties)
                min_d = np.inf
                winner_idx = None

                for i in range(0, neurons_nb):
                    d = self.hellinger_d(x, neurons[i].weight_)
                    if d < min_d:
                        min_d = d
                        winner_idx = i

                winner_coord = neurons[winner_idx].coord_

                # Updating all neurons
                for i in range(0, neurons_nb):
                    learning_rate = self.neighbourhood_func(neurons[i].coord_, winner_coord, h0, sigma)
                    neurons[i].weight_ += learning_rate * (x - neurons[i].weight_)

    @staticmethod
    def neighbourhood_func(r_i, r_c, h0, sigma):
        """Gaussian neighbourhood: ``h0 * exp(-|r_i - r_c|^2 / (2 * sigma^2))``.

        ``r_i`` and ``r_c`` are the lattice coordinates of a neuron and of the
        winning neuron.
        """
        distance = np.sum(np.square(r_i - r_c))

        return h0 * np.exp(-distance/(2*sigma**2))

    @staticmethod
    def hellinger_d(v1:np.ndarray, v2:np.ndarray):
        """Hellinger distance between ``v1`` and ``v2``.

        Kept as a static method because notebook cells call
        ``SOM.hellinger_d`` directly.
        """
        return np.sqrt(np.sum((np.sqrt(v1) - np.sqrt(v2))**2)) / np.sqrt(2)



In [6]:
def generate_dataset(n_samples, p):
    """Generate ``n_samples`` rows of empirical frequencies drawn from ``p``.

    For every row, two distinct rows of ``p`` are picked at random. For each,
    10000 multinomial trials are drawn and converted to relative frequencies.
    The two frequency vectors are laid side by side in the returned row.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    p : np.ndarray
        Array with one probability distribution per row.

    Returns
    -------
    np.ndarray
        Array with ``n_samples`` rows.
    """
    repeat_nb = 10000
    distributions_nb = p.shape[0]

    rows = []
    for _ in range(n_samples):
        # Two different distributions per sample (drawn without replacement).
        picked = np.random.choice(distributions_nb, replace=False, size=2).astype(np.uint)
        rows.append([np.random.multinomial(repeat_nb, p[k]) / repeat_nb for k in picked])

    return np.array(rows).reshape(n_samples, -1)

In [7]:
# Reference probability distributions, one per row (three outcomes each).
p = np.array([
    [1/3, 1/3, 1/3],
    [1/10, 1/10, 8/10],
    [1/4, 1/4, 1/2],
    [2/5, 1/5, 2/5],
])

In [8]:
# Seed NumPy's global generator so the sampled dataset (and any later code that
# draws from np.random) gives the same result after Restart & Run All.
np.random.seed(42)

X = generate_dataset(5000, p)

# Each sample concatenates two draws, so it has twice as many columns as p.
assert X.shape == (5000, 2 * p.shape[1])

In [9]:
# Preview the generated samples; NumPy abbreviates the middle rows of large arrays.
X

array([[0.3944, 0.1996, 0.406 , 0.0983, 0.0993, 0.8024],
       [0.2442, 0.253 , 0.5028, 0.1051, 0.1009, 0.794 ],
       [0.2479, 0.2529, 0.4992, 0.3901, 0.1963, 0.4136],
       ...,
       [0.3318, 0.3433, 0.3249, 0.4033, 0.1986, 0.3981],
       [0.4025, 0.1954, 0.4021, 0.0997, 0.1   , 0.8003],
       [0.2476, 0.2512, 0.5012, 0.3988, 0.1953, 0.4059]])

In [10]:
# Hexagonal lattice with 3 rows and 4 columns (12 neurons), wrapped in a SOM.
lattice = HexagonalLattice(size=(3, 4))
som = SOM(lattice=lattice)

0.0 0.0
1.5 -0.8660254037844386
3.0 0.0
4.5 -0.8660254037844386
0.0 1.7320508075688772
1.5 0.8660254037844386
3.0 1.7320508075688772
4.5 0.8660254037844386
0.0 3.4641016151377544
1.5 2.598076211353316
3.0 3.4641016151377544
4.5 2.598076211353316


In [11]:
# Train the map for 300 passes over the dataset.
som.fit(X, epochs=300)

In [12]:
# Every ordered pair (i, j), i != j, of reference distributions, concatenated
# the same way a dataset sample is. Built once because it does not depend on
# the neuron being examined.
candidates = [
    ((i, j), np.concatenate((p[i], p[j])))
    for i in range(len(p))
    for j in range(len(p))
    if i != j
]

for neuron in lattice.neurons_:
    # Reset all three trackers per neuron so nothing carries over from the
    # previous iteration.
    min_distance = np.inf
    indexes = None
    best_distribution = None

    for pair, p_distribution in candidates:
        distance = SOM.hellinger_d(p_distribution, neuron.weight_)
        if distance < min_distance:
            min_distance = distance
            best_distribution = p_distribution
            indexes = pair

    # Learned weight, closest reference pair, and the indexes of that pair in p.
    print(neuron.weight_)
    print(best_distribution)
    print(indexes)
    print()

[0.25253249 0.24925249 0.49821502 0.09642115 0.09800693 0.80557192]
[0.25 0.25 0.5  0.1  0.1  0.8 ]
(2, 1)

[0.38804779 0.22326911 0.38868309 0.09874441 0.09999337 0.80126223]
[0.4 0.2 0.4 0.1 0.1 0.8]
(3, 1)

[0.10095326 0.09760343 0.80144332 0.25185167 0.24265078 0.50549756]
[0.1  0.1  0.8  0.25 0.25 0.5 ]
(1, 2)

[0.10384432 0.10450361 0.79165207 0.3282339  0.33567584 0.33609026]
[0.1        0.1        0.8        0.33333333 0.33333333 0.33333333]
(1, 0)

[0.33082298 0.33266706 0.33650995 0.24865801 0.25122525 0.50011674]
[0.33333333 0.33333333 0.33333333 0.25       0.25       0.5       ]
(0, 2)

[0.29550013 0.23837178 0.46612809 0.19809461 0.19805433 0.60385107]
[0.4  0.2  0.4  0.25 0.25 0.5 ]
(3, 2)

[0.25334542 0.24752193 0.49913265 0.33535668 0.3337337  0.33090961]
[0.25       0.25       0.5        0.33333333 0.33333333 0.33333333]
(2, 0)

[0.09929644 0.10105806 0.79964549 0.40278563 0.20466256 0.39255181]
[0.1 0.1 0.8 0.4 0.2 0.4]
(1, 3)

[0.33129623 0.34000883 0.32869494 0.4028