In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import v_measure_score
from sklearn.metrics.cluster import contingency_matrix

In [2]:
class kohonen:
    """Kohonen self-organising map with ``M x N`` units.

    ``self.weights`` has shape ``(input_dim, M, N)``, so ``self.weights[:, i, j]``
    is the prototype vector of unit ``(i, j)``.  The lattice coordinates of the
    units are stored with the same layout in ``self.lattice_positions``
    (shape ``(2, M, N)``).

    All randomness (initial weights, per-epoch shuffling) is drawn from the
    global ``np.random`` state; call ``np.random.seed`` first for repeatable runs.
    """

    def __init__(
        self,
        M,
        N,
        X,
        learning_rate=0.1,
        gauss_neighborhood_function=True,
        neighborhood_proximity=1.0,
        lattice="rectangular",
    ):
        """Create the map and initialise its weights inside the bounding box of ``X``.

        Parameters
        ----------
        M, N : int
            Number of rows and columns of the unit lattice.
        X : array-like of shape (n_samples, input_dim)
            Data used to size the weight vectors and to set the range of the
            random initial weights (per-feature minimum .. maximum).
        learning_rate : float
            Base learning rate; it is decayed per epoch by ``decay``.
        gauss_neighborhood_function : bool
            Truthy selects ``gauss_neighborhood``, falsy selects
            ``mexican_hat_neighborhood``.
        neighborhood_proximity : float
            Multiplier applied to the lattice distance inside the neighbourhood
            function.
        lattice : {"rectangular", "hexagonal"}
            Layout of the units used to measure lattice distances.

        Raises
        ------
        ValueError
            If ``lattice`` is not one of the supported layouts.
        """
        X = np.asarray(X)
        self.M = M
        self.N = N
        self.learning_rate = learning_rate
        self.input_dim = X.shape[1]
        self.neighborhood_function = (
            self.gauss_neighborhood
            if gauss_neighborhood_function
            else self.mexican_hat_neighborhood
        )
        self.neighborhood_proximity = neighborhood_proximity
        self.lattice = lattice
        self.calculate_lattice_distances(M, N, lattice)

        # Draw uniform [0, 1) values and stretch each feature plane to the
        # observed [min, max] range of that feature.
        minimum = np.min(X, axis=0)
        maximum = np.max(X, axis=0)
        unit_draws = np.random.uniform(0, 1, (self.input_dim, M, N))
        self.weights = (
            minimum[:, None, None] + (maximum - minimum)[:, None, None] * unit_draws
        )

    def calculate_lattice_distances(self, M, N, lattice):
        """Build the lattice coordinates used to measure unit-to-unit distances.

        Stores ``self.lattice_positions`` with shape ``(2, M, N)``:
        ``lattice_positions[:, i, j]`` is the (vertical, horizontal) position of
        unit ``(i, j)``.

        * ``"rectangular"``: the position is simply ``(i, j)``.
        * ``"hexagonal"``: odd rows are shifted right by half a cell and rows
          are spaced ``sqrt(3) / 2`` apart, so the six nearest neighbours of an
          interior unit are all at distance 1.

        Raises
        ------
        ValueError
            If ``lattice`` is neither ``"rectangular"`` nor ``"hexagonal"``.
        """
        if lattice not in ("rectangular", "hexagonal"):
            raise ValueError("Invalid lattice type")

        positions = np.indices((M, N)).astype(float)  # [0] = row index, [1] = column index
        if lattice == "hexagonal":
            # Shift columns first: the parity test needs the integer row indices.
            positions[1] += 0.5 * (positions[0] % 2)
            positions[0] *= np.sqrt(3) / 2
        self.lattice_positions = positions

    def gauss_neighborhood(self, t, T):
        """Gaussian neighbourhood ``exp(-(t * T * proximity)^2)``.

        ``t`` is a lattice distance (scalar or array) and ``T`` the epoch number.
        """
        scaled_sq = np.power(t * T * self.neighborhood_proximity, 2)
        return np.exp(-scaled_sq)

    def mexican_hat_neighborhood(self, t, T):
        """Mexican-hat neighbourhood ``exp(-z^2) * (2 - 4 z^2)`` with ``z = t * T * proximity``.

        ``t`` is a lattice distance (scalar or array) and ``T`` the epoch number.
        """
        scaled_sq = np.power(t * T * self.neighborhood_proximity, 2)
        return np.exp(-scaled_sq) * (2 - 4 * scaled_sq)

    def euclidean_distance(self, x, y):
        """Return the Euclidean norm of ``x - y``."""
        return np.linalg.norm(x - y)

    def decay(self, t, iteration_num):
        """Return the learning rate for step ``t``: ``learning_rate * exp(-t / iteration_num)``."""
        return self.learning_rate * np.exp(-t / iteration_num)

    def _distances_to_units(self, x):
        """Return the ``(M, N)`` grid of Euclidean distances from ``x`` to every unit's weights."""
        return np.linalg.norm(self.weights - np.asarray(x)[:, None, None], axis=0)

    def _lattice_distances_to(self, unit):
        """Return the ``(M, N)`` grid of lattice distances from ``unit`` (an ``(i, j)`` pair)."""
        row, col = unit
        anchor = self.lattice_positions[:, row, col]
        return np.linalg.norm(self.lattice_positions - anchor[:, None, None], axis=0)

    def train(self, X, max_epochs):
        """Fit the map to ``X`` with online updates for ``max_epochs`` epochs.

        Each epoch reshuffles the samples and uses a learning rate of
        ``decay(epoch, max_epochs)``.  For every sample the best-matching unit
        (BMU) is located and all units are pulled towards the sample, weighted
        by the neighbourhood function of their lattice distance to the BMU.

        Note: the neighbourhood argument is ``distance * epoch * proximity``, so
        during epoch 0 it is zero for every unit and the whole map receives the
        same (maximal) neighbourhood weight.
        """
        for epoch in range(max_epochs):
            X = np.random.permutation(X)
            decay_value = self.decay(epoch, max_epochs)
            for x in X:
                distance_grid = self._distances_to_units(x)
                bmu = np.unravel_index(np.argmin(distance_grid), distance_grid.shape)
                influence = self.neighborhood_function(
                    self._lattice_distances_to(bmu), epoch
                )
                # One broadcast update replaces the per-unit Python loop; each
                # unit's step depends only on its own weights, so the result
                # matches updating the units one at a time.
                self.weights += (
                    decay_value * influence * (x[:, None, None] - self.weights)
                )

    def predict(self, X):
        """Return the index of the closest unit for every row of ``X``.

        The result is an integer array of shape ``(n_samples, 1)``; unit
        ``(i, j)`` is reported as the flat row-major index ``i * N + j``.
        """
        winners = [int(np.argmin(self._distances_to_units(x))) for x in np.asarray(X)]
        return np.array(winners, dtype=np.intp).reshape(-1, 1)

## Tests with NumPy and speed

In [3]:
# Load the cube data, strip the label column "c", and build a small 2x4 map
# whose freshly initialised weights can be inspected by hand.
cube = pd.read_csv("./../data/kohonen/cube.csv")
cube_x = cube.drop(columns="c").to_numpy()
kohon = kohonen(M=2, N=4, X=cube_x)
weights = kohon.weights
weights

array([[[ 4.45124426, -0.85807422,  2.02991602,  2.15694688],
        [-1.25335437,  3.74992866,  0.72209788, -0.09278415]],

       [[ 0.49612447,  4.4536387 ,  1.51566875,  1.45954145],
        [-0.21505229,  1.95818308,  3.46373891, -0.7811021 ]],

       [[ 3.16589351,  2.24508445,  2.71556114, -0.50183024],
        [ 2.05301053, -1.4736095 , -0.97559669,  0.88790141]]])

In [4]:
# Reference computation: distance from one sample to each unit, one unit at a time.
x = cube_x[40]
for i, j in np.ndindex(2, 4):  # row-major walk over the 2x4 lattice
    distance = kohon.euclidean_distance(x, weights[:, i, j])
    print(f"Distance from {i}, {j}: {distance}")

Distance from 0, 0: 5.159113826822259
Distance from 0, 1: 6.342749750129586
Distance from 0, 2: 4.0814108042704715
Distance from 0, 3: 3.0316573921578085
Distance from 1, 0: 3.1139861701137486
Distance from 1, 1: 4.556442578841188
Distance from 1, 2: 4.771261819072919
Distance from 1, 3: 1.3750035047045117


In [5]:
# Distances from x to every unit in one vectorised call; the result is laid out
# as the (rows, columns) grid of the map.
offsets = weights.T - x
np.linalg.norm(offsets, axis=2).T

array([[5.15911383, 6.34274975, 4.0814108 , 3.03165739],
       [3.11398617, 4.55644258, 4.77126182, 1.3750035 ]])

In [6]:
# Best-matching unit: flat argmin over the distance grid, converted back to (row, column).
distance_grid = np.linalg.norm(weights.T - x, axis=2).T
idx = distance_grid.argmin()
np.unravel_index(idx, distance_grid.shape)

(1, 3)

### Testing if I broke something

In [17]:
# Seed the global NumPy RNG: the map's initial weights and the per-epoch sample
# shuffling both draw from np.random, so without a seed this score changes on
# every run and cannot serve as a regression check.
np.random.seed(0)
kohon = kohonen(2, 4, cube_x)
kohon.train(cube_x, 10)
# predict returns an (n_samples, 1) column; flatten it before storing it as a
# DataFrame column.
cube["predicted"] = kohon.predict(cube_x).ravel()
v_measure_score(cube["c"], cube["predicted"])

0.9213659041726133