In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import v_measure_score
from sklearn.metrics.cluster import contingency_matrix

In [3]:
class kohonen:
    """Kohonen self-organising map (SOM) on an ``M x N`` lattice of neurons.

    Weights are stored in ``self.weights`` with shape ``(input_dim, M, N)``,
    so ``weights[:, i, j]`` is the prototype vector of the neuron at lattice
    position ``(i, j)``.

    Parameters
    ----------
    M, N : int
        Number of lattice rows and columns.
    X : array-like of shape (n_samples, input_dim)
        Data used to size the weights and to pick their initial range: every
        input dimension is initialised uniformly between the column minimum
        and maximum of ``X``.
    learning_rate : float, default 0.1
        Initial learning rate; it is decayed exponentially over the epochs
        (see :meth:`decay`).
    neighborhood_function : {"gauss", "mexican_hat"}, default "gauss"
        Neighbourhood kernel used during training.
    neighborhood_proximity : float, default 1.0
        Multiplier applied to the kernel argument; larger values narrow the
        neighbourhood.
    lattice : {"rectangular", "hexagonal"}, default "rectangular"
        Geometry used to measure distances between neurons on the map.
    random_state : int or None, default None
        Seed for weight initialisation and sample shuffling.  With ``None``
        the global ``np.random`` generator is used, so an outer
        ``np.random.seed(...)`` keeps controlling the results.

    Raises
    ------
    ValueError
        If ``lattice`` is not one of the supported names.
    """

    def __init__(
        self,
        M,
        N,
        X,
        learning_rate=0.1,
        neighborhood_function="gauss",
        neighborhood_proximity=1.0,
        lattice="rectangular",
        random_state=None,
    ):
        data = np.asarray(X)
        self.M = M
        self.N = N
        self.learning_rate = learning_rate
        self.input_dim = data.shape[1]
        self.neighborhood_function = neighborhood_function
        self.neighborhood_proximity = neighborhood_proximity
        self.lattice = lattice
        # Fall back to the module-level generator when no seed is given so
        # that existing code relying on np.random.seed() behaves as before.
        self._rng = (
            np.random if random_state is None else np.random.RandomState(random_state)
        )
        # Validates `lattice` and stores the neuron coordinates.
        self.calculate_lattice_distances(M, N, lattice)
        unit_weights = self._rng.uniform(0, 1, (self.input_dim, M, N))
        minimum = np.min(data, axis=0)
        maximum = np.max(data, axis=0)
        # Stretch the unit-interval draws to the per-dimension data range.
        self.weights = (
            minimum[:, None, None] + (maximum - minimum)[:, None, None] * unit_weights
        )

    def gauss_neighborhood(self, t, T):
        """Gaussian kernel ``exp(-(t * T * neighborhood_proximity) ** 2)``.

        ``t`` is a lattice distance (scalar or array) and ``T`` the value the
        distance is scaled by; :meth:`train` passes the epoch index, so at
        epoch 0 the argument is zero and every neuron gets the full update.
        """
        return np.exp(-np.power(t * T * self.neighborhood_proximity, 2))

    def calculate_lattice_distances(self, M, N, lattice):
        """Store the planar coordinates of every neuron for the given lattice.

        The result is kept in ``self.lattice_coordinates`` with shape
        ``(M, N, 2)``; distances between neurons are the Euclidean distances
        between these coordinates.

        * ``"rectangular"``: neuron ``(i, j)`` sits at ``(i, j)``.
        * ``"hexagonal"``: odd rows are shifted by half a cell and rows are
          ``sqrt(3) / 2`` apart, so the six nearest neighbours of a neuron are
          all at distance 1.

        Raises
        ------
        ValueError
            If ``lattice`` is neither ``"rectangular"`` nor ``"hexagonal"``.
        """
        rows, cols = np.meshgrid(
            np.arange(M, dtype=float), np.arange(N, dtype=float), indexing="ij"
        )
        if lattice == "rectangular":
            pass
        elif lattice == "hexagonal":
            # Shift must be computed from the integer row index, i.e. before
            # the rows themselves are rescaled.
            cols = cols + 0.5 * (rows % 2)
            rows = rows * (np.sqrt(3) / 2)
        else:
            raise ValueError("Invalid lattice type")
        self.lattice_coordinates = np.stack([rows, cols], axis=-1)

    def mexican_hat_neighborhood(self, t, T):
        """Mexican-hat kernel ``exp(-u ** 2) * (2 - 4 * u ** 2)``.

        Here ``u = t * T * neighborhood_proximity``.  The kernel becomes
        negative for larger ``u``, which pushes those neurons away from the
        sample instead of pulling them towards it.
        """
        return np.exp(-np.power(t * T * self.neighborhood_proximity, 2)) * (
            2 - 4 * np.power(t * T * self.neighborhood_proximity, 2)
        )

    def euclidean_distance(self, x, y):
        """Return the Euclidean (L2) distance between ``x`` and ``y``."""
        return np.linalg.norm(x - y)

    def decay(self, t, iteration_num):
        """Return the learning rate at step ``t``: ``learning_rate * exp(-t / iteration_num)``."""
        return self.learning_rate * np.exp(-t / iteration_num)

    def _neighborhood(self):
        """Return the bound kernel method selected by ``neighborhood_function``."""
        if self.neighborhood_function == "gauss":
            return self.gauss_neighborhood
        if self.neighborhood_function == "mexican_hat":
            return self.mexican_hat_neighborhood
        raise ValueError("Invalid neighborhood function")

    def _best_matching_unit(self, x):
        """Return the flat (row-major) index of the neuron closest to ``x``."""
        # weights.T has shape (N, M, input_dim), so the norm over the last
        # axis yields an (N, M) grid that is transposed back to (M, N).
        return np.argmin(np.linalg.norm(self.weights.T - x, axis=2).T)

    def _lattice_distances_from(self, position):
        """Return the ``(M, N)`` grid of lattice distances to neuron ``position``."""
        return np.linalg.norm(
            self.lattice_coordinates - self.lattice_coordinates[position], axis=-1
        )

    def train(self, X, max_epochs):
        """Fit the map with online (sample-by-sample) updates.

        Every epoch works on a freshly shuffled copy of ``X``.  For each
        sample the best matching unit (BMU) is located, and all neurons are
        moved towards the sample by
        ``decay(epoch) * neighbourhood(lattice distance to BMU, epoch)``.

        Parameters
        ----------
        X : numpy.ndarray of shape (n_samples, input_dim)
        max_epochs : int
            Number of passes over the data.

        Raises
        ------
        ValueError
            If ``neighborhood_function`` is not a supported name.
        """
        neighborhood = self._neighborhood()
        for epoch in range(max_epochs):
            X = self._rng.permutation(X)
            decay_value = self.decay(epoch, max_epochs)
            for x in X:
                bmu = np.unravel_index(self._best_matching_unit(x), (self.M, self.N))
                influence = decay_value * neighborhood(
                    self._lattice_distances_from(bmu), epoch
                )
                # Each neuron's update depends only on its own weights, so
                # one broadcast update replaces the per-neuron double loop.
                self.weights += influence * (x[:, None, None] - self.weights)

    def predict(self, X):
        """Return the winning neuron of every sample.

        Parameters
        ----------
        X : iterable of vectors of length ``input_dim``

        Returns
        -------
        numpy.ndarray of shape (n_samples, 1)
            Flat row-major neuron indices, i.e. ``i * N + j`` for the neuron
            at lattice position ``(i, j)``.
        """
        winners = [self._best_matching_unit(x) for x in X]
        return np.array(winners, dtype=np.intp).reshape(-1, 1)

## Tests: vectorized NumPy distance computation vs. explicit loops (correctness and speed)

In [4]:
# Load the cube data set and keep every column except "c" as the input matrix.
cube = pd.read_csv("./../data/kohonen/cube.csv")
cube_x = cube.drop(columns="c").to_numpy()

# Build a small 2 x 4 map on those inputs and show its freshly initialised
# weights (shape: input_dim x 2 x 4).
kohon = kohonen(M=2, N=4, X=cube_x)
weights = kohon.weights
weights

array([[[ 0.13264713, -0.42958926,  3.26887622, -0.72481347],
        [ 0.24532299,  0.51038657,  1.17049259,  1.20991392]],

       [[ 2.51401079,  2.85196637,  2.52165771,  0.96726661],
        [ 3.88249407,  0.7059975 , -1.48886976, -1.59955556]],

       [[ 2.12626613,  3.44430267, -0.31301083, -0.29741389],
        [ 1.44464289,  2.66504052, -1.41064882,  1.65133269]]])

In [55]:
# Reference values: distance from one sample to every neuron of the 2 x 4 map,
# computed one neuron at a time.
x = cube_x[40]
for i, j in np.ndindex(2, 4):
    distance = kohon.euclidean_distance(x, weights[:, i, j])
    print(f"Distance from {i}, {j}: {distance}")

Distance from 0, 0: 4.37548443240574
Distance from 0, 1: 5.508811599364567
Distance from 0, 2: 4.485015386335958
Distance from 0, 3: 2.678251572423774
Distance from 1, 0: 5.346013431022207
Distance from 1, 1: 3.3463046502404534
Distance from 1, 2: 1.427812308301886
Distance from 1, 3: 1.7968183290150779


In [56]:
# Same distances in one call: transposing the weights puts the input dimension
# last, so the norm over axis 2 leaves an (N, M) grid that is flipped back to (M, N).
np.transpose(np.linalg.norm(np.transpose(weights) - x, axis=2))

array([[4.37548443, 5.5088116 , 4.48501539, 2.67825157],
       [5.34601343, 3.34630465, 1.42781231, 1.79681833]])

In [57]:
# Best matching unit: flat argmin over the (M, N) distance grid, converted
# back to a (row, column) lattice position.
distance_grid = np.linalg.norm(weights.T - x, axis=2).T
idx = distance_grid.argmin()
np.unravel_index(idx, distance_grid.shape)

(1, 2)