In [1]:
import numpy as np
import pandas as pd
import math
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.metrics import homogeneity_score
from sklearn.metrics import completeness_score

In [2]:

def gauss(x, t):
    return np.exp(-(t*x)**2)

def mexican_hat(x, t):
    return abs((2 - 4*(x*t)**2)*np.exp(-(x*t)**2))

In [3]:
class kohonen:
    def __init__(self, N, M, data, width, theta = gauss, grid = 'square'):
        self.N = N #wymiary sieci
        self.M = M
        self.data = data #zbior danych
        self.neurons = np.random.uniform(np.min(data), np.max(data), size=(M*N, data.shape[1])) #inicjowanie wag pokrywających zbiór data
        self.width = width #szerokosc sasiedztwa
        self.theta = theta #funkcja sąsiedztwa
        self.iter = 0 #odbyte iteracje uczenia, pamiętanie iteracji
        self.grid = grid #architektura siatki
        
    def dist(self, x, y):
        if self.grid == 'square':
            return self.width*math.sqrt((x%self.N - y%self.N)**2 + (math.floor(x/self.N) - math.floor(y/self.N))**2)
        if self.grid == 'hex':
            return self.width*math.sqrt((x%self.N - y%self.N)**2 + (math.floor(x/self.N) - math.floor(y/self.N)*math.sqrt(3)/2)**2)
    
    def alpha(self, t, lambda_):
        return np.exp(-t/lambda_)
    
    def train(self, lambda_):
        
        for t in range(lambda_):
            perm = np.random.permutation(self.data.shape[0])
            for x in self.data[perm]:
                closest_neuron_index = np.argmin([np.linalg.norm(x - neuron) for neuron in self.neurons])
                for i, neuron in enumerate(self.neurons):
                    self.neurons[i] += self.theta(self.dist(closest_neuron_index, i), self.iter + t)*self.alpha(self.iter + t, self.iter + lambda_)*(x - neuron)
        self.iter += lambda_
    

    def classification(self):
        return [np.argmin([np.linalg.norm(neuron - x) for neuron in self.neurons]) for x in self.data]
    
    
    def set_nodes(self, n): #naiwny clustering wytrenowanych neuronów
        while (len(self.neurons) > n):
            dist = np.array([np.linalg.norm(n1 - n2) for n1 in self.neurons for n2 in self.neurons])
            dist = dist[dist != 0]
            clustered_neuron = np.unravel_index(dist.argmin(), (len(self.neurons), len(self.neurons) - 1))[0]
            self.neurons = np.delete(self.neurons, clustered_neuron, 0)
    

In [4]:
d = pd.read_csv('./mnist.csv')

In [41]:
x = np.asarray(d.iloc[0:500,1:785])

In [42]:
x.shape

(500, 784)

In [16]:
data1

Unnamed: 0,x,y,c
0,2.402047,3.728695,1
1,-0.660032,7.047206,0
2,2.202839,-2.394303,2
3,3.179183,2.593885,1
4,3.173320,-2.152688,2
...,...,...,...
595,-2.186246,-4.059754,4
596,4.089715,-2.879358,2
597,-3.014489,-2.824257,4
598,3.746346,3.001876,1


In [135]:
nn = kohonen(4, 4, x, 1)
nn.train(10)

In [136]:
nn.train(10)

In [139]:
nn.set_nodes(10)
c = nn.classification()

In [140]:
homogeneity_score(d['label'].iloc[0:500], c)

0.3514465013038948

In [142]:
completeness_score(d['label'].iloc[0:500], c)

0.39781205056877744

In [18]:
d['label'].iloc[0:100]

0     1
1     0
2     1
3     4
4     0
     ..
95    9
96    1
97    2
98    0
99    5
Name: label, Length: 100, dtype: int64

In [152]:
%%time
nn = kohonen(2, 5, x, 1)
nn.train(1)

Wall time: 13.1 s


In [156]:
%%time
nn = kohonen(2, 5, x, 1, theta = mexican_hat)
nn.train(1)

Wall time: 10.7 s


In [157]:
%%time
nn = kohonen(2, 5, x, 1, grid = 'hex')
nn.train(1)

Wall time: 11.8 s


In [158]:
%%time
nn = kohonen(2, 5, x, 1, theta = mexican_hat, grid = 'hex')
nn.train(1)

Wall time: 12.1 s


In [145]:
max(c)

24

In [151]:
x = np.asarray(d.iloc[:,1:785])
x.shape

(42000, 784)

In [146]:
nn.set_nodes(10)
c = nn.classification()
homogeneity_score(d['label'].iloc[0:500], c)

0.28272541091168824

In [149]:
set(c)

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

In [4]:
d2 = pd.read_csv('./UCI HAR Dataset/train/X_train.txt', header = None, sep="\s+")
x2 = np.asarray(d2)
x2.shape

(7352, 561)

In [5]:
labels = pd.read_csv('./UCI HAR Dataset/train/y_train.txt', header = None, sep="\s+")

In [6]:
labels = np.asarray(labels[0])
max(labels)

6

In [10]:
%%time
nn = kohonen(2, 3, x2, 1, theta = mexican_hat, grid = 'hex')
nn.train(30)

Wall time: 40.9 s


In [11]:
c = nn.classification()
homogeneity_score(labels, c)

0.6026584180497051