## Anna Urbala - PD8

In [1]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
import math
from sklearn import metrics

In [25]:
def gauss(x, t):
    """Return the Gaussian weight ``exp(-(x * t)**2)``.

    ``x`` is the value being weighted (a grid distance when used as a
    neighbourhood function) and ``t`` multiplies it before squaring, so a
    larger ``t`` makes the bell narrower.
    """
    scaled = x * t
    return math.exp(-(scaled ** 2))

def ricker_wavelet(x, t):
    """Return ``exp(-u) * (2 - 4*u) * t**2`` with ``u = (x * t)**2``.

    This equals the negated second derivative, with respect to ``x``, of
    ``exp(-(x * t)**2)``. The value is positive near ``x == 0`` and turns
    negative once ``(x * t)**2`` exceeds 0.5.
    """
    squared = (x * t) ** 2
    envelope = math.exp(-squared)
    polynomial = 2 - 4 * squared
    # Multiply left to right exactly as a single chained product would.
    return envelope * polynomial * t * t

def ricker_wavelet_mod(x, t):
    """Return the Ricker wavelet at scale 1, clipped below at zero.

    Negative lobes of the wavelet are replaced by ``0`` so the result can be
    used as a non-negative neighbourhood weight.

    NOTE(review): ``t`` is accepted so the signature matches the other
    neighbourhood functions, but it is not used -- the wavelet is always
    evaluated with a fixed scale of 1. Confirm this is intentional.
    """
    unit_scale_value = ricker_wavelet(x, 1)
    return max(unit_scale_value, 0)

class Kohonen:
    """Self-organising (Kohonen) map on an ``M`` x ``N`` grid of neurons.

    Neurons are stored as rows of ``self.neurons`` (shape
    ``(N * M, data.shape[1])``). Neuron ``k`` sits at grid coordinates
    ``(k % N, k // N)``; see :meth:`dist`.
    """

    def __init__(self, M, N, data, width=1, topology="square"):
        """Create the map and randomly initialise the neuron weights.

        Args:
            M, N: grid dimensions; the map holds ``N * M`` neurons.
            data: 2-D NumPy array, one sample per row.
            width: factor multiplying every grid distance.
            topology: ``"hex"`` for a hexagonal grid; any other value
                selects the square grid.
        """
        self.M = M
        self.N = N
        self.data = data
        # Weights are drawn from a normal distribution centred at 0 whose
        # standard deviation is the largest value found in the data.
        self.neurons = np.random.normal(0, np.max(data), size=(N*M, data.shape[1]))
        self.width = width
        self.norm_fun = np.linalg.norm
        self.topology = topology

    @staticmethod
    def alpha(t, l):
        """Learning-rate decay ``exp(-t / l)`` for epoch ``t`` out of ``l``."""
        return math.exp(-t/l)

    def _winner(self, sample):
        """Return the index of the neuron closest to ``sample``."""
        return np.argmin([self.norm_fun(sample - n) for n in self.neurons])

    def get_classes(self):
        """Return, for every row of ``self.data``, the index of its closest neuron."""
        return [self._winner(x) for x in self.data]

    def dist(self, n1, n2):
        """Return the grid distance between neurons ``n1`` and ``n2``, scaled by ``width``.

        Only the metric that matches ``self.topology`` is evaluated.
        """
        j1, i1 = divmod(n1, self.N)
        j2, i2 = divmod(n2, self.N)
        if self.topology == "hex":
            # Columns are 1.5 apart; odd columns are shifted by half a cell
            # (sqrt(3) / 2) along the other axis.
            x = 1.5 * (j1 - j2)
            y = (i1 - i2) * math.sqrt(3) + ((j1 % 2) - (j2 % 2)) * math.sqrt(3) / 2.0
            offset = [x, y]
        else:
            offset = [i1 - i2, j1 - j2]
        return self.width * self.norm_fun(offset)

    def learn(self, l, neigh_fun=gauss):
        """Train the map for ``l`` epochs and return the resulting classes.

        Every epoch visits all samples in a fresh random order. For each
        sample the closest neuron (the winner) is found and every neuron is
        moved towards the sample by
        ``neigh_fun(dist(winner, neuron), epoch) * alpha(epoch, l)``.

        Args:
            l: number of epochs.
            neigh_fun: callable ``(distance, epoch) -> weight``. It is
                assumed to depend only on its arguments, because its values
                are cached within an epoch.

        Returns:
            The list produced by :meth:`get_classes` after training.
        """
        n_neurons = len(self.neurons)
        for t in range(l):
            epoch = t + 1
            rate = self.alpha(epoch, l)
            # The update coefficients depend only on (winner, neuron, epoch),
            # so compute each winner's row once per epoch instead of once per
            # sample.
            coefficient_rows = {}
            order = np.random.permutation(len(self.data))
            for row in self.data[order]:
                n_min = int(self._winner(row))
                coefficients = coefficient_rows.get(n_min)
                if coefficients is None:
                    coefficients = np.array(
                        [neigh_fun(self.dist(n_min, i), epoch) * rate
                         for i in range(n_neurons)],
                        dtype=float,
                    )
                    coefficient_rows[n_min] = coefficients
                # One broadcast update of all neurons; the right-hand side is
                # fully evaluated before the in-place addition.
                self.neurons += coefficients[:, np.newaxis] * (row - self.neurons)
        return self.get_classes()

### MNIST

In [27]:
# Load MNIST through Keras. The first tuple returned by load_data() is
# discarded and only the second one is kept, so despite the "train_" names
# these arrays are presumably the 10000-image test split -- confirm against
# the Keras documentation.
from keras.datasets import mnist
_, (train_X, train_y) = mnist.load_data()
# Show the array shapes as a quick sanity check.
print(train_X.shape)
print(train_y.shape)

(10000, 28, 28)
(10000,)


In [28]:
# Flatten every 28x28 image into a single 784-value float32 row, giving the
# 2-D (samples, features) layout that Kohonen reads via data.shape[1].
train_X = train_X.reshape((len(train_X), 28 * 28)).astype('float32')
train_X.shape

(10000, 784)

##### gauss, width=1, MxN=25, square

In [35]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 square grid, default Gaussian neighbourhood, 10 epochs on MNIST.
som = Kohonen(M=5, N=5, data=train_X)
c = som.learn(l=10)
# Score the neuron assignment against the digit labels.
print("completeness:", metrics.completeness_score(labels_true=train_y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=train_y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.40712855958537647
homogeneity: 0.5439305676561444
25


##### gauss, width=1, MxN=25, hex

In [36]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 hexagonal grid, default Gaussian neighbourhood, 10 epochs on MNIST.
som = Kohonen(M=5, N=5, data=train_X, topology="hex")
c = som.learn(l=10)
# Score the neuron assignment against the digit labels.
print("completeness:", metrics.completeness_score(labels_true=train_y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=train_y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.42495594935780157
homogeneity: 0.5662181062100237
25


##### ricker_wavelet_mod, width=1, MxN=25, square

In [37]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 square grid, clipped Ricker-wavelet neighbourhood, 10 epochs on MNIST.
som = Kohonen(M=5, N=5, data=train_X)
c = som.learn(l=10, neigh_fun=ricker_wavelet_mod)
# Score the neuron assignment against the digit labels.
print("completeness:", metrics.completeness_score(labels_true=train_y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=train_y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.2937020652632871
homogeneity: 0.1175663232160283
3


##### ricker_wavelet_mod, width=1, MxN=25, hex

In [38]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 hexagonal grid, clipped Ricker-wavelet neighbourhood, 10 epochs on MNIST.
som = Kohonen(M=5, N=5, data=train_X, topology="hex")
c = som.learn(l=10, neigh_fun=ricker_wavelet_mod)
# Score the neuron assignment against the digit labels.
print("completeness:", metrics.completeness_score(labels_true=train_y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=train_y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.2937020652632871
homogeneity: 0.1175663232160283
3


### UCI

In [56]:
# Load the UCI labels (one value per line) and the whitespace-separated
# feature matrix; neither file has a header row.
y = pd.read_csv('mio2/y_test.txt', header=None)
# Raw string: "\s" is not a valid escape sequence in an ordinary string
# literal and triggers a warning on recent Python versions.
x = pd.read_csv('mio2/X_test.txt', sep=r"\s+", header=None)
x = np.asarray(x)
# Flatten the single label column to 1-D without hard-coding the row count.
y = np.asarray(y).reshape(-1)
x.shape

(2947, 561)

##### gauss, width=1, MxN=25, square

In [57]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 square grid, default Gaussian neighbourhood, 10 epochs on the UCI data.
som = Kohonen(M=5, N=5, data=x)
c = som.learn(l=10)
# Score the neuron assignment against the labels in y.
print("completeness:", metrics.completeness_score(labels_true=y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.44072982345772654
homogeneity: 0.7088760602825457
25


##### gauss, width=1, MxN=25, hex

In [58]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 hexagonal grid, default Gaussian neighbourhood, 10 epochs on the UCI data.
som = Kohonen(M=5, N=5, data=x, topology="hex")
c = som.learn(l=10)
# Score the neuron assignment against the labels in y.
print("completeness:", metrics.completeness_score(labels_true=y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.45915329875552546
homogeneity: 0.7315996843080869
25


##### ricker_wavelet_mod, width=1, MxN=25, square

In [59]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 square grid, clipped Ricker-wavelet neighbourhood, 10 epochs on the UCI data.
som = Kohonen(M=5, N=5, data=x)
c = som.learn(l=10, neigh_fun=ricker_wavelet_mod)
# Score the neuron assignment against the labels in y.
print("completeness:", metrics.completeness_score(labels_true=y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.9110224703385262
homogeneity: 0.5174053190542235
3


##### ricker_wavelet_mod, width=1, MxN=25, hex

In [60]:
# Fixed seed so the random initialisation and sample order are reproducible.
np.random.seed(2137)
# 5x5 hexagonal grid, clipped Ricker-wavelet neighbourhood, 10 epochs on the UCI data.
som = Kohonen(M=5, N=5, data=x, topology="hex")
c = som.learn(l=10, neigh_fun=ricker_wavelet_mod)
# Score the neuron assignment against the labels in y.
print("completeness:", metrics.completeness_score(labels_true=y, labels_pred=c))
print("homogeneity:", metrics.homogeneity_score(labels_true=y, labels_pred=c))
# Number of distinct neurons that won at least one sample.
print(np.unique(c).size)

completeness: 0.9110224703385262
homogeneity: 0.5174053190542235
3
