In [43]:
import numpy as np
from minisom import MiniSom 
from datetime import datetime

In [3]:
# DATASETS
# Both files are read with the same recipe: drop the first 4 lines, then keep
# only the leading numeric columns (10 for 10clusters, 3 for chainlink).
# NOTE(review): presumably SOMLib-style .vec files with a 4-line header and a
# trailing label column that usecols leaves out -- confirm against the files.
clusters10_data = np.genfromtxt(
    '10clusters.vec',
    skip_header=4,
    usecols=list(range(10)),
)
chainlink_data = np.genfromtxt(
    'chainlink.vec',
    skip_header=4,
    usecols=list(range(3)),
)

In [12]:
# Clusters10
# Train a small (20 x 25) and a big (100 x 60) map on the 10-column dataset,
# each with 100 train_random iterations.
# random_seed is fixed so that a Restart & Run All reproduces the same maps;
# without it every run gives different weights. The value 1 matches the default
# `seed` that get_map_description reports as $RAND_INIT.
clusters10_som_small = MiniSom(20, 25, 10, sigma=0.3, learning_rate=0.5, random_seed=1)
clusters10_som_small.train_random(clusters10_data, 100)
clusters10_som_big = MiniSom(100, 60, 10, sigma=0.3, learning_rate=0.5, random_seed=1)
clusters10_som_big.train_random(clusters10_data, 100)

In [13]:
# Chainlink
# Train a small (20 x 25) and a big (100 x 60) map on the 3-column dataset,
# each with 100 train_random iterations.
# random_seed is fixed so that a Restart & Run All reproduces the same maps;
# without it every run gives different weights. The value 1 matches the default
# `seed` that get_map_description reports as $RAND_INIT.
chainlink_som_small = MiniSom(20, 25, 3, sigma=0.3, learning_rate=0.5, random_seed=1)
chainlink_som_small.train_random(chainlink_data, 100)
chainlink_som_big = MiniSom(100, 60, 3, sigma=0.3, learning_rate=0.5, random_seed=1)
chainlink_som_big.train_random(chainlink_data, 100)

In [14]:
# Keep the trained weight array of the small Chainlink map for inspection.
weights = chainlink_som_small.get_weights()
# Last expression of the cell: the map's quantization error on the training data.
chainlink_som_small.quantization_error(chainlink_data)

In [33]:
# Quantize the Chainlink samples with the small map; the result is an array with
# 3 values per row (presumably the codebook vector of each sample's
# best-matching unit -- confirm in the MiniSom docs).
chainlink_som_small.quantization(chainlink_data)

array([[-4.12744166e-02,  1.97233513e+00, -2.81833018e-04],
       [-6.08112926e-01,  2.39237847e-01, -1.09788352e-02],
       [ 4.90661245e-01,  1.33191339e-01, -2.77097927e-02],
       ...,
       [ 6.16642722e-02, -4.10770107e-01,  9.09651272e-01],
       [ 6.09325663e-02, -9.42491766e-01, -2.82333629e-01],
       [ 6.67673675e-02,  2.58332031e-01, -9.63746170e-01]])

In [24]:
# Distance map of the small Chainlink map, kept in `distances` for later inspection.
distances = chainlink_som_small.distance_map()

In [36]:
# Sanity check: one distance value per map node, i.e. the 20 x 25 grid of the small map.
distances.shape

(20, 25)

In [28]:
# Group the samples by map coordinate: the result is a defaultdict keyed by
# (row, col) tuples whose values are lists of sample arrays.
# NOTE(review): displaying the whole dict prints every sample and floods the
# notebook; consider showing only the number of samples per node instead.
chainlink_som_small.win_map(chainlink_data)

defaultdict(list,
            {(0, 14): [array([-0.27412 ,  1.9674  , -0.035685]),
              array([-0.34884 ,  1.8709  , -0.035219]),
              array([-0.30764 ,  1.9859  ,  0.076412]),
              array([ 0.17684 ,  1.9742  , -0.017804]),
              array([ 0.13951 ,  2.0007  , -0.011744]),
              array([-0.42526 ,  2.0215  , -0.025187]),
              array([-0.084043,  1.9962  , -0.033255]),
              array([ 0.20304 ,  2.0461  , -0.033991]),
              array([-0.22899  ,  1.9935   ,  0.0036271]),
              array([-8.8768e-02,  2.0179e+00, -3.7963e-04]),
              array([0.06213  , 1.9685   , 0.0093558]),
              array([0.223   , 1.8685  , 0.022343]),
              array([0.34711 , 1.8828  , 0.076542]),
              array([-0.405   ,  1.8674  ,  0.051174]),
              array([ 0.19635 ,  1.9396  , -0.038935]),
              array([-0.29094,  1.9498 , -0.05797]),
              array([0.31086 , 1.9062  , 0.041474]),
              array([-0.

In [39]:
# Sanity check of the loaded Chainlink data: (number of samples, 3 selected columns).
chainlink_data.shape

(1000, 3)

In [47]:
def get_map_description(data, som, sigma=0.3, learning_rate=0.5, iterations=100, seed=1):
    """Collect the descriptive header fields of a trained SOM in a dict.

    Parameters
    ----------
    data : array-like with a ``shape`` attribute
        Training data; ``data.shape[1]`` is reported as ``$VEC_DIM`` and the
        object is passed to ``som.quantization_error``.
    som : object
        Trained map exposing ``get_weights()`` (result must have a ``shape``
        whose first two entries are the grid dimensions) and
        ``quantization_error(data)``, e.g. a ``MiniSom`` instance.
    sigma, learning_rate, iterations, seed
        Training settings to record as ``$NEIGHBORHOOD_INIT``,
        ``$LEARNRATE_INIT``, ``$ITERATIONS_TOTAL`` and ``$RAND_INIT``.
        They are NOT read from ``som``: the caller must pass the values that
        were actually used, otherwise the description is wrong.

    Returns
    -------
    dict
        ``$``-prefixed keys mapped to their values, in the insertion order
        shown below.
    """
    # Only the weights and the quantization error feed the description, so
    # nothing else is computed from the map.
    x_dim, y_dim = som.get_weights().shape[:2]
    quant_error = som.quantization_error(data)

    return {
        '$TYPE': 'som',
        '$TOPOLOGY': 'rect',
        '$XDIM': x_dim,
        '$YDIM': y_dim,
        '$ZDIM': 1,  # fixed: the map is treated as two-dimensional
        '$VEC_DIM': data.shape[1],
        # NOTE(review): %H is the 24-hour clock, so the trailing %p (AM/PM) is
        # redundant and reads oddly after noon (e.g. "13:05 PM"). Kept as is
        # until the format expected by the consumer is confirmed.
        '$STORAGE_DATE': datetime.now().strftime("%d/%m/%Y %H:%M %p"),
        # The two *_TYPE values are hard-coded, not derived from `som`.
        '$LEARNRATE_TYPE': 'exponential',
        '$LEARNRATE_INIT': learning_rate,
        '$NEIGHBORHOOD_TYPE': 'exponential',
        '$NEIGHBORHOOD_INIT': sigma,
        '$RAND_INIT': seed,
        '$ITERATIONS_TOTAL': iterations,
        '$QUANTERROR_MAP': quant_error,
    }

# Describe the small Chainlink map using the function's default training settings
# (sigma=0.3, learning_rate=0.5, iterations=100), which match the values used to train it.
# NOTE(review): the saved output of this cell has no '$QUANTERROR_MAP' entry, so it
# appears to predate the current function body -- re-run the cell to refresh it.
get_map_description(chainlink_data, chainlink_som_small)

{'$TYPE': 'som',
 '$TOPOLOGY': 'rect',
 '$XDIM': 20,
 '$YDIM': 25,
 '$ZDIM': 1,
 '$VEC_DIM': 3,
 '$STORAGE_DATE': '09/01/2020 12:14 PM',
 '$LEARNRATE_TYPE': 'exponential',
 '$LEARNRATE_INIT': 0.5,
 '$NEIGHBORHOOD_TYPE': 'exponential',
 '$NEIGHBORHOOD_INIT': 0.3,
 '$RAND_INIT': 1,
 '$ITERATIONS_TOTAL': 100}