In [1]:
import numpy as np
from minisom import MiniSom 
from datetime import datetime
from itertools import chain
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
# DATASETS
# Both .vec files are read the same way: the first 4 lines are skipped and only
# the leading numeric columns are kept (10 for 10clusters, 3 for chainlink).
# NOTE(review): presumably the skipped lines are the SOMLib "$" header and the
# dropped trailing column is the vector label -- confirm against the files.
clusters10_data = np.genfromtxt(
    '10clusters.vec',
    usecols=[col for col in range(10)],
    skip_header=4,
)
chainlink_data = np.genfromtxt(
    'chainlink.vec',
    usecols=[col for col in range(3)],
    skip_header=4,
)

In [3]:
# Clusters10
# Train a small (20x25) and a big (100x60) map on the 10-dimensional data.
# random_seed makes the otherwise random training run repeatable after a kernel
# restart; 1 matches the default `seed` that SOMFiles.write_map_file records
# as $RAND_INIT.
clusters10_som_small = MiniSom(20, 25, 10, sigma=0.3, learning_rate=0.5, random_seed=1)
clusters10_som_small.train_random(clusters10_data, 100)
clusters10_som_big = MiniSom(100, 60, 10, sigma=0.3, learning_rate=0.5, random_seed=1)
clusters10_som_big.train_random(clusters10_data, 100)

In [3]:
# Chainlink
# Train a small (20x25) and a big (100x60) map on the 3-dimensional data.
# Note that the two maps deliberately or accidentally use different sigma
# values (10 vs 0.3) -- NOTE(review): confirm this is intended.
# random_seed makes the otherwise random training run repeatable after a kernel
# restart; 1 matches the default `seed` that SOMFiles.write_map_file records
# as $RAND_INIT.
chainlink_som_small = MiniSom(20, 25, 3, sigma=10, learning_rate=0.5, random_seed=1)
chainlink_som_small.train_random(chainlink_data, 10000)
chainlink_som_big = MiniSom(100, 60, 3, sigma=0.3, learning_rate=0.5, random_seed=1)
chainlink_som_big.train_random(chainlink_data, 10000)

In [31]:
# Display the quantized chainlink data for the small map. MiniSom documents
# `quantization` as returning, for each sample, the weight vector of its
# winning neuron. The result is only displayed here, not stored.
chainlink_som_small.quantization(chainlink_data)

array([[-0.18054337,  1.42930274,  0.04388023],
       [-0.30473282,  0.33415633, -0.10903533],
       [ 0.21419984,  0.38784149,  0.27410976],
       ...,
       [ 0.03107066, -0.47210335,  0.56089671],
       [ 0.00536775, -0.45294586, -0.03484727],
       [-0.09971906,  0.00985857, -0.5369326 ]])

In [57]:
class SOMFiles:
    """
    Class for creating SOMLib files out of a MiniSom

    Three text files can be written, each named ``<filename><extension>``:

    * ``write_map_file``    -> ``.map``  (map description)
    * ``write_weight_file`` -> ``.wgt``  (one weight vector per unit)
    * ``write_unit_file``   -> ``.unit`` (per-unit mapping of input vectors)

    Everything that requires the SOM (weights, distance map, quantization
    error, quantized data) is computed once in ``__init__`` and cached.
    """

    def __init__(self, data, som, dataset_name):
        """
        data:         2-D array of input vectors, one vector per row
                      (``data.shape[1]`` is written as $VEC_DIM)
        som:          trained map object; it must provide ``get_weights``,
                      ``distance_map``, ``quantization_error``, ``quantization``
                      and ``winner`` (the MiniSom interface)
        dataset_name: string embedded in the unit labels / ids that are written
        """
        self._data = data
        self._som = som
        self._dataset_name = dataset_name
        self._weights = som.get_weights()
        # The first two weight axes are the map grid; they become $XDIM / $YDIM.
        self._xdmin = self._weights.shape[0]
        self._ydmin = self._weights.shape[1]
        # Cached but not used by any of the writers below.
        self._distances = som.distance_map()
        self._qe = som.quantization_error(data)
        # Row idx is compared against data[idx] in _qe_unit, so this is expected
        # to hold one quantized vector per input vector.
        self._quantization = som.quantization(data)

    def _get_header(self):
        """
        returns dict with header information of a SOMLib file
        """
        return {'$TYPE': 'som', '$GRID_LAYOUT': 'rectangular', '$GRID_TOPOLOGY': 'planar',
                '$XDIM': self._xdmin, '$YDIM': self._ydmin}

    def _write_header(self, f, header):
        """
        Write every entry of header as a "key value" line to the open file f
        """
        for key, value in header.items():
            f.write("{} {} \n".format(key, value))

    def write_map_file(self, filename, sigma=0.3, learning_rate=0.5, iterations=100, seed=1):
        """
        Write SOMLib map description file

        The file is written to ``filename + ".map"``.

        sigma, learning_rate, iterations and seed are NOT read from the SOM;
        they are written verbatim as $NEIGHBORHOOD_INIT, $LEARNRATE_INIT,
        $ITERATIONS_TOTAL and $RAND_INIT. Pass the values that were actually
        used for training, otherwise the file records the defaults.
        """
        d = self._get_header()
        d['$VEC_DIM'] = self._data.shape[1]
        d['$STORAGE_DATE'] = datetime.now().strftime("%d/%m/%Y %H:%M %p")
        d['$LEARNRATE_TYPE'] = 'exponential'
        d['$LEARNRATE_INIT'] = learning_rate
        d['$NEIGHBORHOOD_TYPE'] = 'exponential'
        d['$NEIGHBORHOOD_INIT'] = sigma
        d['$RAND_INIT'] = seed
        d['$ITERATIONS_TOTAL'] = iterations
        d['$QUANTERROR_MAP'] = self._qe

        with open(filename + ".map", "w") as f:
            self._write_header(f, d)

    def write_weight_file(self, filename):
        """
        Write SOMLib weight vector file

        The file is written to ``filename + ".wgt"``: the header, then one line
        per unit holding the space-separated weight components followed by the
        label ``SOM_MAP_<dataset_name>_(<i>/<j>)``.
        """
        d = self._get_header()
        d['$VEC_DIM'] = self._data.shape[1]

        with open(filename + ".wgt", "w") as f:
            self._write_header(f, d)

            for i in range(self._weights.shape[0]):
                for j in range(self._weights.shape[1]):
                    row = list(self._weights[i][j])
                    row.append("SOM_MAP_{}_({}/{})".format(self._dataset_name, i, j))
                    f.write(' '.join(map(str, row)) + "\n")

    def write_unit_file(self, filename):
        """
        Write SOMLib unit description file

        The file is written to ``filename + ".unit"``: the header once, then
        one record per unit. A unit that is the winner for at least one input
        vector lists the 1-based indices of those vectors and their individual
        errors; every other unit gets zero errors and a zero count.
        """
        d = self._get_header()
        winner_dict = self._map_data_to_winner()

        with open(filename + ".unit", "w") as f:
            # The header is written exactly once, before any unit record.
            self._write_header(f, d)

            # NOTE(review): the SOMLib unit format appears to name the position
            # tags $POS_X / $POS_Y / $POS_Z; the tags below are kept as they
            # were -- confirm against the format specification.
            for i in range(self._weights.shape[0]):
                for j in range(self._weights.shape[1]):
                    f.write("$XDMIN {}\n".format(i))
                    f.write("$YDMIN {}\n".format(j))
                    f.write("$ZDMIN 1\n")
                    f.write("$UNIT_ID {}_({}/{})\n".format(self._dataset_name, i, j))

                    if (i, j) in winner_dict:
                        mapped = winner_dict[(i, j)]
                        # Stored indices are 1-based; _qe_unit expects 0-based.
                        qes = [self._qe_unit(idx - 1) for idx in mapped]
                        f.write("$QUANTERROR_UNIT {}\n".format(np.sum(qes)))
                        f.write("$QUANTERROR_UNIT_AVG {}\n".format(np.mean(qes)))
                        f.write("$NR_VEC_MAPPED {}\n".format(len(qes)))
                        f.write("$MAPPED_VECS\n")
                        for idx in mapped:
                            f.write(str(idx) + "\n")
                        f.write("$MAPPED_VECS_DIST " + ' '.join(map(str, qes)) + "\n")

                    else:
                        f.write("$QUANTERROR_UNIT 0.0\n")
                        f.write("$QUANTERROR_UNIT_AVG 0.0\n")
                        f.write("$NR_VEC_MAPPED 0\n")

    def _qe_unit(self, idx):
        """
        returns qe for a single input vector

        idx is the 0-based row of the input vector. The value is the mean of
        the squared component differences between the vector and its quantized
        counterpart.
        NOTE(review): this is a mean squared difference, not a Euclidean
        distance -- confirm that this is the intended error measure.
        """
        return np.mean(np.power(self._quantization[idx] - self._data[idx], 2))

    def _map_data_to_winner(self):
        """
        returns dict with idx of input vectors mapped to winner units

        Keys are whatever ``som.winner(vec)`` returns (looked up as ``(i, j)``
        by write_unit_file); values are lists of 1-based input vector indices.
        """
        d = defaultdict(list)
        for i, vec in enumerate(self._data):
            d[self._som.winner(vec)].append(i + 1)

        return d
        

In [56]:
desc = SOMFiles(chainlink_data, chainlink_som_small, 'chainlink')
# write_map_file records its arguments verbatim, so pass the values that
# chainlink_som_small was actually trained with (sigma=10, learning_rate=0.5,
# 10000 iterations) instead of relying on the defaults (sigma=0.3, 100).
# NOTE(review): `seed` is left at its default; pass the real seed if the SOM
# is constructed with an explicit random_seed.
desc.write_map_file("map_test", sigma=10, learning_rate=0.5, iterations=10000)
desc.write_weight_file("weight_test")
desc.write_unit_file("unit_test")