# Model One

In [4]:
import numpy as np
from time import sleep, time
import matplotlib.pyplot as plt

In [9]:
class SOM:
    # It's gonna be an array m x n x dim, where m and n are the nodes' grid and
    # dim is the weight's dimension.
    wNodes = None
     
    alpha0 = None # It's the initial learning rate
    sigma0 = None # It's the initial radius
    dataIn = None # The input data
    grid = None   # The lattice of the grid
     
    def __init__ (self, dataIn, grid=[10,10], alpha=0.1, sigma=None):
        dim = dataIn.shape[1]
        self.wNodes = np.random.uniform(-1,1,[grid[0], grid[1], dim])
        #self.wNodes = np.random.randn (grid[0], grid[1], dim)    
         
        self.alpha0 = alpha
        if (sigma is None):
            self.sigma0 = max(grid) / 2.0
        else:
            self.sigma0 = sigma
         
        self.dataIn = np.asarray(dataIn)
        self.grid = grid
         
         
    def train (self, maxIt=100, verbose=True, analysis=False, timeSleep = 0.5):
        nSamples = self.dataIn.shape[0]
        m = self.wNodes.shape[0]        
        n = self.wNodes.shape[1]        
     
     
        # The time constant needs to be computed just one time, so we so it before the loop starts        
        timeCte = (maxIt/np.log(self.sigma0))        
        if analysis:
            print('timeCte = ', timeCte)
             
        timeInit = 0       
        timeEnd = 0
        for epc in range(maxIt):
            # Computing the constants
            alpha = self.alpha0*np.exp(-epc/timeCte)
            sigma = self.sigma0 * np.exp(-epc/timeCte)
             
            if verbose:
                print('Epoch: ', epc, ' - Expected time: ', (timeEnd-timeInit)*(maxIt-epc), ' sec') 
                 
            timeInit = time()
 
            for k in range(nSamples):    
                 
                # Getting the winner node
                matDist = self.distance (self.dataIn[k,:], self.wNodes)
                posWin = self.getWinNodePos(matDist)                              
                 
                deltaW  = 0               
                h = 0   
                           
                 
                for i in range(m):
                    for j in range(n):      
                        # Computing the distance between two nodes
                        dNode = self.getDistanceNodes([i,j],posWin)                       
                         
                         
                        #if dNode <= sigma: 
                             
                        # Computing the winner node's influence
                        h = np.exp ((-dNode**2)/(2*sigma**2))
                         
                        # Updating the weights
                        deltaW = (alpha*h*(self.dataIn[k,:] - self.wNodes[i,j,:]))                       
                        self.wNodes[i,j,:] += deltaW
                             
                        if analysis:  
                            print('Epoch = ', epc) 
                            print('Sample = ', k) 
                            print('-------------------------------') 
                            print('alpha = ', alpha) 
                            print('sigma = ', sigma)                            
                            print('h = ',  h) 
                            print('-------------------------------') 
                            print('Winner Node = [', posWin[0],', ',posWin[1],']') 
                            print('Current Node = [',i,', ',j,']') 
                            print('dist. Nodes = ', dNode) 
                            print('deltaW = ', deltaW)                       
                            print('wNode before = ', self.wNodes[i,j,:]) 
                            print('wNode after = ', self.wNodes[i,j,:] + deltaW) 
                            print('\n')                        
                            sleep(timeSleep) 
                             
            timeEnd = time()                       
         
 
    # This code uses the Euclidean distance. You may change this distance, if you want to.
    # This method computes the distance between the inputs and weights throught the 3D matrix
    def distance (self,a,b):
        return np.sqrt(np.sum((a-b)**2,2,keepdims=True))        
 
    # Method to get the distance between two nodes in the grid
    def getDistanceNodes (self,n1,n2):
        n1 = np.asarray(n1)
        n2 = np.asarray(n2)
        return np.sqrt(np.sum((n1-n2)**2))
         
    # This method gets the position of the winner node     
    def getWinNodePos (self,dists):
        arg = dists.argmin()
        m = dists.shape[0]
        return arg//m, arg%m
         
    # Method to get the centroid of a input data
    def getCentroid (self, data):
        data = np.asarray(data)        
        N = data.shape[0]
        centroids = list()
         
        for k in range(N):
            matDist = self.distance (data[k,:], self.wNodes)
            centroids.append (self.getWinNodePos(matDist))
             
        return centroids
         
    # Methods to save and load trained nodes
    def saveTrainedSOM (self, fileName='trainedSOM.csv'):
        np.savetxt(fileName, self.wNodes)
 
    def setTrainedSOM (self, fileName):
        self.wNodes = np.loadtxt(fileName)
 

In [22]:
#Training inputs for RGBcolors
colors = np.array(
     [[0., 0., 0.],
      [0., 0., 1.],
      [0., 0., 0.5],
      [0.125, 0.529, 1.0],
      [0.33, 0.4, 0.67],
      [0.6, 0.5, 1.0],
      [0., 1., 0.],
      [1., 0., 0.],
      [0., 1., 1.],
      [1., 0., 1.],
      [1., 1., 0.],
      [1., 1., 1.],
      [.33, .33, .33],
      [.5, .5, .5],
      [.66, .66, .66]])
       
colors2 = np.array(
     [[0., 0., 0.],
      [0., 0., 1.],     
      [1., 1., 0.],
      [1., 1., 1.],     
      [1., 0., 0.]])      
       
color_names = \
    ['black', 'blue', 'darkblue', 'skyblue',
     'greyblue', 'lilac', 'green', 'red',
     'cyan', 'violet', 'yellow', 'white',
     'darkgrey', 'mediumgrey', 'lightgrey']
    
s = SOM(colors,[20,30], alpha=0.3)

plt.imshow(s.wNodes)
 
s.train(maxIt=5)
 
plt.imshow(s.wNodes)
plt.show()

Epoch:  0  - Expected time:  0  sec
Epoch:  1  - Expected time:  1.5582551956176758  sec
Epoch:  2  - Expected time:  1.189347267150879  sec
Epoch:  3  - Expected time:  0.7169475555419922  sec
Epoch:  4  - Expected time:  0.3163418769836426  sec


ValueError: Floating point image RGB values must be in the 0..1 range.

<matplotlib.figure.Figure at 0x2405e356d68>

# Model Two

In [3]:
import numpy as np
import sompylib.som_structure as SOM
from matplotlib import pyplot as plt

ModuleNotFoundError: No module named 'sompylib'

In [23]:
msz0 = 50
msz1 = 50
cd = msz0*msz1*1*1
dlen = 100*1000*1*1*1#+224
dim = 3
Data = np.random.randint(0,2,size = (dlen,dim))
# Data = np.random.rand(dlen,dim)

In [None]:
reload(sys.modules['sompylib.som_structure'])
sm = SOM.SOM('sm', Data, mapsize = [msz0, msz1],norm_method = 'var',initmethod='pca')

In [None]:
sm.train(n_job = 2, shared_memory = 'no')

In [None]:
tmp = np.zeros((msz0,msz1,dim))
codebook = getattr(sm,'codebook')
codebook = SOM.denormalize_by(Data,codebook)
# codebook = SOM.denormalize(Data, codebook)
for i in range (codebook.shape[1]):
    tmp[:,:,i] = codebook[:, i].reshape(msz0,msz1)
from matplotlib import pyplot as plt
tmp.shape
fig = plt.imshow(tmp[:,:,0:3])

In [None]:
from numpy import genfromtxt, savetxt
import numpy as np

In [None]:
Data = genfromtxt(open('data/pollution.csv','r'),dtype=float, delimiter=',')[1:]
Labels = Data[:,0]
Data = Data[:,1:]
header= genfromtxt(open('data/pollution.csv','r'),delimiter=',',dtype = None)[0]
header = header[1:]
header = header[np.newaxis,:]

print 'size of data set: ', Data.shape

In [None]:
import sys
import numpy as np
import sompylib.som_structure as SOM
from matplotlib import pyplot as plt

In [None]:
msz0 = 50
msz1 = 50
cd = msz0*msz1*1*1
dlen = 100*1000*1*1*1#+224
dim = 3

In [None]:
from pandas.tools.plotting import scatter_matrix
from pandas import Series, DataFrame
import pandas as pd

In [None]:
df = DataFrame(data = Data[1:1000,:], columns= header.T)

fig = scatter_matrix(df, alpha=0.2, figsize=(10, 10), diagonal='kde')

In [None]:
reload(sys.modules['sompylib.som_structure'])
sm = SOM.SOM('sm', Data, mapsize = [msz0, msz1],norm_method = 'var',initmethod='pca')
sm.init_map()
setattr(sm, 'compname', header)
sm.view_map(which_dim = 'all')

In [None]:
sm.train(n_job = 1, shared_memory = 'no',verbose='on')

In [None]:
sm.view_map(which_dim = 'all')

In [None]:
sm.view_map(which_dim= 'all' , pack='Yes',text_size=6,save='No',save_dir='')

In [None]:
sm.hit_map()

In [None]:
import numpy as np
import sompylib.som_structure as SOM
from matplotlib import pyplot as plt
import sys

In [None]:
msz0 = 50
msz1 = 50
cd = msz0*msz1*1*1
dlen = 81920*1*1*1*1#+224
# dlen = 200*1000
dim = 256
Data = np.random.randint(0,2,size = (dlen,dim))

In [None]:
reload(sys.modules['sompylib.som_structure'])
sm = SOM.SOM('sm', Data, mapsize = [msz0, msz1],norm_method = 'var')
sm.train()

In [None]:
from numpy import genfromtxt, savetxt
import numpy as np

In [None]:
Data = genfromtxt(open('data/pollution.csv','r'),dtype=float, delimiter=',')[1:]
Labels = Data[:,0]
Data = Data[:,1:]
header= genfromtxt(open('data/pollution.csv','r'),delimiter=',',dtype = None)[0]
header = header[1:]
header = header[np.newaxis,:]
indnan = np.isnan(Data)
indrem = list()
print Data.shape
for i in range(Data.shape[0]):
    if ~np.any(indnan[i,:]):
        indrem.append(i)
Data = Data[indrem]
print(Data.shape)

In [None]:
import sys
import numpy as np
import sompylib.som_structure as SOM
from matplotlib import pyplot as plt

In [None]:
msz0 = 50
msz1 = 50
reload(sys.modules['sompylib.som_structure'])
sm = SOM.SOM('sm', Data[1:48*1000], mapsize = [msz0, msz1],norm_method = 'var')
sm.train(n_job = 1, shared_memory = 'no')

In [None]:
setattr(sm, 'compname', header)
sm.view_map(which_dim = 'all')

In [None]:
from pandas.tools.plotting import scatter_matrix
from pandas import Series, DataFrame
import pandas as pd

In [None]:
data = Data[48*1000:58*1000]
Target = 8
print 'Variable to predict: ', header[0][Target]
pred = sm.predict_by(data,Target, K =1)
real = data[:,Target]
accuracy = (1-np.abs((pred-real)/real))*100
print 'median accuracy', np.median(accuracy)
print 'mean accuracy', np.mean(accuracy)
print 'std accuracy', np.std(accuracy)
print 'min accuracy', np.min(accuracy)
print 'max accuracy', np.max(accuracy)
DF = DataFrame({'True Value': real[1:100], 'Predicted Value':pred[1:100]})
fig = plt.figure(); 
DF.plot(DF.index,DF.columns[:],label=header[0][Target],colormap='jet',x_compat=True,style='.-'); plt.legend(loc='best',bbox_to_anchor = (1.0, 1.0),fontsize = 'medium')
plt.ylabel('values')
font = {'size'   : 12}
plt.rc('font', **font)
fig.set_size_inches(10,10)