In [1]:
from __future__ import division

import numpy as np
from matplotlib import pyplot as plt
from matplotlib import patches as patches
import pandas as pd
%matplotlib inline
import pdb

In [2]:
raw_data=pd.read_csv("/home/hasan/files/simdatawdT.csv").values
raw_data=raw_data[:,1:]
print(raw_data.shape)

(21, 21)


In [3]:
network_dimensions = np.array([5, 5])
n_iterations = 100
init_learning_rate = 0.01

normalise_data = True

# if True, assume all data on common scale
# if False, normalise to [0 1] range along each column
normalise_by_column = False

In [4]:
# establish variables based on data
m = raw_data.shape[0]
n = raw_data.shape[1]
print(m)
# initial neighbourhood radius
init_radius = max(network_dimensions[0], network_dimensions[1]) / 2
# radius decay parameter
time_constant = n_iterations / np.log(init_radius)

data = raw_data
#print(data)
# check if data needs to be normalised
if normalise_data:
    if normalise_by_column:
        # normalise along each column
        col_maxes = raw_data.max(axis=0)
        data = raw_data / col_maxes[np.newaxis, :]
    else:
        # normalise entire dataset
        data = raw_data / data.max()
#print(data)

21


In [5]:
# setup random weights between 0 and 1
# weight matrix needs to be one m-dimensional vector for each neuron in the SOM
net = np.random.random((network_dimensions[0], network_dimensions[1], m))
print(net)

[[[0.36383675 0.24488414 0.16076485 0.11199806 0.86754123 0.21070683
   0.0482992  0.11071834 0.54831729 0.10074019 0.04155451 0.04407962
   0.34039081 0.7189615  0.61867576 0.37455686 0.72673651 0.92523542
   0.16640545 0.58308479 0.46674457]
  [0.98391621 0.26178818 0.39545848 0.86700142 0.26069047 0.97946335
   0.75781381 0.57871681 0.82115334 0.22166554 0.45318613 0.79055531
   0.63802564 0.77579031 0.23651876 0.68089072 0.60534251 0.35608322
   0.16037003 0.04098698 0.88076593]
  [0.7631997  0.403208   0.36804373 0.71537129 0.5161624  0.57847017
   0.30885872 0.77211194 0.78388986 0.24129995 0.97789622 0.82329785
   0.44961909 0.05630367 0.57176235 0.37168175 0.0475949  0.73241662
   0.15998214 0.87074825 0.55075039]
  [0.04439978 0.53407656 0.2902075  0.39517489 0.80321574 0.89701984
   0.4178289  0.54109394 0.37773152 0.6337367  0.90410438 0.69085064
   0.89242351 0.47435462 0.90095963 0.68006538 0.98941701 0.2050214
   0.09732192 0.48901427 0.39623871]
  [0.24451806 0.98900619 

In [6]:

def find_bmu(t, net, m):
    """
        Find the best matching unit for a given vector, t, in the SOM
        Returns: a (bmu, bmu_idx) tuple where bmu is the high-dimensional BMU
                 and bmu_idx is the index of this vector in the SOM
    """
    bmu_idx = np.array([0, 0])
    # set the initial minimum distance to a huge number
    min_dist = np.iinfo(np.int).max    
    # calculate the high-dimensional distance between each neuron and the input
    for x in range(net.shape[0]):
        for y in range(net.shape[1]):
            w = net[x, y, :].reshape(m, 1)
            # don't bother with actual Euclidean distance, to avoid expensive sqrt operation
            sq_dist = np.sum((w - t) ** 2)
            if sq_dist < min_dist:
                min_dist = sq_dist
                bmu_idx = np.array([x, y])
    # get vector corresponding to bmu_idx
    bmu = net[bmu_idx[0], bmu_idx[1], :].reshape(m, 1)
    # return the (bmu, bmu_idx) tuple
    return (bmu, bmu_idx)

In [7]:

def decay_radius(initial_radius, i, time_constant):
    return initial_radius * np.exp(-i / time_constant)

In [8]:
def decay_learning_rate(initial_learning_rate, i, n_iterations):
    return initial_learning_rate * np.exp(-i / n_iterations)

In [9]:
def calculate_influence(distance, radius):
    return np.exp(-distance / (2* (radius**2)))

In [10]:
for i in range(n_iterations):
    #print('Iteration %d' % i)
    
    # select a training example at random
    t = data[:, np.random.randint(0, n)].reshape(np.array([m, 1]))
    #print("first t value:", t);
    
    # find its Best Matching Unit
    bmu, bmu_idx = find_bmu(t, net, m)
    #print("bmu_index: ",bmu_idx)
    
    # decay the SOM parameters
    r = decay_radius(init_radius, i, time_constant)
    l = decay_learning_rate(init_learning_rate, i, n_iterations)
    
    # now we know the BMU, update its weight vector to move closer to input
    # and move its neighbours in 2-D space closer
    # by a factor proportional to their 2-D distance from the BMU
    for x in range(net.shape[0]):
        for y in range(net.shape[1]):
            w = net[x, y, :].reshape(m, 1)
            
            #print("net",net)
            #print("wwww:",w)
            # get the 2-D distance (again, not the actual Euclidean distance)
            w_dist = np.sum((np.array([x, y]) - bmu_idx) ** 2)
            # if the distance is within the current neighbourhood radius
            if w_dist <= r**2:
                # calculate the degree of influence (based on the 2-D distance)
                influence = calculate_influence(w_dist, r)
                # now update the neuron's weight using the formula:
                # new w = old w + (learning rate * influence * delta)
                # where delta = input vector (t) - old w
                new_w = w + (l * influence * (t - w))
                # commit the new weight
                net[x, y, :] = new_w.reshape(1, 21)
                

In [11]:
material_nams=\
["Adhesive","Brick","Cardboard","Ceramic","Cloth","Concrete","Cotton","Diamond","Glass","Leather","Marble","Metal","Paper","Plastic","Porcelain","Rubber","Stone","Styrofoam","Wax","Wood","Wool"]

In [12]:
for i, m in enumerate(net):
    #print(m[1])
    
    print("m10:",m[1])
    #plt.text(m[1], material_nams[i], ha='center', va='center',
    #     bbox=dict(facecolor='white', alpha=1, lw=0))




m10: [0.84657294 0.2503347  0.39652795 0.75102028 0.28365945 0.8332148
 0.66737681 0.50883181 0.718724   0.25631688 0.40886709 0.67933864
 0.5996794  0.71442911 0.23668673 0.63104267 0.53073233 0.36095159
 0.18655456 0.10033463 0.75235403]
m10: [0.28606374 0.2323091  0.27678632 0.64996323 0.59704885 0.31789276
 0.15755944 0.39395496 0.21833479 0.56455969 0.31866696 0.19119011
 0.71205064 0.20452826 0.30882738 0.80555202 0.47532363 0.35423881
 0.3127561  0.68787108 0.39935084]
m10: [0.37612092 0.39503244 0.38773841 0.75051843 0.49222657 0.71651751
 0.44910524 0.37875913 0.15430432 0.44387779 0.23130657 0.26273842
 0.22895571 0.5148364  0.57832009 0.38204493 0.56073954 0.28573422
 0.50902918 0.68467445 0.71895229]
m10: [0.32613953 0.48733335 0.63634333 0.22488227 0.47031792 0.63168609
 0.25245408 0.73592513 0.4465212  0.69617022 0.20560153 0.07276251
 0.26924951 0.60643064 0.64971094 0.56372588 0.63509517 0.0937369
 0.64155169 0.67764715 0.67275382]
m10: [0.4283123  0.45553717 0.08742118

In [13]:
def find_classification(t, net, m):
  
    bmu_idx = np.array([0, 0])
    # set the initial minimum distance to a huge number
    min_dist = np.iinfo(np.int).max    
    # calculate the high-dimensional distance between each neuron and the input
    for x in range(net.shape[0]):
        for y in range(net.shape[1]):
            w = net[x, y, :].reshape(m, 1)
            # don't bother with actual Euclidean distance, to avoid expensive sqrt operation
            sq_dist = np.sum((w - t) ** 2)
            if sq_dist < min_dist:
                min_dist = sq_dist
                bmu_idx = np.array([x, y])
    # get vector corresponding to bmu_idx
    bmu = net[bmu_idx[0], bmu_idx[1], :].reshape(m, 1)
    # return the (bmu, bmu_idx) tuple
    return (bmu, bmu_idx)

TypeError: only integer scalar arrays can be converted to a scalar index

In [15]:
for i in range(raw_data.shape[0]):
    #print('Iteration %d' % i)
    # select a training example at random
    t = data[:, i].reshape(np.array([m, 1]))
    #print("first t value:", t);
    #print(t)
    # find its Best Matching Unit
    bmu, bmu_idx = find_classification(t, net, m)
    print("bmu_index: ",bmu_idx)
    
   

TypeError: only integer scalar arrays can be converted to a scalar index

In [16]:
bmu_index:  [1 2]
bmu_index:  [2 3]
bmu_index:  [2 2]
bmu_index:  [1 2]
bmu_index:  [1 1]
bmu_index:  [2 3]
bmu_index:  [1 1]
bmu_index:  [2 3]
bmu_index:  [2 3]
bmu_index:  [1 1]
bmu_index:  [1 2]
bmu_index:  [2 3]
bmu_index:  [2 2]
bmu_index:  [3 4]
bmu_index:  [1 2]
bmu_index:  [1 1]
bmu_index:  [2 3]
bmu_index:  [1 1]
bmu_index:  [2 2]
bmu_index:  [2 4]
bmu_index:  [1 1]

SyntaxError: invalid syntax (<ipython-input-16-a0061baa1032>, line 1)