In [1]:
from __future__ import division

import numpy as np
from matplotlib import pyplot as plt
from matplotlib import patches as patches
import pandas as pd
%matplotlib inline
import pdb

In [2]:
# Load the simulation CSV as a plain NumPy array, discarding its first column.
# NOTE(review): the dropped column is presumably a row index/label -- confirm.
raw_data = pd.read_csv("/home/hasan/files/simdatawdT.csv").values[:, 1:]
print(raw_data.shape)

(21, 21)


In [3]:
# --- input scaling ---
# Master switch: when False the loaded values are used for training unchanged.
normalise_data = True

# Only consulted when normalise_data is True:
#   True  -> divide every column by that column's own maximum
#   False -> divide the whole dataset by its single global maximum
normalise_by_column = False

# --- SOM hyper-parameters ---
# size of the 2-D neuron grid (first axis, second axis)
network_dimensions = np.array([5, 5])
# number of training steps; each step presents one training vector
n_iterations = 100
# learning rate at iteration 0 (it decays exponentially afterwards)
init_learning_rate = 0.01

In [4]:
# Derive the problem sizes from the loaded array:
#   m = number of rows, n = number of columns of raw_data.
# The training loop draws one column at a time, so m is also the length of
# every training vector (and of every neuron's weight vector).
m, n = raw_data.shape[:2]
print(m)

# The neighbourhood initially spans half of the larger grid side ...
init_radius = max(network_dimensions[0], network_dimensions[1]) / 2
# ... and this constant controls how fast it shrinks (see decay_radius).
time_constant = n_iterations / np.log(init_radius)

# Choose the scaling applied to the training data.
if not normalise_data:
    # use the values exactly as loaded
    data = raw_data
elif normalise_by_column:
    # scale each column by its own maximum
    col_maxes = raw_data.max(axis=0)
    data = raw_data / col_maxes[np.newaxis, :]
else:
    # scale everything by the single largest value in the dataset
    data = raw_data / raw_data.max()

21


In [5]:
# Initialise the SOM with uniformly random weights in [0, 1).
# Layout is (grid axis 0, grid axis 1, m): every neuron on the 2-D grid owns
# one m-dimensional weight vector.
net = np.random.random(size=(network_dimensions[0], network_dimensions[1], m))
print(net)

[[[0.94679923 0.49333142 0.43842692 0.01461416 0.17922219 0.38322576
   0.10242344 0.43778575 0.54620553 0.27819466 0.01652513 0.77675443
   0.27181993 0.5970479  0.94229044 0.01630098 0.77194746 0.74730693
   0.97119079 0.21908855 0.1133605 ]
  [0.04537509 0.77910008 0.23985007 0.85837294 0.70426778 0.94712947
   0.41659162 0.91247663 0.57682289 0.30287975 0.51866696 0.50638499
   0.9803403  0.08616808 0.81346149 0.21495874 0.73151981 0.34893355
   0.54387021 0.66181458 0.54151738]
  [0.54759471 0.59735998 0.23880782 0.31425952 0.16308209 0.51953737
   0.94941457 0.20568444 0.18920646 0.36383272 0.64764211 0.26408497
   0.75116523 0.38905743 0.41245593 0.79097316 0.58458949 0.27842119
   0.97895524 0.91269857 0.38448963]
  [0.47958196 0.87224494 0.97043157 0.06482139 0.92927684 0.69197741
   0.46205632 0.18972573 0.11116137 0.95307673 0.84269252 0.55895369
   0.48237015 0.06050521 0.58268626 0.53654481 0.46188421 0.24730075
   0.8536995  0.42233082 0.42898378]
  [0.29223413 0.54771189

In [6]:

def find_bmu(t, net, m):
    """
        Find the best matching unit (BMU) for a given vector, t, in the SOM.

        The BMU is the neuron whose weight vector has the smallest squared
        Euclidean distance to t. Squared distances are compared directly, so
        no square root is taken. If several neurons are equally close, the
        first one in row-major (x, then y) order wins.

        Parameters:
            t:   input vector holding m values, e.g. shape (m, 1) or (m,)
            net: SOM weights, shape (rows, cols, m)
            m:   length of t and of every neuron's weight vector

        Returns: a (bmu, bmu_idx) tuple where bmu is the high-dimensional BMU
                 (reshaped to (m, 1)) and bmu_idx is the index of this vector
                 in the SOM, as the array [x, y]
    """
    # Flatten t so that it broadcasts against the last axis of net regardless
    # of whether the caller passed a column vector or a flat vector.
    target = np.asarray(t).reshape(m)
    # Squared distance from every neuron to the input, computed in one shot.
    # This also removes the need for a "huge number" sentinel; the previous
    # sentinel relied on the np.int alias, which no longer exists in NumPy.
    sq_dists = np.sum((net - target) ** 2, axis=2)
    # argmin reports the first minimum of the flattened (row-major) array,
    # which unravel_index maps back to grid coordinates.
    bmu_idx = np.array(np.unravel_index(np.argmin(sq_dists), sq_dists.shape))
    # get vector corresponding to bmu_idx
    bmu = net[bmu_idx[0], bmu_idx[1], :].reshape(m, 1)
    # return the (bmu, bmu_idx) tuple
    return (bmu, bmu_idx)

In [7]:

def decay_radius(initial_radius, i, time_constant):
    """
        Neighbourhood radius at iteration i.
        Returns: initial_radius * exp(-i / time_constant)
    """
    decay_factor = np.exp(-i / time_constant)
    return initial_radius * decay_factor

In [8]:
def decay_learning_rate(initial_learning_rate, i, n_iterations):
    """
        Learning rate at iteration i.
        Returns: initial_learning_rate * exp(-i / n_iterations)
    """
    decay_factor = np.exp(-i / n_iterations)
    return initial_learning_rate * decay_factor

In [9]:
def calculate_influence(distance, radius):
    """
        Weight given to a neuron's update as a function of its distance from
        the BMU; 1.0 at distance 0 and falling off as the distance grows.
        The training loop passes the *squared* 2-D grid distance here.
        Returns: exp(-distance / (2 * radius**2))
    """
    denominator = 2 * (radius ** 2)
    return np.exp(-distance / denominator)

In [10]:
# Train the SOM: every iteration draws one random training vector, finds its
# best matching unit (BMU), and pulls the BMU and its grid neighbours towards
# that vector. The radius and learning rate both shrink as i grows.
for i in range(n_iterations):
    # select a training example at random: one column of data, as an (m, 1) vector
    t = data[:, np.random.randint(0, n)].reshape(m, 1)

    # find its Best Matching Unit
    bmu, bmu_idx = find_bmu(t, net, m)

    # decay the SOM parameters
    r = decay_radius(init_radius, i, time_constant)
    l = decay_learning_rate(init_learning_rate, i, n_iterations)

    # now we know the BMU, update its weight vector to move closer to input
    # and move its neighbours in 2-D space closer
    # by a factor proportional to their 2-D distance from the BMU
    for x in range(net.shape[0]):
        for y in range(net.shape[1]):
            w = net[x, y, :].reshape(m, 1)
            # squared 2-D grid distance to the BMU (no sqrt; it is compared
            # against r**2 below)
            w_dist = np.sum((np.array([x, y]) - bmu_idx) ** 2)
            # if the distance is within the current neighbourhood radius
            if w_dist <= r**2:
                # calculate the degree of influence (based on the 2-D distance)
                influence = calculate_influence(w_dist, r)
                # now update the neuron's weight using the formula:
                # new w = old w + (learning rate * influence * delta)
                # where delta = input vector (t) - old w
                new_w = w + (l * influence * (t - w))
                # commit the new weight; the length comes from m rather than
                # a hard-coded 21 so other feature counts work too
                net[x, y, :] = new_w.reshape(m)
                

In [11]:
# Material labels: 21 entries, in alphabetical order.
# NOTE(review): presumably one label per row/column of the 21x21 data -- confirm.
material_nams = [
    "Adhesive", "Brick", "Cardboard", "Ceramic", "Cloth", "Concrete", "Cotton",
    "Diamond", "Glass", "Leather", "Marble", "Metal", "Paper", "Plastic",
    "Porcelain", "Rubber", "Stone", "Styrofoam", "Wax", "Wood", "Wool",
]