In [2]:
from __future__ import division

import socket
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import patches as patches
import pandas as pd
import pdb
import pickle

In [3]:

# Human-readable label for each shape. saveList() and visualization() look a
# label up by its position in this list.
shape_names = [
    "cuboid",
    "cylinder",
    "handle with bristles",
    "high convexity",
    "long narrow rectangular",
    "medium convexity with handle",
    "rectangular thin with legs",
    "rectangular thin with legs and drawers",
    "small thick rectangular",
    "thin rectangular",
]

In [4]:
# Read the shape table as a plain NumPy array and drop its first column
# (presumably an index/label column -- TODO confirm against the CSV).
shape_data = pd.read_csv(
    "/home/hasan/Documents/BitRepos/oroc_2.0/data/shape_data.csv"
).values[:, 1:]
print(shape_data.shape)


(10, 10)


In [53]:

# Training configuration for the self-organising map (SOM)
normalise_data = True                   # gate for the normalisation cell
init_learning_rate = 0.01               # learning rate at iteration 0
n_iterations = 10                       # number of training steps
network_dimensions = np.array((5, 5))   # neuron grid size: rows, columns


In [54]:

# Normalisation mode (only consulted when normalise_data is True):
#   True  -> scale every column by its own maximum
#   False -> scale the whole array by its single global maximum
normalise_by_column = False

# dimensions of the loaded array: m rows, n columns
m, n = shape_data.shape
print(m)

10


In [55]:

# training starts from the loaded array; the normalisation cell may rebind `data`
data = shape_data

# the neighbourhood radius starts at half of the larger grid side
init_radius = max(network_dimensions[:2]) / 2
# time constant of the exponential radius decay (consumed by decay_radius)
time_constant = n_iterations / np.log(init_radius)

print(init_radius, time_constant)

2.5 10.913566679372915


In [56]:
# check if data needs to be normalised
# Both branches are computed from shape_data (never from `data` itself), so
# re-running this cell always yields the same result.
if normalise_data:
    if normalise_by_column:
        # normalise along each column: divide every column by its own maximum
        col_maxes = shape_data.max(axis=0)
        data = shape_data / col_maxes[np.newaxis, :]
    else:
        # normalise entire dataset by its single global maximum
        data = shape_data / shape_data.max()
else:
    # normalisation disabled: train on the array exactly as loaded
    data = shape_data

In [57]:

# serialize data
# the context manager closes (and therefore flushes) the file once the dump is done
with open("shape_data.p", "wb") as pickle_file:
    pickle.dump(data, pickle_file)

In [58]:
# setup random weights between 0 and 1
# weight matrix needs one n-dimensional vector for each neuron in the SOM:
# training samples are rows of `data`, so each weight vector must have one
# entry per data column (n), not one per row (m)
net = np.random.random((network_dimensions[0], network_dimensions[1], n))
print(net)

[[[0.33877396 0.62501607 0.10251787 0.68594878 0.47143449 0.34207053
   0.58322057 0.79447186 0.22267809 0.49064654]
  [0.70073984 0.64673307 0.05011367 0.29322847 0.88640964 0.92124162
   0.57436467 0.82163318 0.93810863 0.05148333]
  [0.2219329  0.00789112 0.64931268 0.56814151 0.54177264 0.86466827
   0.1626175  0.97848469 0.46983433 0.96390081]
  [0.08865803 0.67646186 0.89579111 0.94316381 0.5944387  0.45231804
   0.61882779 0.093668   0.86966456 0.1769142 ]
  [0.02450241 0.28168051 0.57137428 0.12121709 0.6771727  0.71148189
   0.46322956 0.81381903 0.76400822 0.43794495]]

 [[0.77835908 0.52124963 0.13905076 0.51749855 0.62303285 0.55560051
   0.68331051 0.24430258 0.72877631 0.27931099]
  [0.45277249 0.21489886 0.73271829 0.06588231 0.16327448 0.62492395
   0.39524936 0.61565948 0.70553894 0.67271278]
  [0.3175911  0.01371093 0.05643814 0.99593402 0.34626615 0.22386655
   0.50648489 0.68082506 0.34889946 0.25920927]
  [0.17403588 0.66410824 0.75675143 0.60993617 0.38883812 0.66

In [59]:
def find_bmu(t, net, m):
    """
        Find the best matching unit (BMU) for a given vector, t, in the SOM.

        t:   input vector with one value per weight component, e.g. shape (k, 1)
        net: weight array of shape (rows, cols, k)
        m:   kept for backward compatibility only; the vector length is taken
             from net itself
        Returns: a (bmu, bmu_idx) tuple where bmu is the BMU's weight vector
                 with shape (k, 1) and bmu_idx is np.array([row, col]), the
                 index of this vector in the SOM
        When several neurons are equally close, the first one in row-major
        order wins.
    """
    target = np.asarray(t).reshape(1, 1, -1)
    # squared distance from every neuron to the input in one shot;
    # the sqrt is skipped because it does not change which neuron is closest
    sq_dist = np.sum((net - target) ** 2, axis=2)
    # argmin works on the flattened grid; convert back to a (row, col) pair
    row, col = np.unravel_index(np.argmin(sq_dist), sq_dist.shape)
    bmu_idx = np.array([row, col])
    # get vector corresponding to bmu_idx
    bmu = net[row, col, :].reshape(-1, 1)
    # return the (bmu, bmu_idx) tuple
    return (bmu, bmu_idx)


In [60]:

def decay_radius(initial_radius, i, time_constant):
    """Return the neighbourhood radius at iteration i.

    The radius shrinks exponentially: initial_radius * exp(-i / time_constant).
    """
    shrink_factor = np.exp(-i / time_constant)
    return initial_radius * shrink_factor

def decay_learning_rate(initial_learning_rate, i, n_iterations):
    """Return the learning rate at iteration i.

    The rate decays exponentially: initial_learning_rate * exp(-i / n_iterations).
    """
    decay_factor = np.exp(-i / n_iterations)
    return initial_learning_rate * decay_factor

def calculate_influence(distance, radius):
    """Return the neighbourhood weighting exp(-distance / (2 * radius**2)).

    The caller in this notebook passes the squared 2-D grid distance to the BMU.
    """
    denominator = 2 * (radius ** 2)
    return np.exp(-distance / denominator)


In [65]:
def runMain():
    """Train the global SOM `net` in place for `n_iterations` steps.

    Each step draws one random row of the global `data`, finds its best
    matching unit (BMU), decays the neighbourhood radius and learning rate,
    and pulls every neuron inside the current radius towards the sample.
    The learning rate is printed on every iteration, and saveList() is called
    after the iterations listed in `checkpoints`.

    Reads the globals data, net, n_iterations, init_radius, time_constant and
    init_learning_rate; modifies net.
    """
    # Iterations after which the current state of the map is reported.
    # NOTE(review): none of these fire while n_iterations <= 10 -- confirm
    # whether a report after the final iteration is wanted.
    checkpoints = {10, 50, 100, 150, 200, 500, 1000, 5000, 10000, 15000, 19999}
    # samples are rows of `data`, so the random index must range over rows
    n_samples = data.shape[0]

    for i in range(n_iterations):
        # select a training example at random (as a column vector)
        t = data[np.random.randint(0, n_samples), :].reshape(-1, 1)

        # find its Best Matching Unit
        _, bmu_idx = find_bmu(t, net, net.shape[2])

        # decay the SOM parameters
        r = decay_radius(init_radius, i, time_constant)
        learning_rate = decay_learning_rate(init_learning_rate, i, n_iterations)

        print("learning rate", learning_rate)
        # now we know the BMU, update its weight vector to move closer to input
        # and move its neighbours in 2-D space closer
        # by a factor proportional to their 2-D distance from the BMU
        for x in range(net.shape[0]):
            for y in range(net.shape[1]):
                w = net[x, y, :].reshape(-1, 1)

                # get the squared 2-D grid distance (no sqrt needed for the comparison)
                w_dist = np.sum((np.array([x, y]) - bmu_idx) ** 2)
                # if the distance is within the current neighbourhood radius
                if w_dist <= r ** 2:
                    # calculate the degree of influence (based on the 2-D distance)
                    influence = calculate_influence(w_dist, r)
                    # new w = old w + (learning rate * influence * delta)
                    # where delta = input vector (t) - old w
                    new_w = w + (learning_rate * influence * (t - w))
                    # commit the new weight; ravel() fits any vector length
                    net[x, y, :] = new_w.ravel()

        if i in checkpoints:
            print("Iteration:", i)
            saveList()
        
   
        
            # Dictionary with key - index in the weight matrix, Value - material
# Ex - {(0,3): 'Adhesive', (9,4): 'Brick',...}



In [66]:
# ---GRAPHICAL VISUALIZATIONS ---
def visualization(list1,list2):
    """Scatter-plot grid coordinates and label each point with a shape name.

    list1 holds the x coordinates and list2 the y coordinates; the point at
    position i is annotated with shape_names[i].
    """
    print("Graph for 5*5 size SOM")
    fig, axes = plt.subplots(figsize=(15, 10))
    axes.scatter(list1, list2)
    for idx, label in enumerate(shape_names):
        point = (list1[idx], list2[idx])
        axes.annotate(label, point, size=20)

In [67]:
def saveList():
    """Report where each shape currently lands on the SOM and plot the result.

    For every entry of shape_names, the matching row of the global `data` is
    mapped to its best matching unit (BMU) in the global `net`. The BMU grid
    coordinates of all shapes are then passed to visualization().
    """
    # Dictionary with key - index in the weight matrix, Value - list of shape names
    # Ex - {(0, 3): ['cuboid'], (4, 1): ['cylinder', 'high convexity'], ...}
    shape_idx = {}
    # List of x coordinate of index
    list1 = []
    # List of y coordinate of index
    list2 = []
    for i, shape_name in enumerate(shape_names):
        t = data[i, :].reshape(-1, 1)
        _, bmu_idx = find_bmu(t, net, net.shape[2])
        # plain Python ints make clean dictionary keys and printed output
        bmu_idx = tuple(int(v) for v in bmu_idx)
        if bmu_idx not in shape_idx:
            shape_idx[bmu_idx] = []
            # NOTE(review): only the first shape mapped to a unit is printed;
            # confirm whether every shape should be reported.
            print(i, shape_name, bmu_idx)
        shape_idx[bmu_idx].append(shape_name)
        list1.append(bmu_idx[0])
        list2.append(bmu_idx[1])
    visualization(list1, list2)
    

In [68]:
# Run the training loop defined in runMain(); it prints the decayed learning
# rate once per iteration.
runMain()

learning rate 0.01
learning rate 0.009048374180359595
learning rate 0.008187307530779819
learning rate 0.007408182206817179
learning rate 0.006703200460356393
learning rate 0.006065306597126334
learning rate 0.005488116360940264
learning rate 0.004965853037914096
learning rate 0.004493289641172216
learning rate 0.004065696597405992
