<a href="https://colab.research.google.com/github/pranav-vijayananth/SOMResearch/blob/main/tensorflowsom_som_plotting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch the SelfOrganizingMap implementation that is imported below as `tf_som`.
# -nc (no-clobber) skips the download when tf_som.py already exists, so
# re-running this cell does not pile up tf_som.py.1, tf_som.py.2, ... copies.
!wget -nc https://raw.githubusercontent.com/cgorman/tensorflow-som/master/tf_som.py

In [None]:
# The cells below use the graph/session API (tf.Session, tf.ConfigProto),
# which is presumably why TensorFlow is pinned to the 1.15 release.
!pip3 install tensorflow==1.15

In [None]:
import numpy as np

np.set_printoptions(suppress=True)

import math
import time
import matplotlib.pyplot as plt
import tensorflow as tf
from tf_som import SelfOrganizingMap
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance_matrix
import subprocess
from collections import Counter
from sklearn.metrics.cluster import adjusted_rand_score
import logging

print(tf.__version__)

In [None]:
def get_umatrix(input_vects, weights, m, n, radius=1):
    """Compute a SOM U-matrix and the best-matching unit (BMU) of each input.

    Args:
        input_vects: Iterable of input vectors, each with the same length as
            one row of `weights`.
        weights: Array-like of shape (m * n, dim). Row `i * n + j` is treated
            as the weight vector of the neuron at grid location (i, j).
        m: Number of rows in the neuron grid.
        n: Number of columns in the neuron grid.
        radius: Euclidean distance on the grid within which another neuron
            counts as a neighbour. Defaults to 1, i.e. the 4-connected
            neighbourhood.

    Returns:
        A tuple `(umatrix, bmu_indices)`:
        * `umatrix` has shape (m * n, 1); entry `i` is the mean weight-space
          distance from neuron `i` to every neuron within `radius` of it on
          the grid. The neuron itself is part of that set and contributes a
          distance of zero.
        * `bmu_indices` is a list with one `np.array([i, j])` grid location
          per input vector: the neuron whose weights are closest to it. Ties
          go to the neuron with the lowest index.
    """
    weights = np.asarray(weights)

    # Grid coordinates in the same row-major order as the rows of `weights`.
    neuron_locs = [np.array([i, j]) for i in range(m) for j in range(n)]
    neuron_distmat = distance_matrix(neuron_locs, neuron_locs)

    umatrix = np.zeros((m * n, 1))
    for i in range(m * n):
        neighbor_weights = weights[neuron_distmat[i] <= radius]
        umatrix[i] = distance_matrix(weights[i:i + 1], neighbor_weights).mean()

    # One vectorised distance computation per input instead of rebuilding a
    # Python list of the weights for every candidate neuron.
    bmu_indices = []
    for vect in input_vects:
        dists = np.linalg.norm(weights - vect, axis=1)
        bmu_indices.append(neuron_locs[int(np.argmin(dists))])

    return umatrix, bmu_indices

In [None]:
# Dataset file to load; the handle opened here is read and closed by the
# data-prep cell.
filename = "zoo.csv"
dataset_path = f"/content/drive/MyDrive/Datasets/commonfiles/{filename}"
datafile = open(dataset_path)

In [None]:
#DATA PREP
# Parse the CSV handle opened in the previous cell into `data` (one list of
# floats per row) and `groundtruth` (the last column of each row, as a string).

data = []
groundtruth = []

try:
  datafile.readline()  # skip the header row
  for line in datafile:
    if not line.strip():
      continue  # ignore blank lines so no empty row ends up in `data`
    *features, label = line.split(",")
    groundtruth.append(label.strip())
    row = []
    for field in features:
      try:
        row.append(float(field))
      except ValueError:
        # Non-numeric field: keep the best-effort behaviour and encode it as 0.
        row.append(0)
    data.append(row)
finally:
  # Release the handle even if parsing fails part-way through.
  datafile.close()

if not data:
  raise ValueError(f"No data rows found in {filename}")

rows = len(data)
cols = len(data[0])
# One input vector per CSV row. This value is reported as "Number of input
# vectors" and passed to `som.train`, so it must be the row count rather than
# rows * cols.
num_inputs = rows

In [None]:
# Report what the data-prep cell produced.
print("\n".join((
    f"This is the data: {data}",
    f"Number of cols: {cols}",
    f"These are the number of rows {rows}",
    f"This is the grountruth: {groundtruth}",
    f"Number of input vectors: {num_inputs}",
)))

#CLUSTERS
# Every distinct ground-truth label counts as one cluster.
label_counts = Counter(groundtruth)
uniqueValues = label_counts.keys()
clusters = len(uniqueValues)
print(f"The clusters in dataset: {clusters}")

In [None]:
#CREATING THE TENSORFLOW GRAPH AND LOGS FOR THE MODEL
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
graph = tf.Graph()

with graph.as_default():
  session = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False
  ))

  #NEURONS FOR FEATURE MAP
  # Square grid size derived from the number of rows.
  # NOTE(review): the SOM below is built on the fixed som_m x som_n grid, not
  # on this m x n. The values are still computed because this cell has always
  # defined them; confirm which grid size is actually intended.
  neurons = int(5*math.sqrt(rows))
  m = int(math.sqrt(neurons)+1)
  n = int(math.sqrt(neurons)+1)
  neurons = (m*n)

  #GRID ACTUALLY TRAINED AND PLOTTED
  # Defined once so the SOM, the U-matrix and the reshape cannot drift apart.
  som_m, som_n = 20, 20

  #BATCH SIZE
  batch_size = 16

  #CONVERT TO NUMPY ARRAY FOR TF DATA PIPELINE
  data = np.array(data, dtype="float32")

  #MAKING THE TENSORFLOW DATASET PIPELINE
  # repeat() makes the dataset endless; how many batches are consumed is
  # decided by the SOM's train() call below.
  input_data = tf.data.Dataset.from_tensor_slices(data)
  input_data = input_data.repeat()
  input_data = input_data.batch(batch_size)
  iterator = input_data.make_one_shot_iterator()
  next_element = iterator.get_next()

  #BUILDING THE SOM OBJECT
  som = SelfOrganizingMap(m=som_m, n=som_n, dim=cols, max_epochs=100, session=session, graph=graph, input_tensor=next_element, batch_size=batch_size, initial_learning_rate=0.1, model_name='Self-Organizing-Map', softmax_activity=True)

  #MAKING + RUNNING SESSION
  init_op = tf.global_variables_initializer()
  session.run([init_op])

  start = time.time()

  #TRAINING
  som.train(num_inputs)

  stop = time.time()

  #TIME
  print(f"Training time: {stop - start}s")

  #WEIGHTS
  som_weights = som.output_weights

  #PLOTTING THE HEAT MAP
  umatrix, bmu_loc = get_umatrix(data, som_weights, som_m, som_n)
  fig = plt.figure(10)
  ax = fig.gca()
  # get_umatrix orders neurons row-major, so after the reshape the first axis
  # (drawn vertically by imshow) is the grid row and the second the column.
  heatmap = ax.imshow(umatrix.reshape((som_m, som_n)), origin='lower')
  fig.colorbar(heatmap, ax=ax, label="Mean distance to neighbouring neurons")
  ax.set_title(f"U-matrix ({som_m} x {som_n} SOM, {filename})")
  ax.set_xlabel("SOM column")
  ax.set_ylabel("SOM row")
  plt.show(block=True)
