In [80]:
import random
import copy
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.spatial.distance import cdist
import numpy as np
import pandas as pd

In [83]:
class SOM():
    """Self-organising-map style clusterer over a 2-D grid of neurons.

    Every neuron owns a weight vector with ``dimentions`` components. During
    training a random sample is drawn, the neuron whose weights are closest
    to it (the best-matching unit, BMU) is located, and all neurons inside
    the current neighbourhood are pulled towards the sample.

    Note that the neighbourhood is *rank based*: neurons are ordered by the
    Euclidean distance between their weights and the BMU's weights, and that
    rank (0 for the BMU itself) is what gets compared with the neighbourhood
    radius and fed to the Gaussian influence. Grid coordinates are not used.

    Parameters
    ----------
    neurons : sequence of two ints
        Grid shape ``(rows, cols)``; ``rows * cols`` is the number of clusters.
    dimentions : int
        Number of features per sample (spelling kept for compatibility).
    n_iter : int, default 1000
        Number of epochs. Each epoch performs ``len(samples)`` updates using
        samples drawn with replacement.
    learning_rate : float, default 0.1
        Initial learning rate; decays as ``lr0 * exp(-epoch / n_iter)``.
    random_state : int or None, default None
        Seed for weight initialisation and sample drawing. ``None`` keeps
        the historical behaviour of using the global ``numpy.random`` and
        ``random`` generators.

    Attributes
    ----------
    weights : ndarray of shape (rows, cols, dimentions)
        Current neuron weights.
    weights_ : ndarray of shape (rows * cols, dimentions) or None
        Flattened weights (cluster centres); set by ``train``.
    labels_ : list of int or None
        Index into ``weights_`` of the nearest neuron for each training
        sample; set by ``train``.
    """

    def __init__(self, neurons, dimentions, n_iter=1000, learning_rate=0.1, random_state=None):
        neighbourhood_radius = np.sum(neurons)
        self.neurons = neurons
        self.dimentions = dimentions
        # A private generator is only created when a seed is supplied, so the
        # default path keeps drawing from the global generators as before.
        self._rng = None if random_state is None else np.random.RandomState(random_state)
        initialiser = np.random if self._rng is None else self._rng
        self.weights = initialiser.randint(0, 255, size=(neurons[0], neurons[1], dimentions)) / 255
        self.initial_learning_rate = learning_rate
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.initial_neighbourhood_radius = neighbourhood_radius
        self.neighbourhood_radius = neighbourhood_radius
        # With this constant the radius decays from its initial value down to
        # exactly 1 at the last epoch: r0 * exp(-n_iter / tc) == 1.
        self.time_constant = n_iter/np.log(self.initial_neighbourhood_radius)
        self.weights_ = None  # Cluster centres
        self.labels_ = None # Assign labels

    @staticmethod
    def _asSampleMatrix(samples):
        """Return ``samples`` as a float array of shape ``(n_samples, n_features)``.

        Accepts either an ``(N, D)`` matrix or the ``(N, 1, D)`` stack of row
        vectors this class historically required. Empty input yields a
        ``(0, 0)`` array.

        Raises
        ------
        ValueError
            If the input has any other layout.
        """
        data = np.asarray(samples, dtype=float)
        if data.size == 0:
            return data.reshape(0, 0)
        if data.ndim == 3 and data.shape[1] == 1:
            data = data[:, 0, :]
        if data.ndim != 2:
            raise ValueError(
                "samples must have shape (n_samples, n_features) or "
                "(n_samples, 1, n_features); got %r" % (np.shape(samples),)
            )
        return data

    def _drawIndex(self, n):
        """Draw a uniform random integer in ``[0, n)``."""
        if self._rng is None:
            # randrange indexes the array directly instead of handing the
            # ndarray to random.choice, so the array is never truth-tested.
            return random.randrange(n)
        return int(self._rng.randint(n))

    def _nearestNeuron(self, samples):
        """Return, for each sample, the index into ``weights_`` of the closest neuron."""
        data = self._asSampleMatrix(samples)
        if len(data) == 0:
            return np.array([], dtype=int)
        # cdist gives a (n_neurons, n_samples) table; argmin keeps the first
        # (lowest-index) neuron on ties, like the previous per-sample search.
        return np.argmin(cdist(self.weights_, data, metric='euclidean'), axis=0)

    def _assignLabels(self, samples):
        """Freeze the flattened weights into ``weights_`` and label ``samples``."""
        rows, cols, dims = self.weights.shape
        self.weights_ = self.weights.reshape(rows * cols, dims)
        self.labels_ = self._nearestNeuron(samples).tolist()

    def _updateWeights(self, sample):
        """Move the BMU and its neighbours towards a single ``sample``.

        ``sample`` may be a flat ``(D,)`` vector or a ``(1, D)`` row vector.
        """
        rows, cols, dims = self.weights.shape
        flat = self.weights.reshape(rows * cols, dims)
        sample = np.asarray(sample, dtype=float).reshape(1, dims)

        # Best-matching unit: neuron whose weights are closest to the sample.
        bmu = flat[np.argmin(cdist(flat, sample, metric='euclidean'))]

        # Rank every neuron by weight-space distance to the BMU (BMU -> 0).
        to_bmu = cdist(flat, bmu.reshape(1, dims), metric='euclidean').ravel()
        ranks = np.argsort(np.argsort(to_bmu)).reshape(rows, cols)

        # Gaussian influence of the rank, cut to zero outside the radius.
        radius = self.neighbourhood_radius
        influence = np.where(
            ranks <= radius,
            np.exp(-np.multiply(ranks, ranks) / (2 * radius * radius)),
            0.0,
        )
        influence = influence.reshape(rows, cols, 1)

        self.weights = self.weights + np.multiply(influence, (sample - self.weights)) * self.learning_rate

    def _updateLearningRate(self, iteration):
        """Exponentially decay the learning rate for the given epoch number."""
        self.learning_rate = self.initial_learning_rate * np.exp(-iteration/self.n_iter)

    def _updateNeighbourhoodRadius(self, iteration):
        """Exponentially shrink the neighbourhood radius for the given epoch number."""
        self.neighbourhood_radius = self.initial_neighbourhood_radius * np.exp(-iteration/self.time_constant)

    def train(self, samples):
        """Fit the map to ``samples`` and populate ``weights_`` and ``labels_``.

        Parameters
        ----------
        samples : array-like of shape (N, D) or (N, 1, D)
            Training data. Each epoch draws ``N`` samples with replacement.
        """
        data = self._asSampleMatrix(samples)
        n_samples = len(data)

        for i in range(1, self.n_iter+1):
            for _ in range(n_samples):
                self._updateWeights(data[self._drawIndex(n_samples)])
            self._updateLearningRate(i)
            self._updateNeighbourhoodRadius(i)
        self._assignLabels(data)

    def predict(self, samples):
        """Return the index of the nearest cluster centre for each sample.

        Parameters
        ----------
        samples : array-like of shape (N, D) or (N, 1, D)

        Returns
        -------
        ndarray of int, shape (N,)
            Indices into ``weights_``.

        Raises
        ------
        ValueError
            If called before ``train`` or if ``samples`` has an unsupported shape.
        """
        if self.weights_ is None:
            raise ValueError("SOM has not been trained yet; call train() before predict().")
        return self._nearestNeuron(samples)


In [84]:
# Load the unlabelled dataset; genfromtxt already returns an ndarray.
Dataset = np.genfromtxt('Tugas 2 ML Genap 2018-2019 Dataset Tanpa Label.csv', delimiter=",")
# Insert a middle axis so every sample is a (1, n_features) row vector.
Dataset = np.reshape(Dataset, (Dataset.shape[0],1,Dataset.shape[1]))

# 10 x 1 grid of neurons -> 10 clusters over the 2 input features.
s = SOM(neurons=(10,1), dimentions=2, n_iter=100, learning_rate=0.1)
s.train(Dataset)
print("Jumlah Cluster :", len(s.weights_))
print("Cluster centres:", s.weights_)
print("labels:",s.labels_)
# `samples` is not defined anywhere in this notebook (it would raise
# NameError on a fresh kernel); predict on the data that was just clustered.
result = s.predict(Dataset)

Jumlah Cluster : 10
Cluster centres: [[14.03691276  4.94632149]
 [ 9.00509793 10.97863463]
 [ 6.3365599   5.48451608]
 [14.82406947 13.0690091 ]
 [ 4.20391453  7.12474413]
 [ 9.73999791  9.6678576 ]
 [14.13294121  5.03533771]
 [ 6.61822924 14.62267912]
 [15.3895495  11.88856946]
 [ 8.66190081  3.6922967 ]]
labels: [5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1, 1, 5, 1, 1, 1, 3, 5, 5, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 1, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5,