# Tugas Self-Organizing Map JST-D
- Nama: Rayhan Egar Sadtya Nugraha
- NIM: 225150201111014
- Kelas: TIF-JST-D

In [1]:
import numpy as np
import pandas as pd

## Dataset

Dataset pada tugas ini menggunakan data yang terdapat pada Slide "6.0 - Self-organizing Maps (SOM)"

In [33]:
# Define dataset
X = ((1, 1, 0, 0),
     (0, 0, 0, 1),
     (1, 0, 0, 0),
     (0, 0, 1, 1))

## Algoritma Kohonen
1. Inisialisasi parameter dan matrix terkait, seperti: 
    - Inisialisasi bobot $w_{ij}$
    - Set parameter radius ($r$)
    - Set learning rate ($a$)
    - Faktor perubahan learning rate ($b$)

2. Proses training (3--8) berlangsung selama perubahan $w$ kurang dari threshold.
3. Untuk setiap input vector $i$: lakukan langkah 4--6

4. Untuk setiap output neuron $j$: hitung $$D(j) = \sum(w_{ij} - x_i)^2$$
5. Cari nilai $j$ dengan $(Dj)$ terkecil
6. Untuk semua $w_{ij}$ untuk $j$ terkcil, lakukan update bobot: $w'_{ij} = w_{ij} + a(x_i - w_{ij})$ dengan $i$ adalah nomor fitur.
7. Update _learning rate_ setelah satu epoch dengan $$a' = b \times a$$

8. Cek stopping criteria: _epoch_ = _max epoch_ atau $\text{max}(\Delta w) \le \text{threshold}$

## Fitting Function

In [34]:
def calculate_distance(w, x):
    w = np.array(w)
    x = np.array(x)

    delta = (w-x) ** 2
    return sum(delta)

In [41]:
def som_clustering(data, a, b, r = 0, max_epoch = 10, output_neuron = 2, threshold = 1e-3, verbose=False, w_predefined=False):

    # Menyimpan data dalam NumPy 2D Array
    data = np.array(data)

    # Jumlah fitur
    i = data.shape[1] 

    # Jumlah kelas
    j = output_neuron

    # Inisialisasi parameter
    max_epoch = max_epoch
    if(w_predefined):
        w = np.array([[0.2, 0.6, 0.5, 0.9],
                      [0.8, 0.4, 0.7, 0.3]])
    else:
        w = [np.random.rand(j, i)]

    epoch = 0
    distances = []
    max_change = threshold + 1

    # Iterasi epoch-level
    while max_change > threshold and epoch < max_epoch:
        print(f"Clustering SOM pada Epoch: {epoch}")
        max_change = 0

        # Iterasi data-level
        for x in data:
                
            # Hitung jarak antara data dengan weight masing-masing output neuron
            distances = np.array([calculate_distance(x, w[k]) for k in range(j)])
            bmu_index = np.argmin(distances)

            if(verbose):
                print()
                print(f"Clustering for data {x}:")
                print(f"----Distances: {distances}")
                print(f"----BMU Index: {bmu_index}")
                print(f"----Updating BMU:")
                print(f"--------Old w[{bmu_index}]: {w[bmu_index]}")
            
            # Melakukan weight update untuk Best-Matching Unit (BMU) saja
            if (r==0):
                for i in range(i):

                    w_change = a * (x[i] - w[bmu_index, i])
                    if abs(w_change) > max_change:
                        max_change = abs(w_change)

                    w[bmu_index, i] = w[bmu_index, i] + w_change
                
                if(verbose):
                        print(f"--------New w[{bmu_index}]: {w[bmu_index]}")

            # Melakukan weight update jika r > 0
            if (r>0):
                print()
                print(f"----Updating Neighboring Output Neuron:")
                for k in range(j):
                    distance_to_bmu = abs(k - bmu_index)
                    if distance_to_bmu <= r:
                        if(verbose):
                            print(f"--------Output unit {k} old w[{k}]: {w[k]}")
                        influence_ratio = np.exp( -distance_to_bmu**2 / (2 * (r**2)) )
                        w_change = influence_ratio * a * (x - w[k])

                        if max(abs(w_change)) > max_change:
                            max_change = max(abs(w_change))
                        
                        w[k] = w[k] + w_change
                        if(verbose):
                            print(f"--------Output unit {k} new w[{k}]: {w[k]}")

        a = b * a
        print(f"Max change pada Epoch ke-{epoch}: {max_change}")
        print()
        epoch += 1
    
    # Proses clustering data
    clusters = []
    for x in data:
        distances = np.array([calculate_distance(x, w[k]) for k in range(j)])
        bmu_index = np.argmin(distances)
        clusters.append(bmu_index)
    
    return w, clusters

## Use-Case

In [42]:
w, clusters = som_clustering(X, a=0.6, b=0.5, r=0, verbose=True, w_predefined=True)

Clustering SOM pada Epoch: 0

Clustering for data [1 1 0 0]:
----Distances: [1.86 0.98]
----BMU Index: 1
----Updating BMU:
--------Old w[1]: [0.8 0.4 0.7 0.3]
--------New w[1]: [0.92 0.76 0.28 0.12]

Clustering for data [0 0 0 1]:
----Distances: [0.66   2.2768]
----BMU Index: 0
----Updating BMU:
--------Old w[0]: [0.2 0.6 0.5 0.9]
--------New w[0]: [0.08 0.24 0.2  0.9 ]

Clustering for data [1 0 0 0]:
----Distances: [1.754  0.6768]
----BMU Index: 1
----Updating BMU:
--------Old w[1]: [0.92 0.76 0.28 0.12]
--------New w[1]: [0.968 0.304 0.28  0.12 ]

Clustering for data [0 0 1 1]:
----Distances: [0.714   2.32224]
----BMU Index: 0
----Updating BMU:
--------Old w[0]: [0.08 0.24 0.2  0.9 ]
--------New w[0]: [0.032 0.24  0.2   0.9  ]
Max change pada Epoch ke-0: 0.45599999999999996

Clustering SOM pada Epoch: 1

Clustering for data [1 1 0 0]:
----Distances: [2.364624 0.57824 ]
----BMU Index: 1
----Updating BMU:
--------Old w[1]: [0.968 0.304 0.28  0.12 ]
--------New w[1]: [0.968 0.304 0.28  

In [43]:
print(w)

[[0.032 0.24  0.2   0.9  ]
 [0.968 0.304 0.28  0.12 ]]


In [44]:
print(clusters)

[1, 0, 1, 0]


## Use-case dengan Neighboring Output Neuron update (r>0)

In [45]:
w, clusters = som_clustering(X, a=0.6, b=0.5, r=1, verbose=True, w_predefined=True)

Clustering SOM pada Epoch: 0

Clustering for data [1 1 0 0]:
----Distances: [1.86 0.98]
----BMU Index: 1
----Updating BMU:
--------Old w[1]: [0.8 0.4 0.7 0.3]

----Updating Neighboring Output Neuron:
--------Output unit 0 old w[0]: [0.2 0.6 0.5 0.9]
--------Output unit 0 new w[0]: [0.49113472 0.74556736 0.3180408  0.57247344]
--------Output unit 1 old w[1]: [0.8 0.4 0.7 0.3]
--------Output unit 1 new w[1]: [0.92 0.76 0.28 0.12]

Clustering for data [0 0 0 1]:
----Distances: [1.0810129 2.2768   ]
----BMU Index: 0
----Updating BMU:
--------Old w[0]: [0.49113472 0.74556736 0.3180408  0.57247344]

----Updating Neighboring Output Neuron:
--------Output unit 0 old w[0]: [0.49113472 0.74556736 0.3180408  0.57247344]
--------Output unit 0 new w[0]: [0.19645389 0.29822694 0.12721632 0.82898938]
--------Output unit 1 old w[1]: [0.92 0.76 0.28 0.12]
--------Output unit 1 new w[1]: [0.58519508 0.48342202 0.17810285 0.44024819]

Clustering for data [1 0 0 0]:
----Distances: [1.43803305 0.63129907]


In [46]:
print(w)

[[0.32249334 0.14053163 0.40511681 0.67828392]
 [0.59128837 0.24523669 0.24879942 0.40948889]]


In [47]:
print(clusters)

[1, 0, 1, 0]
