In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.datasets import fetch_openml
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

from sklearn.metrics import silhouette_score
from somperf.metrics import *
from somperf.utils.topology import rectangular_topology_dist

import optuna
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


##Data Analysis

In [2]:
df = pd.read_excel('dataset.xlsx', sheet_name='Sheet1') 
data = df.iloc[:,:-1].values

###Preprocessing

In [3]:
#Standarization
scaler = StandardScaler()

df_scaled = scaler.fit_transform(df.copy())
df_scaled = pd.DataFrame(df_scaled, columns=df.columns)

df_scaled.head(3)

Unnamed: 0,meanR,meanG,meanB,stdR,stdG,stdB,skewR,skewG,skewB,kurR,kurG,kurB,entR,entG,entB,Class
0,-0.665102,0.008693,-0.644699,-0.957046,-0.07479,-0.358296,-0.28026,-0.336533,0.694148,-0.27505,-0.357389,0.413462,0.26053,0.270445,-1.030791,-1.344481
1,-0.731039,-0.226412,-0.811958,-0.894219,-0.183617,-0.720664,0.071609,0.002909,0.695626,0.022708,-0.088418,0.532731,-0.107272,-0.118157,-1.012572,-1.344481
2,-0.178144,-0.392545,-0.305512,-0.087327,-0.407671,-0.370002,0.016748,0.039301,0.18218,-0.090864,-0.068511,0.161766,-0.20581,-0.210945,0.042868,-1.344481


In [4]:
#Split Dataset
X = df_scaled.drop(columns=['Class'])
y = df_scaled['Class']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

###Building Model

In [5]:
class SOMlinier:
    def __init__(self, m, fitur, R, alpha, tau):
        self.m = m 
        self.fitur = fitur 
        self.w = np.random.rand(m, fitur) 
        self.R0 = R 
        self.alpha0 = alpha
        self.tau = tau
    
    def hitung_bmu(self, x):
        # Menghitung jarak antara x dan setiap neuron
        D = np.linalg.norm(self.w - x, axis=1)
        # Kembalikan indeks dari BMU (neuron dengan jarak terdekat)
        return np.argmin(D)
    
    def hitung_second_bmu(self, x):
        # Menghitung jarak antara x dan setiap neuron
        D = np.linalg.norm(self.w - x, axis=1)
        # Urutkan indeks jarak, ambil indeks kedua terdekat
        return np.argsort(D)[1]
    
    def update_bobot(self, x, j, R, alpha):
        for i in range(self.m):
            jarak = abs(i - j)
            if jarak <= R:
                influence = np.exp(-jarak**2 / (2 * (R**2)))
                self.w[i] += alpha * influence * (x - self.w[i])
    
    def kurangi_radius(self, iterasi):
        return self.R0 * np.exp(-iterasi / self.tau)

    def kurangi_learning_rate(self, iterasi):
        return self.alpha0 * np.exp(-iterasi / self.tau)

    def train(self, data, n_iterasi):
        for t in range(n_iterasi):
            x = data[np.random.randint(0, len(data))]
            j = self.hitung_bmu(x)
            R = self.kurangi_radius(t)
            alpha = self.kurangi_learning_rate(t)
            self.update_bobot(x, j, R, alpha)

    def quantization_error(self, data):
        # Menghitung Quantization Error (QE) sebagai rata-rata jarak antara data dan BMU-nya
        total_error = np.mean([np.linalg.norm(x - self.w[self.hitung_bmu(x)]) for x in data])
        return total_error
    
    def topographic_error(self, data):
        # Menghitung Topographic Error (TE)
        errors = 0
        for x in data:
            bmu = self.hitung_bmu(x)
            second_bmu = self.hitung_second_bmu(x)
            # Cek jika BMU dan second BMU bertetangga (bedanya hanya 1 di grid linear)
            if abs(bmu - second_bmu) > 1:
                errors += 1
        te = errors / len(data)
        return te

In [6]:
X_train_np = X_train.values  # Ubah X_train dari DataFrame ke numpy array

# Inisialisasi SOM tanpa PCA dan t-SNE
m = 15
fitur = X_train_np.shape[1]  # Sesuaikan fitur berdasarkan dimensi X_train_np
R = 1
alpha0 = 0.11571090995138311
tau = 984

som_normal = SOMlinier(m=m, fitur=fitur, R=R, alpha=alpha0, tau=tau)

# Train SOM dengan data tanpa PCA dan t-SNE
n_iterations = 982
som_normal.train(X_train_np, n_iterations)

# Akses bobot SOM yang telah dilatih
weights_normal = som_normal.w
print("Trained SOM weights:", weights_normal)

Trained SOM weights: [[ 0.02504595  0.71419404  0.77754771  0.79564797  0.18255689  0.83230459
   0.88730021  0.05546058  0.83677598  0.87132344  0.85498082  0.85505634
   0.86901416  0.35074911  0.43426447]
 [-1.24120682 -1.58690112 -0.77059412 -1.04651    -1.61461989 -0.70228196
   1.78177092  1.86308761  0.73945208  1.85557527  1.93314785  0.73378613
  -1.63899396 -1.68149604 -1.09298556]
 [ 0.81312198  0.16098081  0.88835544  0.30822114  0.48982448  0.10296177
   0.73466814  0.20053874  0.39888647  0.57780246  0.46701136  0.68173711
   0.74337929  0.36791828  0.31320595]
 [ 0.37916565  0.37109255  0.62804972  0.94391822  0.89313435  0.71429159
   0.73115842  0.69791036  0.18850533  0.27032008  0.93053451  0.4230166
   0.87218931  0.09021422  0.52975253]
 [ 0.36076909  0.89926856  0.24651433  0.03354023  0.36005193  0.08886801
   0.46466296  0.47419092  0.45750369  0.30194527  0.70567742  0.62090827
   0.64264628  0.2901956   0.54179993]
 [ 0.43024891  0.75733405  0.35995747  0.7955

###Evaluasi

In [7]:
# Menghitung QE dan TE menggunakan numpy array (X_train_np)
qe = som_normal.quantization_error(X_train_np)
te = som_normal.topographic_error(X_train_np)

print("Quantization Error (QE):", qe)
print("Topographic Error (TE):", te)

Quantization Error (QE): 1.878169586672665
Topographic Error (TE): 0.6416887709991158


In [8]:

silhouette_avg = silhouette_score(X_train, y_train.values)  # Gunakan y_train untuk mencocokkan data pada X_pca
print("Silhouette Score:", silhouette_avg)

Silhouette Score: -0.06274745154438173


In [9]:
# Fungsi objektif untuk optimasi Optuna
def objective(trial):
    # Tuning parameter
    m = trial.suggest_int('m', 5, 20)           # Jumlah neuron
    R = trial.suggest_int('R', 1, 5)            # Radius
    alpha0 = trial.suggest_float('alpha0', 0.01, 0.5)  # Learning rate
    tau = trial.suggest_int('tau', 100, 1000)   # Konstanta peluruhan
    n_iterations = trial.suggest_int('n_iterations', 500, 2000)  # Iterasi pelatihan
    
    # Inisialisasi dan latih SOM dengan parameter yang dipilih
    som = SOMlinier(m=m, fitur=X_train_np.shape[1], R=R, alpha=alpha0, tau=tau)
    som.train(X_train_np, n_iterations)

    # Hitung QE, TE, dan Silhouette Score
    qe = som.quantization_error(X_train_np)
    te = som.topographic_error(X_train_np)

    # Hitung BMU untuk setiap data dan hitung Silhouette Score
    bmu_indices = np.array([som.hitung_bmu(x) for x in X_train_np])
    silhouette = silhouette_score(X_train_np, bmu_indices)
    
    # Kombinasikan ketiga metrik dengan bobot tertentu
    # Di sini, kita meminimalkan QE dan TE, tetapi memaksimalkan Silhouette Score
    # Jadi kita gunakan negatif Silhouette Score (karena Optuna meminimalkan nilai)
    combined_score = (0.4 * qe) + (0.4 * te) - (0.2 * silhouette)

    return combined_score

# Buat study untuk optimasi
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

# Cetak hasil terbaik
print("Best parameters for SOM:", study.best_params)
print("Best combined score:", study.best_value)


[I 2024-11-16 23:49:49,884] A new study created in memory with name: no-name-3a528e17-b4b8-4109-b17f-0eb905e62b3f


[I 2024-11-16 23:49:50,247] Trial 0 finished with value: 0.5858940287066234 and parameters: {'m': 20, 'R': 5, 'alpha0': 0.2288584189006165, 'tau': 246, 'n_iterations': 930}. Best is trial 0 with value: 0.5858940287066234.
[I 2024-11-16 23:49:50,591] Trial 1 finished with value: 0.7214404509163707 and parameters: {'m': 10, 'R': 2, 'alpha0': 0.3298367356173652, 'tau': 887, 'n_iterations': 1913}. Best is trial 0 with value: 0.5858940287066234.
[I 2024-11-16 23:49:50,941] Trial 2 finished with value: 0.7376867367327156 and parameters: {'m': 7, 'R': 3, 'alpha0': 0.31511240619939535, 'tau': 478, 'n_iterations': 1498}. Best is trial 0 with value: 0.5858940287066234.
[I 2024-11-16 23:49:51,274] Trial 3 finished with value: 0.7144623412996193 and parameters: {'m': 6, 'R': 4, 'alpha0': 0.1409970474288818, 'tau': 449, 'n_iterations': 1557}. Best is trial 0 with value: 0.5858940287066234.
[I 2024-11-16 23:49:51,621] Trial 4 finished with value: 0.5983258692423525 and parameters: {'m': 20, 'R': 4, 

Best parameters for SOM: {'m': 20, 'R': 5, 'alpha0': 0.2288584189006165, 'tau': 246, 'n_iterations': 930}
Best combined score: 0.5858940287066234


In [None]:
map_size = (6, 1)  # 6 neurons in 1 row (linear SOM)

# Ambil bobot dari SOM hasil PCA yang sudah dilatih
weights_normal = som_normal.w  # weights_normal seharusnya berbentuk (6, 1)

# Hitung beberapa metrik menggunakan somperf
print('Topographic Product =', topographic_product(rectangular_topology_dist(map_size), weights_normal))
print('Neighborhood Preservation =', neighborhood_preservation(1, weights_normal, X_train))
print('Trustworthiness =', trustworthiness(1, weights_normal, X_train))

Topographic Product = -0.03841065105903301
Neighborhood Preservation = -7.894317192184706
Trustworthiness = 0.894162591318731


###Tuning