In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score, adjusted_rand_score

In [28]:
data = pd.read_csv("./data/experiment_results.csv")
data_scaled = pd.read_csv("./data/experiment_results_scaled.csv")

In [51]:
# 데이터 생성
X = data.to_numpy()

# 표준 K-means 클러스터링
kmeans_standard = KMeans(n_clusters=3, random_state=42)
kmeans_standard.fit(X)
standard_labels = kmeans_standard.labels_
standard_score = silhouette_score(X, standard_labels)
dbi_standard = davies_bouldin_score(X, standard_labels)
chi_standard = calinski_harabasz_score(X, standard_labels)

print(f"Silhouette Score for standard K-means: {standard_score:.2f}<br>dbi_standard for standard K-means: {dbi_standard:.2f}<br>chi_standard for standard K-means: {chi_standard:.2f}")

  super()._check_params_vs_input(X, default_n_init=10)


Silhouette Score for standard K-means: 0.54<br>dbi_standard for standard K-means: 0.59<br>chi_standard for standard K-means: 85.46


In [52]:
# 각 특성의 분산 계산
variances = np.var(X, axis=0)

# 분산의 역수를 가중치로 사용
weights = 1 / variances

# 데이터에 가중치 적용
X_weighted = X * np.sqrt(weights)

# 가중치 적용 K-means 클러스터링
kmeans_weighted = KMeans(n_clusters=3, random_state=42)
kmeans_weighted.fit(X_weighted)
weighted_labels = kmeans_weighted.labels_
standard_score = silhouette_score(X, standard_labels)
dbi_standard = davies_bouldin_score(X, standard_labels)
chi_standard = calinski_harabasz_score(X, standard_labels)

print(f"Silhouette Score for standard K-means: {standard_score:.2f}<br>dbi_standard for standard K-means: {dbi_standard:.2f}<br>chi_standard for standard K-means: {chi_standard:.2f}")

  super()._check_params_vs_input(X, default_n_init=10)


Silhouette Score for standard K-means: 0.54<br>dbi_standard for standard K-means: 0.59<br>chi_standard for standard K-means: 85.46


In [53]:
# 데이터 생성
X = data_scaled.to_numpy()

# 표준 K-means 클러스터링
kmeans_standard = KMeans(n_clusters=3, random_state=42)
kmeans_standard.fit(X)
standard_labels = kmeans_standard.labels_
standard_score = silhouette_score(X, standard_labels)
dbi_standard = davies_bouldin_score(X, standard_labels)
chi_standard = calinski_harabasz_score(X, standard_labels)

print(f"Silhouette Score for standard K-means: {standard_score:.2f}<br>dbi_standard for standard K-means: {dbi_standard:.2f}<br>chi_standard for standard K-means: {chi_standard:.2f}")

  super()._check_params_vs_input(X, default_n_init=10)


Silhouette Score for standard K-means: 0.38<br>dbi_standard for standard K-means: 0.94<br>chi_standard for standard K-means: 23.24


In [54]:
# 각 특성의 분산 계산
variances = np.var(X, axis=0)

# 분산의 역수를 가중치로 사용
weights = 1 / variances

# 데이터에 가중치 적용
X_weighted = X * np.sqrt(weights)

# 가중치 적용 K-means 클러스터링
kmeans_weighted = KMeans(n_clusters=3, random_state=42)
kmeans_weighted.fit(X_weighted)
weighted_labels = kmeans_weighted.labels_
standard_score = silhouette_score(X, standard_labels)
dbi_standard = davies_bouldin_score(X, standard_labels)
chi_standard = calinski_harabasz_score(X, standard_labels)

print(f"Silhouette Score for standard K-means: {standard_score:.2f}<br>dbi_standard for standard K-means: {dbi_standard:.2f}<br>chi_standard for standard K-means: {chi_standard:.2f}")

  super()._check_params_vs_input(X, default_n_init=10)


Silhouette Score for standard K-means: 0.38<br>dbi_standard for standard K-means: 0.94<br>chi_standard for standard K-means: 23.24


In [55]:
# 데이터 생성
X = data.to_numpy()

# Gaussian Mixture Model 클러스터링
gmm = GaussianMixture(n_components=3, random_state=42, covariance_type='full')
gmm.fit(X)
gmm_labels = gmm.predict(X)


standard_score = silhouette_score(X, gmm_labels)
dbi_standard = davies_bouldin_score(X, gmm_labels)
chi_standard = calinski_harabasz_score(X, gmm_labels)

print(f"Silhouette Score for standard K-means: {standard_score:.2f}<br>dbi_standard for standard K-means: {dbi_standard:.2f}<br>chi_standard for standard K-means: {chi_standard:.2f}")

Silhouette Score for standard K-means: 0.57<br>dbi_standard for standard K-means: 0.65<br>chi_standard for standard K-means: 85.02




In [57]:
# 데이터 생성
X = data_scaled.to_numpy()

# Gaussian Mixture Model 클러스터링
gmm = GaussianMixture(n_components=3, random_state=42, covariance_type='diag')
gmm.fit(X)
gmm_labels = gmm.predict(X)


standard_score = silhouette_score(X, gmm_labels)
dbi_standard = davies_bouldin_score(X, gmm_labels)
chi_standard = calinski_harabasz_score(X, gmm_labels)

print(f"Silhouette Score for standard K-means: {standard_score:.2f}<br>dbi_standard for standard K-means: {dbi_standard:.2f}<br>chi_standard for standard K-means: {chi_standard:.2f}")

Silhouette Score for standard K-means: 0.38<br>dbi_standard for standard K-means: 0.94<br>chi_standard for standard K-means: 23.24




||"k-means"|"k-means weighted"|"GMM"|
|--|--|--|--|
|data|Silhouette Score for standard K-means: 0.54<br>dbi_standard for standard K-means: 0.59<br>chi_standard for standard K-means: 85.46|Silhouette Score for standard K-means: 0.54<br>dbi_standard for standard K-means: 0.59<br>chi_standard for standard K-means: 85.46|Silhouette Score for standard K-means: 0.57<br>dbi_standard for standard K-means: 0.65<br>chi_standard for standard K-means: 85.02|
|scaled data|Silhouette Score for standard K-means: 0.38<br>dbi_standard for standard K-means: 0.94<br>chi_standard for standard K-means: 23.24|Silhouette Score for standard K-means: 0.38<br>dbi_standard for standard K-means: 0.94<br>chi_standard for standard K-means: 23.24|Silhouette Score for standard K-means: 0.38<br>dbi_standard for standard K-means: 0.94<br>chi_standard for standard K-means: 23.24|