<a href="https://colab.research.google.com/github/samuelhtampubolon/SDPM2025/blob/main/DBSCAN_data_Wine_dan_Sweep_Parameter_eps_dan_min_samples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Clustering density-based; pilih kombinasi eps/min_samples terbaik via silhouette.
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
import numpy as np

X, _ = load_wine(return_X_y=True)
X = StandardScaler().fit_transform(X)

def evaluate_dbscan(eps, min_samples):
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
    unique = set(labels)
    n_clusters = len(unique) - (1 if -1 in unique else 0)
    noise_ratio = (labels == -1).mean()
    if n_clusters <= 1:
        return -1, n_clusters, noise_ratio, labels
    sil = silhouette_score(X, labels)
    return sil, n_clusters, noise_ratio, labels

best = {"score": -1, "eps": -1, "min_samples": -1, "n_clusters": -1, "noise_ratio": -1}
for eps in np.linspace(0.3, 1.2, 10):
    for ms in [3, 5, 10]:
        sil, ncl, noise, labels = evaluate_dbscan(eps, ms)
        if sil > best.get("score", -1):
            best = {"score": sil, "eps": float(eps), "min_samples": ms,
                    "n_clusters": int(ncl), "noise_ratio": float(noise)}

In [5]:
print(f"Best eps={best['eps']:.2f}, min_samples={best['min_samples']}, "
      f"Silhouette={best['score']:.3f}, Clusters={best['n_clusters']}, Noise={best['noise_ratio']:.2f}")

Best eps=-1.00, min_samples=-1, Silhouette=-1.000, Clusters=-1, Noise=-1.00
