# Tugas 2: Clustering Lokasi Gerai Kopi

Analisis pengelompokan lokasi gerai kopi menggunakan KMeans, Agglomerative Clustering, dan DBSCAN.

---

Dataset: `lokasi_gerai_kopi_clean.csv`


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.preprocessing import StandardScaler
import scipy.cluster.hierarchy as sch
    
# Apply matplotlib's "fivethirtyeight" style sheet globally so every figure
# drawn later in the notebook shares the same look.
plt.style.use("fivethirtyeight")


In [4]:
# Load the coffee-shop location dataset into `df`.
# The path is kept in one constant so it can be changed in a single place;
# it is resolved relative to the notebook's current working directory.
DATA_PATH = "lokasi_gerai_kopi_clean.csv"

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Re-raise with an actionable message instead of the bare OS error.
    raise FileNotFoundError(
        f"Dataset '{DATA_PATH}' was not found in the current working directory. "
        "Place the CSV next to this notebook or point DATA_PATH at its location."
    ) from exc

# The clustering cells below read the "x" and "y" columns, so fail here with a
# clear message rather than with a KeyError several cells later.
missing_columns = {"x", "y"} - set(df.columns)
if missing_columns:
    raise KeyError(f"Dataset is missing required column(s): {sorted(missing_columns)}")

# Last expression of the cell: preview the first rows.
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'lokasi_gerai_kopi_clean.csv'

In [None]:
# Quick sanity check of the loaded frame.
# DataFrame.info() prints its summary as a side effect.
df.info()
# Last expression of the cell, so the describe() summary table is what the
# cell displays as its output.
df.describe()

In [None]:
# Pull the two coordinate columns out of the frame as a plain NumPy array.
X = df.loc[:, ["x", "y"]].to_numpy()

# Learn the per-feature scaling parameters from the full dataset, then apply
# them; `scaler` and `X_scaled` are reused by the clustering cells below.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Elbow method: fit KMeans for a range of K values and record the
# within-cluster sum of squares (KMeans.inertia_) for each one.
# K can never exceed the number of samples, so cap the upper bound.
k_values = range(1, min(10, len(X_scaled)) + 1)

wcss = []
for k in k_values:
    # n_init is set explicitly: its default changed between scikit-learn
    # releases, so pinning it keeps results identical across versions and
    # avoids the FutureWarning emitted by the transitional releases.
    kmeans = KMeans(n_clusters=k, init="k-means++", n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

plt.figure(figsize=(10, 5))
plt.plot(k_values, wcss, marker='8', color="red", linewidth=2)
# One tick per evaluated K so the elbow can be read off directly.
plt.xticks(list(k_values))
plt.xlabel("K Value")
plt.ylabel("WCSS")
plt.title("Elbow Method for Optimal K (KMeans)")
plt.show()

In [None]:
# Final KMeans model on the standardized coordinates.
# NOTE(review): n_clusters=5 is presumably the elbow read off the WCSS plot;
# confirm against that figure.
# init and n_init are spelled out so this model uses the same, version-stable
# configuration as the elbow search (n_init's default differs between
# scikit-learn releases).
km = KMeans(n_clusters=5, init="k-means++", n_init=10, random_state=42)
df["kmeans_label"] = km.fit_predict(X_scaled)

# Plot in the original (unscaled) coordinates, coloured by assigned cluster.
plt.figure(figsize=(10, 6))
plt.scatter(df["x"], df["y"], c=df["kmeans_label"], cmap="Set1", s=50)
plt.title("Lokasi Gerai Kopi - KMeans Clustering")
plt.xlabel("X")
plt.ylabel("Y")
plt.show()

In [None]:
# Dendrogram on a random subset of the rows (at most 500).
# Cap the sample size at the number of rows: DataFrame.sample raises when
# asked for more rows than exist (without replacement).
sample_size = min(500, len(df))
sample_df = df.sample(sample_size, random_state=42)

# Use transform(), not fit_transform(): re-fitting here would silently
# overwrite the parameters the shared `scaler` learned from the full dataset
# and would put the sample in a different scaled space than `X_scaled`.
# `.values` passes a plain array, matching how the scaler was fitted.
sample_X_scaled = scaler.transform(sample_df[["x", "y"]].values)

# Ward linkage over the scaled sample.
linkage_matrix = sch.linkage(sample_X_scaled, method="ward")

plt.figure(figsize=(12, 6))
sch.dendrogram(linkage_matrix)
plt.title("Dendrogram – Hierarchical Clustering (Sample Gerai Kopi)")
plt.xlabel("Gerai (Indeks Sample)")
plt.ylabel("Euclidean Distance")
plt.show()

In [None]:
# Agglomerative clustering into five groups on the standardized coordinates;
# the fitted model's labels are stored alongside the original data.
agglo = AgglomerativeClustering(n_clusters=5)
df["agglo_label"] = agglo.fit(X_scaled).labels_

# Scatter of the original coordinates, one colour per cluster label.
plt.figure(figsize=(10, 6))
plt.scatter(
    x=df["x"],
    y=df["y"],
    s=50,
    c=df["agglo_label"],
    cmap="tab10",
)
plt.xlabel("X")
plt.ylabel("Y")
plt.title("Lokasi Gerai Kopi - Agglomerative Clustering")
plt.show()

In [None]:
# Density-based clustering on the standardized coordinates.
# NOTE(review): eps=0.5 and min_samples=5 are hard-coded; confirm they suit
# the density of this dataset.
dbscan = DBSCAN(eps=0.5, min_samples=5)
df["dbscan_label"] = dbscan.fit_predict(X_scaled)

plt.figure(figsize=(10, 6))
# Iterate labels in sorted order so the legend and the colour assigned to each
# cluster are stable and ascending; the noise label (-1) sorts first.
for label in sorted(df["dbscan_label"].unique()):
    cluster = df[df["dbscan_label"] == label]
    if label == -1:
        # DBSCAN marks points that belong to no cluster with label -1.
        # Drawn in black: red is too close to one of the colours the active
        # style sheet hands out to regular clusters.
        plt.scatter(cluster["x"], cluster["y"], c='black', marker='x', s=50, label='Noise')
    else:
        plt.scatter(cluster["x"], cluster["y"], s=50, label=f'Cluster {label}')
plt.title("Lokasi Gerai Kopi - DBSCAN Clustering")
plt.xlabel("X")
plt.ylabel("Y")
plt.legend()
plt.show()

In [None]:
# Report how many points each algorithm assigned to every cluster label.
# Each entry pairs the heading to print with the label column to summarise.
distribution_reports = [
    ("KMeans Cluster Distribution:", "kmeans_label"),
    ("\nAgglomerative Cluster Distribution:", "agglo_label"),
    ("\nDBSCAN Cluster Distribution:", "dbscan_label"),
]

for heading, label_column in distribution_reports:
    print(heading)
    print(df[label_column].value_counts())