In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

In [None]:
# Load the CSV dataset into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='Data_Alfamart Indomaret_South Jakarta.csv')
print(df.head(n=5))

In [None]:
# Count how many rows carry each distinct value of the 'store' column.
store_counts = df.loc[:, 'store'].value_counts()
print(store_counts)

# Add the per-store counts together to report the overall total.
total = store_counts.sum()
print(f"Total seluruh store: {total}")

In [None]:
# Show the dataset dimensions as a (rows, columns) tuple.
print((len(df.index), len(df.columns)))

In [None]:
# Collect the coordinates as an array of [latitude, longitude] pairs.
coords = df.loc[:, ['latitude', 'longitude']].to_numpy()
# Convert degrees to radians for the haversine distance.
coords_rad = np.deg2rad(coords)

In [None]:
# Run DBSCAN with the haversine metric. eps is expressed in radians, so the
# radius in kilometres is divided by the Earth's radius in kilometres.
kms_per_radian = 6371.0088
eps_km = 1.5  # e.g. a 1.5 km cluster radius
db = DBSCAN(
    eps=eps_km / kms_per_radian,
    min_samples=5,
    metric='haversine',
)
# Fit on the radian coordinates, then store each row's cluster label.
db.fit(coords_rad)
df['cluster'] = db.labels_

In [None]:
# Clustering summary: one row per store, one column per cluster label,
# each cell holding the number of rows in that (store, cluster) pair.
cluster_sizes = df.groupby(['store', 'cluster']).size()
print(cluster_sizes.unstack(fill_value=0))
# Rows whose cluster label is -1 are reported as outliers.
print("Jumlah outlier:", df['cluster'].eq(-1).sum())

In [None]:
# Visualise the store locations, coloured per store brand, and highlight
# the rows that DBSCAN labelled -1 (outliers).
fig, ax = plt.subplots(figsize=(10, 8))

# Fixed colour for each store brand shown on the map.
color_mapping = {
    'Alfamart': 'red',
    'Indomaret': 'blue'
}

for store_name, color in color_mapping.items():
    subset = df[df['store'] == store_name]
    ax.scatter(subset['longitude'], subset['latitude'],
               c=color, label=store_name, s=30, alpha=0.7, edgecolors='black')

# Overlay the outliers only when there are any; drawing an empty selection
# would still add a misleading "Outliers" entry to the legend.
outliers = df[df['cluster'] == -1]
if not outliers.empty:
    ax.scatter(outliers['longitude'], outliers['latitude'],
               color='black', label='Outliers', s=50, edgecolors='white')

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('DBSCAN Clustering of Alfamart (Merah) & Indomaret (Biru)')
ax.legend()
ax.grid(True)
plt.show()
