In [None]:
# 🧠 DBSCAN Credit Card Segmentation Project

# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

sns.set(style="whitegrid")

# Tunable settings, gathered in one place so that trying other features or
# DBSCAN parameters does not require hunting through the script.
FEATURES = ['BALANCE', 'PURCHASES', 'CREDIT_LIMIT', 'PAYMENTS']
EPS = 0.5          # DBSCAN neighbourhood radius, measured on the standardized features
MIN_SAMPLES = 8    # neighbourhood size required for a point to count as a core point
NOISE_LABEL = -1   # label DBSCAN gives to points that belong to no cluster

# Step 2: Load dataset directly from GitHub or local
url = "https://raw.githubusercontent.com/selva86/datasets/master/CreditCard.csv"
df = pd.read_csv(url)
print("Dataset loaded successfully âœ…")
print("Shape:", df.shape)
# A bare `df.head()` only renders when it is the last expression of a notebook
# cell, so print it explicitly to actually show the preview.
print(df.head())

# Step 3: Select features
# NOTE(review): confirm that the CSV at `url` really provides these columns.
missing_columns = [col for col in FEATURES if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"Dataset is missing required feature columns: {missing_columns}"
    )
# Rows with a missing value in any selected feature are excluded from clustering.
X = df[FEATURES].dropna()

# Step 4: Scale the data
# DBSCAN is distance based, so features are standardized to comparable scales.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 5: Apply DBSCAN
db = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES)
labels = db.fit_predict(X_scaled)

# Step 6: Add cluster labels
# Assigning an index-aligned Series leaves rows that were dropped in Step 3 as
# <NA> instead of -1, so they are not mistaken for DBSCAN noise points. The
# nullable Int64 dtype keeps the labels integral despite the missing entries.
df['Cluster'] = pd.Series(labels, index=X.index, dtype="Int64")

# Step 7: Results summary
print("Unique Clusters:", np.unique(labels))
print("Cluster Counts:")
# dropna=False also reports how many rows were never clustered (<NA>).
print(df['Cluster'].value_counts(dropna=False))

# Step 8: Visualization
# Only two of the clustered features are plotted; `labels` is positionally
# aligned with the rows of X, so it can be used directly as the hue.
plt.figure(figsize=(8, 6))
sns.scatterplot(
    x=X['BALANCE'],
    y=X['PURCHASES'],
    hue=labels,
    palette='Set2',
    s=60,
)
plt.title("DBSCAN Clustering - Credit Card Customers")
plt.xlabel("Balance")
plt.ylabel("Purchases")
plt.show()

# Step 9: Final Insights
# Noise is not a cluster, so it is excluded from the cluster count and
# reported separately as outliers.
is_noise = labels == NOISE_LABEL
num_clusters = int(np.count_nonzero(np.unique(labels) != NOISE_LABEL))
num_outliers = int(np.count_nonzero(is_noise))
print(f"Total Clusters: {num_clusters}")
print(f"Outliers Detected: {num_outliers}")