# Clustering with PyCaret
## Customer Segmentation

**Dataset:** Mall Customers (200 rows, 5 features)
**Goal:** Segment customers using clustering
**Methods:** K-Means (fitted below); DBSCAN and Hierarchical clustering are also available in PyCaret but are not run in this notebook

In [None]:
!pip install pycaret[full] -q

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pycaret.clustering import *
import warnings
# Suppress all warnings from here on so the cell outputs stay uncluttered.
# NOTE: this is a blanket filter; it also hides warnings that may matter.
warnings.simplefilter('ignore')
print('✓ Libraries imported')

In [None]:
# Location of the Mall Customers CSV (raw file in a public GitHub repository).
# The literal is split across lines only for readability; adjacent string
# literals are joined into the same single URL.
url = (
    'https://raw.githubusercontent.com/SteffiPeTaffy/machineLearningAZ/master/'
    'Machine%20Learning%20A-Z%20Template%20Folder/Part%204%20-%20Clustering/'
    'Section%2025%20-%20Hierarchical%20Clustering/Mall_Customers.csv'
)

# Read the CSV straight from the URL into a DataFrame.
df = pd.read_csv(url)
print(f'Shape: {df.shape}')

# Last expression of the cell: the notebook renders the first rows.
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()
# Last expression of the cell, so the notebook renders the summary statistics.
df.describe()

In [None]:
# Scatter plot of annual income against spending score.
# The axis labels reuse the column names so the units (k$ and the 1-100 scale)
# are visible on the chart instead of being dropped.
income_col = 'Annual Income (k$)'
spending_col = 'Spending Score (1-100)'

plt.figure(figsize=(10, 6))
plt.scatter(df[income_col], df[spending_col], alpha=0.6)
plt.xlabel(income_col)
plt.ylabel(spending_col)
plt.title('Income vs Spending')
plt.show()

In [None]:
# Initialise the PyCaret clustering experiment on the loaded frame.
setup(
    data=df,
    ignore_features=['CustomerID'],  # excluded from the modelling features
    normalize=True,                  # scale features before clustering
    session_id=42,                   # fixed seed for reproducible runs
    verbose=True,
)

In [None]:
# Show the table of clustering algorithms available in PyCaret's model library.
models()

In [None]:
# Fit a K-Means model with 5 clusters.
# NOTE(review): k is fixed here before the elbow plot in the next cell is
# inspected — confirm that 5 is the intended choice.
kmeans = create_model('kmeans', num_clusters=5)

In [None]:
# Elbow plot, used to judge a reasonable number of clusters.
plot_model(kmeans, plot='elbow')

In [None]:
# Silhouette plot for the fitted K-Means model.
plot_model(kmeans, plot='silhouette')

In [None]:
# 2-D cluster plot (PyCaret documents 'cluster' as a PCA-based projection).
plot_model(kmeans, plot='cluster')

In [None]:
# Distribution plot for the fitted K-Means model.
plot_model(kmeans, plot='distribution')

In [None]:
# Attach the fitted model's cluster label to each row; the resulting frame
# carries a 'Cluster' column that the next cell reads.
result = assign_model(kmeans)
# Last expression of the cell: the notebook renders the first labelled rows.
result.head()

In [None]:
# Number of rows assigned to each cluster label.
print(result['Cluster'].value_counts())

In [None]:
# Persist the fitted model under the name 'customer_segmentation'
# (PyCaret writes it as a pickle file in the working directory).
save_model(kmeans, 'customer_segmentation')
print('✓ Model saved')

In [None]:
# Build the summary from the fitted results rather than hard-coded text, so the
# report cannot drift from what was actually trained.

# Count the distinct labels that assign_model() wrote to the 'Cluster' column.
n_clusters = result['Cluster'].nunique()

# pull() returns the most recently displayed PyCaret score grid.
# NOTE(review): this assumes the create_model() grid is still the latest one,
# i.e. no later call displayed another table — confirm for the PyCaret version
# in use. When the column is missing the score is reported as 'n/a' instead of
# an unmeasured claim.
score_grid = pull()
if 'Silhouette' in getattr(score_grid, 'columns', ()):
    silhouette_text = f"{score_grid['Silhouette'].iloc[0]:.4f}"
else:
    silhouette_text = 'n/a'

banner = '=' * 60
print(banner)
print('CLUSTERING SUMMARY')
print(banner)
print('Algorithm: K-Means')
print(f'Clusters: {n_clusters}')
print(f'Silhouette Score: {silhouette_text}')
print('✓ COMPLETED')
print(banner)