
# Unsupervised Learning Lab: Customer Segmentation - Solutions

## Exercise 1: Data Preprocessing and Exploration
```python
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Read the customer records into a DataFrame
df = pd.read_csv('customer_data.csv')

# Quick look at the data: first rows, then per-column summary statistics
print(df.head())
print(df.describe())

# Standardize the numerical features, overwriting the original columns
numeric_cols = ['Age', 'AnnualIncome', 'SpendingScore']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = scaled_values
```

## Exercise 2: K-Means Clustering
```python
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Feature columns shared by every K-Means fit in this exercise
features = df[['Age', 'AnnualIncome', 'SpendingScore']]

# Apply K-Means clustering with 3 clusters and store each customer's label
kmeans = KMeans(n_clusters=3, random_state=42)
df['KMeans_Cluster'] = kmeans.fit_predict(features)

# Determine the optimal number of clusters using the Elbow method.
# The candidate models use their own name (`elbow_model`) so that `kmeans`
# keeps referring to the fitted 3-cluster model behind df['KMeans_Cluster']
# instead of being overwritten by the last (k=10) candidate.
k_values = range(1, 11)
sse = []
for k in k_values:
    elbow_model = KMeans(n_clusters=k, random_state=42)
    elbow_model.fit(features)
    sse.append(elbow_model.inertia_)

# Plot SSE against k; the "elbow" is where the curve stops dropping sharply
plt.figure(figsize=(10, 6))
plt.plot(k_values, sse, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('SSE')
plt.show()

# Visualize the clusters on the income / spending-score plane,
# colored by the 3-cluster assignment stored above
plt.figure(figsize=(10, 6))
plt.scatter(df['AnnualIncome'], df['SpendingScore'], c=df['KMeans_Cluster'], cmap='viridis')
plt.title('K-Means Clustering')
plt.xlabel('Annual Income')
plt.ylabel('Spending Score')
plt.show()
```

## Exercise 3: Hierarchical Clustering
```python
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering

# Feature columns used for both the linkage matrix and the clustering model
features = df[['Age', 'AnnualIncome', 'SpendingScore']]

# Build the hierarchical merge tree with Ward linkage
Z = linkage(features, method='ward')

# Plot the dendrogram of the merge tree
plt.figure(figsize=(10, 6))
dendrogram(Z)
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.title('Dendrogram')
plt.show()

# Cluster count read off the dendrogram; assign every customer to a cluster
hc = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hc.fit_predict(features)
df['Hierarchical_Cluster'] = hierarchical_labels

# Scatter the customers on the income / spending-score plane, colored by cluster
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['AnnualIncome'], df['SpendingScore'], c=df['Hierarchical_Cluster'], cmap='viridis')
ax.set_title('Hierarchical Clustering')
ax.set_xlabel('Annual Income')
ax.set_ylabel('Spending Score')
plt.show()
```

## Exercise 4: DBSCAN Clustering
```python
from sklearn.cluster import DBSCAN

# Fit DBSCAN on the three customer features and keep the per-customer labels
features = df[['Age', 'AnnualIncome', 'SpendingScore']]
dbscan = DBSCAN(eps=0.5, min_samples=5)
df['DBSCAN_Cluster'] = dbscan.fit(features).labels_

# Scatter the customers on the income / spending-score plane, colored by label.
# NOTE: per the scikit-learn docs, noisy samples get the label -1, so they
# show up as their own color here.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['AnnualIncome'], df['SpendingScore'], c=df['DBSCAN_Cluster'], cmap='viridis')
ax.set_title('DBSCAN Clustering')
ax.set_xlabel('Annual Income')
ax.set_ylabel('Spending Score')
plt.show()
```
