# Mall Customers Clustering and Visualization
This notebook uses K-Means clustering to segment mall customers based on their annual income and spending score, and visualizes the resulting segments.

In [None]:
# Step 1: Import Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [None]:
# Step 2: Load the Data
# Read the customer records from the CSV file (the path is relative to the
# notebook's working directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='Mall_Customers.csv')
df.head(n=5)

## Step 3: Exploratory Data Analysis (EDA)

In [None]:
# Check basic info
# Print a summary of the DataFrame (column names, non-null counts, dtypes)
# so missing values or unexpected types are spotted before any modelling.
df.info()

In [None]:
# Gender Distribution
# Bar chart of how many rows fall under each value of the 'Gender' column.
gender_ax = sns.countplot(x='Gender', data=df)
gender_ax.set_title('Gender Distribution')
plt.show()

In [None]:
# Distribution of Age, Income and Spending Score
# One 10-bin histogram per numeric column, drawn on a single 12x5 inch figure.
numeric_columns = ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']
df.loc[:, numeric_columns].hist(figsize=(12, 5), bins=10)
plt.tight_layout()
plt.show()

## Step 4: Prepare Data for Clustering

In [None]:
# Select numerical features for clustering
# Only annual income and spending score are used. Both are standardised
# (zero mean, unit variance) so that neither column dominates the
# distance computations K-Means relies on.
feature_columns = ['Annual Income (k$)', 'Spending Score (1-100)']
X = df[feature_columns]
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

## Step 5: Elbow Method to Determine the Optimal Number of Clusters

In [None]:
# Fit K-Means for k = 1..10 and record the inertia (within-cluster sum of
# squared distances) of each fit. The "elbow" where the curve flattens
# suggests a reasonable number of clusters.
k_values = range(1, 11)
inertia = []
for k in k_values:
    # n_init is set explicitly: scikit-learn's default changed from 10 to
    # 'auto' in 1.4 (with a FutureWarning in 1.2/1.3), so relying on the
    # default makes the curve differ between library versions.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)

plt.plot(k_values, inertia, 'bo-')
plt.xticks(list(k_values))  # one tick per candidate k so the elbow is easy to read off
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method')
plt.grid(True)
plt.show()

## Step 6: Apply K-Means with Optimal k

In [None]:
# Using k=3 as optimal cluster count
# NOTE(review): k is hard-coded here; confirm that 3 matches the elbow seen
# in the Step 5 inertia plot before relying on these segments.
# n_init is pinned because scikit-learn's default changed across releases
# (10 -> 'auto' in 1.4); random_state keeps the labels reproducible.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
# Store each customer's cluster label alongside the original columns.
df['Cluster'] = kmeans.fit_predict(X_scaled)
# Preview only the first rows instead of rendering the whole frame.
df.head()

## Step 7: Visualize Clusters

In [None]:
# Plot every customer in the income / spending-score plane, coloured by the
# cluster label stored in the 'Cluster' column.
segments_ax = sns.scatterplot(
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster',
    palette='Set1',
    s=100,
    data=df,
)
segments_ax.set_title('Customer Segments')
plt.show()