# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans

# Read the customer table from a CSV expected in the working directory.
file_path = "Mall_Customers.csv"
df = pd.read_csv(file_path)

# Quick sanity checks: the first rows, then the count of missing cells per column.
print("Data Preview:", df.head(), sep="\n")
print("\nMissing Values:", df.isna().sum(), sep="\n")

# Feature matrix for clustering: the columns at positions 3 and 4, as a NumPy array.
# NOTE(review): presumably annual income and spending score, matching the axis
# labels used when plotting -- confirm against the CSV header.
X = df.iloc[:, [3, 4]].to_numpy()

# Elbow method: fit K-Means for each candidate k and record the model's inertia
# (within-cluster sum of squares) so the curve can be inspected for a bend.
k_values = range(1, 11)  # candidate cluster counts, 1 through 10

wcss = []  # Within-Cluster Sum of Squares, one entry per k in k_values
for k in k_values:
    # n_init is set explicitly: scikit-learn's default changed from 10 to "auto"
    # in 1.4, so relying on it gives version-dependent inertia values (and a
    # FutureWarning on 1.2/1.3).
    kmeans = KMeans(n_clusters=k, init="k-means++", n_init=10, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

# Plot WCSS against k; the x values come from the same range used for fitting
# so the two can never drift apart.
plt.figure(figsize=(8, 5))
plt.plot(k_values, wcss, marker="o", linestyle="--", color="blue")
plt.xlabel("Number of Clusters (k)")
plt.ylabel("WCSS")
plt.title("Elbow Method for Optimal k")
plt.show()

# Fit the final model with k hard-coded to 5 and get a cluster label per row of X.
# n_init is set explicitly: scikit-learn's default changed from 10 to "auto" in
# 1.4, so relying on it makes the resulting labels depend on the installed version.
kmeans = KMeans(n_clusters=5, init="k-means++", n_init=10, random_state=42)
clusters = kmeans.fit_predict(X)

# Attach the labels to the original frame so they travel with each customer row.
df["Cluster"] = clusters

# Scatter the two feature columns coloured by cluster, then overlay the fitted
# cluster centres as large red X markers.
centroids = kmeans.cluster_centers_
plt.figure(figsize=(10, 6))
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=clusters, palette="viridis", s=100)
plt.scatter(
    centroids[:, 0],
    centroids[:, 1],
    s=300,
    c="red",
    label="Centroids",
    marker="X",
)
plt.xlabel("Annual Income (k$)")
plt.ylabel("Spending Score (1-100)")
plt.title("Customer Segmentation using K-Means")
plt.legend()
plt.show()

# Persist the frame (without the index column) to the working directory and
# tell the user where it went.
output_path = "Clustered_Customers.csv"
df.to_csv(path_or_buf=output_path, index=False)
print("Clustered data saved as 'Clustered_Customers.csv'")
