# K-Means Clustering
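This notebook applies K-Means clustering to the preprocessed customer-churn dataset: it uses the elbow method to choose the number of clusters, fits the model, and visualizes the resulting clusters in 2D with PCA.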

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Read the preprocessed customer-churn CSV into the working DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/Users/sagarbk/Documents/WIL/customer_churn_processed.csv',
)
# Preview the first five rows (shown when this is the last expression of the cell).
df.head(n=5)


In [None]:

# Elbow method to find optimal number of clusters:
# fit K-Means for K = 1..10 and record each model's inertia, then plot
# inertia against K and look for the "elbow" where the curve flattens.

# Leave out a 'Cluster' label column if the clustering cell below has already
# been run in this session, so earlier labels are never used as a feature.
# NOTE(review): assumes every remaining column is numeric -- confirm.
elbow_features = df.drop(columns='Cluster', errors='ignore')

inertia = []
K = range(1, 11)

for k in K:
    # n_init is pinned explicitly because its default differs between
    # scikit-learn releases; this keeps results comparable across versions.
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(elbow_features)
    inertia.append(kmeans.inertia_)

# Plot the Elbow Method
plt.figure(figsize=(8, 5))
plt.plot(K, inertia, 'bo-')
plt.xlabel('Number of clusters (K)')
plt.ylabel('Inertia')
plt.title('Elbow Method For Optimal K')
plt.grid(True)
# Save before show(): show() may leave an empty figure behind on some backends.
plt.savefig('/Users/sagarbk/Documents/WIL/Clustering_Analysis/elbow_method.png')
plt.show()


In [None]:

# Apply KMeans with optimal K (example: 3) and store each row's cluster
# label in a new 'Cluster' column.

# Fit on the feature columns only. Dropping any existing 'Cluster' column
# makes this cell safe to re-run: otherwise the labels from the previous run
# would be fed back into the model as an input feature.
cluster_features = df.drop(columns='Cluster', errors='ignore')

# n_init is pinned explicitly because its default differs between
# scikit-learn releases; this keeps the labels reproducible across versions.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
clusters = kmeans.fit_predict(cluster_features)
df['Cluster'] = clusters


In [None]:

# Project the feature columns (everything except the 'Cluster' labels) onto
# two principal components so the clusters can be drawn on a 2D scatter plot.
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(df.drop(columns='Cluster'))

plt.figure(figsize=(8, 6))
# One point per row, coloured by its assigned cluster label.
plt.scatter(
    reduced_data[:, 0],
    reduced_data[:, 1],
    c=df['Cluster'],
    cmap='viridis',
)
plt.title('Customer Clusters (PCA-reduced)')
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.colorbar()
# Write the figure to disk, then display it.
plt.savefig('/Users/sagarbk/Documents/WIL/Clustering_Analysis/cluster_visualization.png')
plt.show()
