# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load the Iris data and wrap the measurements in a DataFrame whose columns
# carry the feature names
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Standardize every feature before clustering (helps KMeans perform better)
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Function to perform KMeans clustering
def perform_kmeans(X, k):
    """Fit a KMeans model with ``k`` clusters on ``X`` and return it.

    Args:
        X: Feature matrix handed to ``KMeans.fit``.
        k: Number of clusters, passed to KMeans as ``n_clusters``.

    Returns:
        The fitted ``KMeans`` estimator.
    """
    # The seed is pinned to 42 so repeated runs start from the same state.
    model = KMeans(random_state=42, n_clusters=k)
    # ``fit`` returns the estimator itself, so it can be returned directly.
    return model.fit(X)

# Function to plot the elbow method to suggest the best k
def plot_elbow(X, max_k=10):
    """Plot KMeans inertia against k to help pick the number of clusters.

    Fits one KMeans model (``random_state=42``) for every k from 1 up to
    ``max_k`` and shows the resulting inertia curve with matplotlib.

    Args:
        X: Feature matrix handed to ``KMeans.fit``.
        max_k: Largest k to try. Defaults to 10. It is capped at the number
            of rows in ``X`` because KMeans cannot fit more clusters than
            there are samples.

    Raises:
        ValueError: If ``max_k`` is smaller than 1 or ``X`` has no rows.
    """
    if max_k < 1:
        raise ValueError(f"max_k must be at least 1, got {max_k}")

    n_samples = np.shape(X)[0]
    if n_samples < 1:
        raise ValueError("X must contain at least one sample")

    # Never ask for more clusters than samples; small datasets would
    # otherwise fail part-way through the sweep.
    K = range(1, min(max_k, n_samples) + 1)
    distortions = [
        KMeans(n_clusters=k, random_state=42).fit(X).inertia_ for k in K
    ]

    # Plot the elbow curve
    plt.figure(figsize=(8, 6))
    plt.plot(K, distortions, 'bo-')
    plt.xlabel('Number of Clusters (k)')
    plt.ylabel('Inertia (Distortion)')
    plt.title('Elbow Method for Optimal k')
    plt.show()

# Step 1: Use the elbow method to suggest the best value for k
print("Using the Elbow method to suggest the best k value...")
plot_elbow(X_scaled)

# Step 2: Take user input for k, re-prompting until it is a usable value
n_samples = X_scaled.shape[0]
while True:
    try:
        k = int(input("Enter the number of clusters (k): "))
    except ValueError:
        # Non-numeric text would otherwise abort the script with a traceback.
        print("Please enter a whole number.")
        continue
    # KMeans needs at least one cluster and no more clusters than samples.
    if 1 <= k <= n_samples:
        break
    print(f"k must be between 1 and {n_samples}.")

# Step 3: Perform KMeans clustering with the user-selected k
kmeans = perform_kmeans(X_scaled, k)

# Step 4: Display cluster centers and labels
print(f"\nCluster Centers for k={k}:")
print(kmeans.cluster_centers_)
print("\nCluster Labels:")
print(kmeans.labels_)

# Step 5: Visualize the clusters (using first two features for simplicity)
plt.figure(figsize=(8, 6))
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=kmeans.labels_, cmap='viridis')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='red', label='Centroids')
plt.title(f"K-Means Clustering with k={k}")
# The plotted values are the standardized features, not the raw
# measurements, so the axis labels say so.
plt.xlabel(f"{iris.feature_names[0]} (standardized)")
plt.ylabel(f"{iris.feature_names[1]} (standardized)")
plt.legend()
plt.show()
