In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster

# Hierarchical (Ward) clustering of mall customers on annual income versus
# spending score, followed by a scatter plot with one series per cluster.

# Read the CSV file into a pandas DataFrame
path_to_csv = 'Mall_Customers.csv'
df = pd.read_csv(path_to_csv)

# Columns used both as clustering features and as plot axes
income_col = "Annual Income (k$)"
score_col = "Spending Score (1-100)"

# Select the features for clustering
selected_features = df[[income_col, score_col]]

# Build the agglomerative merge tree with Ward's variance-minimization linkage
linkage_matrix = linkage(selected_features, method='ward')

# Cut the tree into flat clusters: with criterion='distance', observations in
# the same flat cluster have a cophenetic distance of at most max_d.
max_d = 150  # Distance threshold; tune by inspecting dendrogram(linkage_matrix)
clusters = fcluster(linkage_matrix, t=max_d, criterion='distance')

# Add the cluster labels to the DataFrame
df['Cluster'] = clusters

# Plot the clusters
plt.figure(figsize=(10, 6))

# groupby yields the groups in ascending label order, so legend entries and
# colours are assigned deterministically instead of following the order in
# which each label happens to first appear in the CSV.
for cluster_id, cluster_data in df.groupby('Cluster'):
    plt.scatter(
        cluster_data[income_col],
        cluster_data[score_col],
        label=f'Cluster {cluster_id}',
    )

plt.xlabel(income_col)
plt.ylabel(score_col)
plt.title("Clusters of Mall Customers")
plt.legend()
plt.show()
