In [None]:
!pip install numpy pandas scikit-learn matplotlib scipy

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.cluster import AgglomerativeClustering
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage

# Fetch the Iris dataset that ships with scikit-learn
iris = datasets.load_iris()

# Wrap the measurements in a DataFrame whose columns carry the feature names
df = pd.DataFrame(iris.data, columns=iris.feature_names)

# Cluster on every feature: pull the underlying NumPy array out of the frame
X = df.to_numpy()

# Distance-based clustering is sensitive to feature scale, so standardize
# each column before any distances are computed
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Bottom-up (agglomerative) clustering of the standardized samples into
# three groups using Ward linkage
agg_clust = AgglomerativeClustering(linkage='ward', n_clusters=3)
labels = agg_clust.fit(X_scaled).labels_

# Show which cluster each sample was assigned to
print("Cluster labels for each data point:", labels, sep="\n")

# Plot the dendrogram to visualize the hierarchical clustering process.
# SciPy's linkage() builds the full merge tree; Ward linkage on the
# standardized data matches the settings used for AgglomerativeClustering.
linked = linkage(X_scaled, method='ward')
plt.figure(figsize=(10, 7))
dendrogram(linked)
plt.title('Dendrogram for Agglomerative Clustering')
plt.xlabel('Data points')
# The merge heights are Ward linkage distances (they grow with the increase
# in within-cluster variance), not plain Euclidean distances between points,
# so label the axis accordingly.
plt.ylabel('Ward linkage distance')
plt.show()

# Visualize the Agglomerative Clustering result (using the first two features).
# The raw measurements in X are plotted instead of X_scaled so that the axis
# values agree with the axis labels: the labels are the original feature
# names (which for Iris carry a "(cm)" unit), whereas X_scaled holds unitless
# standardized scores. The colours still come from the clustering performed
# on the standardized data, since labels are assigned per sample.
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', marker='o', edgecolor='k', alpha=0.7)
plt.title("Agglomerative Clustering of Iris Dataset")
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.show()
