In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage

# Build a reproducible two-blob toy dataset
np.random.seed(0)  # fixed seed so every run draws the same points

# Blob 1: 100 points around the origin
mean1, cov1 = [0, 0], [[0.5, 0.1], [0.1, 0.5]]
data1 = np.random.multivariate_normal(mean=mean1, cov=cov1, size=100)

# Blob 2: 100 points around (5, 5), drawn after blob 1 so the RNG
# sequence stays the same
mean2, cov2 = [5, 5], [[1, 0.5], [0.5, 1]]
data2 = np.random.multivariate_normal(mean=mean2, cov=cov2, size=100)

# Stack row-wise: blob 1 rows first, then blob 2 rows
data = np.concatenate((data1, data2), axis=0)

# Rescale every feature to zero mean and unit variance.
# The scaler is fitted first and then applied, so `scaler` stays available
# as the fitted transformer.
scaler = StandardScaler().fit(data)
data = scaler.transform(data)

# Compute the Ward-linkage merge tree for the samples in `data`
Z = linkage(data, 'ward')

# Draw the merge hierarchy on a fresh 10x7-inch figure
plt.figure(figsize=(10, 7))
dendrogram(Z)
plt.gca().set_title("Hierarchical Clustering Dendrogram")
plt.show()

# Agglomerative (bottom-up) clustering with Ward linkage, cut at two clusters.
# fit_predict fits the estimator in place and returns its labels_ array.
cluster = AgglomerativeClustering(linkage='ward', n_clusters=2)
labels = cluster.fit_predict(data)

# Scatter the two feature columns, colouring each point by its cluster label
plt.figure(figsize=(10, 7))
plt.scatter(*data.T, c=labels, cmap='viridis')  # data.T unpacks to (x, y)
plt.gca().set_title("Hierarchical Clustering")
plt.show()
