In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage
from scipy.spatial.distance import pdist

# Demo: hierarchical clustering of a toy dataset, shown as a dendrogram with
# several candidate cut heights, followed by the number of flat clusters each
# cut produces.

# Reproducible sample data: 10 points in 2-D, uniform on [0, 1). The fixed
# seed keeps the dendrogram and the printed counts identical across runs.
np.random.seed(0)
X = np.random.rand(10, 2)

# Condensed pairwise distance vector. pdist uses the Euclidean metric by
# default, which is what Ward linkage requires to be well defined.
dist_matrix = pdist(X)

# Agglomerative clustering using Ward's minimum-variance criterion.
Z = linkage(dist_matrix, method='ward')

# Candidate cut heights, in the same units as the dendrogram's y-axis.
thresholds = [0.5, 1.0, 1.5]

# One dash pattern per threshold so the legend entries can be matched to the
# lines; patterns are reused cyclically if more thresholds are added.
line_styles = ['--', '-.', ':']

# Draw the dendrogram and overlay one horizontal line per threshold.
plt.figure(figsize=(10, 7))
dendrogram(Z, leaf_rotation=90, leaf_font_size=12)

for index, threshold in enumerate(thresholds):
    plt.axhline(
        y=threshold,
        color='red',
        linestyle=line_styles[index % len(line_styles)],
        label=f'Threshold={threshold}',
    )

plt.title("Custom Dendrogram with Annotated Thresholds")
plt.legend()
plt.show()

# Count the flat clusters obtained by cutting the tree at each threshold.
# NOTE: despite the variable name and the printed message, no optimality
# criterion is applied here; the list simply holds one cluster count per
# threshold, in the same order as `thresholds`.
optimal_cluster_number = []
for threshold in thresholds:
    # criterion='distance' cuts the tree at height `threshold`, matching the
    # horizontal lines drawn above.
    cluster_labels = fcluster(Z, t=threshold, criterion='distance')
    # Each flat cluster gets its own label, so the number of distinct labels
    # is the number of clusters.
    optimal_cluster_number.append(len(np.unique(cluster_labels)))

print("Optimal cluster numbers for different thresholds:", optimal_cluster_number)

