In [1]:
from collections import Counter
from sklearn.metrics import pairwise_distances
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 
from matplotlib.ticker import MultipleLocator
from silhouette_upper_bound import upper_bound_samples, upper_bound
from sklearn.datasets import make_blobs
from utils import algorithm_kmedoids
from sklearn.metrics import silhouette_samples

In [2]:
def _row_f(row: np.ndarray, n: int, m=1) -> float:

    y = np.sum(row[m - 1 :])
    # Initialize q
    if m == 1:
        x = 0
        q = 1
    else:
        x = np.sum(row[: m - 1])
        q = (x / (m - 1)) / (y / (n - m))

    bound_list = [1 - q]
    k_list = [m]

    for k in range(m + 1, n - m + 1):
        d_to_move = row[k - 2]

        x += d_to_move
        y -= d_to_move

        q_candidate = (x / (k - 1)) / (y / (n - k))

        bound_list.append(1 - q_candidate)
        k_list.append(k)

        if q_candidate < q:
            q = q_candidate

    return k_list, bound_list

In [3]:
# Synthetic data set: 1000 points in 300 dimensions drawn around 5 centres.
# random_state is fixed so the draw is the same on every run.
X, _ = make_blobs(
    n_samples=1000,
    n_features=300,
    centers=5,
    cluster_std=2.0,
    random_state=0,
)

In [4]:
# Confirm the dimensions of the generated array.
X.shape 

(1000, 300)

In [5]:
# Pairwise distance matrix between all rows of X (Euclidean is the
# scikit-learn default, spelled out here), and the number of samples.
D = pairwise_distances(X, metric="euclidean")
n = len(D)

In [6]:
# Cluster the points with the project's k-medoids helper, working from the
# distance matrix. NOTE(review): the positional 5 is presumably the number of
# clusters (it matches centers=5 used to generate X) -- confirm against utils.
cluster_labels = algorithm_kmedoids(D, 5, fast=True)

In [7]:
# Per-sample silhouette widths for the labelling in cluster_labels.
# metric='precomputed' tells scikit-learn that D already holds distances
# rather than feature vectors.
silh_samples = silhouette_samples(X=D, labels=cluster_labels, metric='precomputed')

In [8]:
# Drop the diagonal entry from every row of D, then sort each row in
# ascending order (np.sort works along the last axis by default).
off_diagonal = ~np.eye(D.shape[0], dtype=bool)
D_hat = np.sort(D[off_diagonal].reshape(D.shape[0], -1))

In [25]:
# Values returned by silhouette_upper_bound.upper_bound_samples for the
# distance matrix (presumably one silhouette upper bound per sample --
# confirm against the library documentation).
samples = upper_bound_samples(D)

In [26]:
# Index of the smallest entry of `samples`.
argm = np.argmin(samples)

In [27]:
# Display that smallest value.
samples[argm]

np.float64(0.6510639355599324)

In [28]:
# Silhouette width the clustering actually achieved for the same sample;
# used as the horizontal reference line in the plot below.
achieved_silh = silh_samples[argm]
achieved_silh

np.float64(0.6457547899839603)

In [29]:
# Sorted off-diagonal distances of the selected sample, and the 1 - q curve
# over k that _row_f computes from them (with its default m=1).
row = D_hat[argm,:]
k_list, bound_list = _row_f(row, n)

In [51]:
# Plot 1 - q against k for the selected sample, together with the silhouette
# width the clustering achieved for it and two reference values of k, and
# write the figure to disk.

REFERENCE_KS = (2, 200)           # k values marked with vertical lines
X_LIMITS = (-10, 310)             # visible range of k
Y_LIMITS = (0.5, 0.7)             # visible range of the bound / width
OUTPUT_PATH = "q_search_demo.pdf"

plt.style.use('seaborn-v0_8-white')

fig, ax = plt.subplots(figsize=(9, 6))

# Main curve; the first entry of each list is skipped.
# NOTE(review): `\k` is not a standard LaTeX math macro -- check how the
# legend renders; the label is probably a typo. Left unchanged here.
ax.plot(
    k_list[1:],
    bound_list[1:],
    label=r"$1-q(\k,k)$",
    color="#336699",
    linewidth=3,
    zorder=2,             # keep the curve above the reference lines
)

# Horizontal reference: the silhouette width actually achieved.
ax.axhline(
    y=achieved_silh,
    color="crimson",
    linestyle="--",
    linewidth=2,
    label="Achieved silhouette width",
)

# Vertical references at the chosen k values.
for ref_number, k_ref in enumerate(REFERENCE_KS, start=1):
    ax.axvline(
        x=k_ref,
        color="black",
        linestyle="-.",
        linewidth=1.5,
        label=rf"Reference {ref_number} ($k={k_ref}$)",
        alpha=0.7,
    )

# Fix the view BEFORE placing the markers: they are anchored to the bottom of
# the axes, so the lower y-limit must already be the final one when it is read.
ax.set_xlim(*X_LIMITS)
ax.set_ylim(*Y_LIMITS)

y_floor = ax.get_ylim()[0]
for k_ref in REFERENCE_KS:
    # clip_on=False keeps the half of the marker below the axes edge visible.
    ax.scatter(
        k_ref, y_floor, color='gray', marker='v', s=100, zorder=3, clip_on=False
    )

# The y axis is left unlabelled; the legend identifies what is plotted.
ax.set_xlabel(r"$k$", fontsize=14, fontweight='semibold')
ax.tick_params(axis='both', which='major', labelsize=14)

# Legend sits outside the axes, to the right, without a frame.
ax.legend(
    loc="center left",
    bbox_to_anchor=(1, 0.5),
    frameon=False,
    title="",
    fontsize=14,
)

# Leave the right 15% of the figure free for the legend.
fig.tight_layout(rect=[0, 0, 0.85, 1])

fig.savefig(OUTPUT_PATH, bbox_inches="tight")
plt.close(fig)


In [41]:
# Mean of the per-sample silhouette widths, i.e. the average silhouette
# width of the clustering.
np.mean(silh_samples)

np.float64(0.6677330849537492)