# Upper layer의 long bridge 조작 - trial1 w/ custom HNSW

In [44]:
import numpy as np
import h5py
import struct
import networkx as nx
from heapq import heapify, heappop, heappush, heapreplace, nlargest, nsmallest
from operator import itemgetter
from random import random

from numpy.f2py.auxfuncs import throw_error
from sklearn.cluster import KMeans
from collections import defaultdict

class HNSW:
    # self._graphs[level][i] contains a {j: dist} dictionary,
    # where j is a neighbor of i and dist is distance

   # L2 / cosine (스칼라) 함수는 기존대로 유지하되 안정성 보강
    def l2_distance(self, a, b):
        return float(np.linalg.norm(a - b))

    def cosine_distance(self, a, b):
        na = np.linalg.norm(a) + 1e-12
        nb = np.linalg.norm(b) + 1e-12
        return 1.0 - float(np.dot(a, b) / (na * nb))

    def vectorized_distance_(self, x, ys):
        ys_arr = np.asarray(ys)
        if ys_arr.ndim == 1:
            ys_arr = ys_arr.reshape(1, -1)
        if self.distance_type == "l2":
            # squared or actual norm: 기존 스칼라가 np.linalg.norm을 사용하므로 일관성 위해 norm 사용
            return np.linalg.norm(ys_arr - x, axis=1)
        elif self.distance_type == "cosine":
            x_norm = x / (np.linalg.norm(x) + 1e-12)
            ys_norm = ys_arr / (np.linalg.norm(ys_arr, axis=1, keepdims=True) + 1e-12)
            return 1.0 - (ys_norm @ x_norm)
        else:
            # fallback: 호출 가능한 distance_func으로 루프
            return np.array([self.distance_func(x, y) for y in ys_arr])

    def __init__(self, distance_type, M=5, efConstruction=200, Mmax=None):
        if distance_type == "l2":
            distance_func = self.l2_distance
        elif distance_type == "cosine":
            distance_func = self.cosine_distance
        else:
            raise TypeError('Please check your distance type!')
        self.distance_func = distance_func
        self.distance_type = distance_type  # 추가
        self.vectorized_distance = self.vectorized_distance_
        self._M = M
        self._efConstruction = efConstruction
        self._Mmax = 2 * M if Mmax is None else Mmax
        self._level_mult = 1 / np.log(M)
        self._graphs = []
        self._enter_point = None
        self.data = []
        self.visited_count = 0

        ##########
        self.visited_per_hop = []
        self.ann_per_hop = []
        #########

    ### Algorithm 1: INSERT
    def insert(self, q, efConstruction=None):

        if efConstruction is None:
            efConstruction = self._efConstruction

        distance = self.distance_func
        data = self.data
        graphs = self._graphs
        ep = self._enter_point
        M = self._M

        # line 4: determine level for the new element q
        l = int(-np.log(random()) * self._level_mult) + 1
        idx = len(data)
        data.append(q)

        if ep is not None:
            neg_dist = -distance(q, data[ep])
            # distance(q, data[ep])|

            # line 5-7: find the closest neighbor for levels above the insertion level
            for lc in reversed(graphs[l:]):
                neg_dist, ep = self._search_layer(q, [(neg_dist, ep)], lc, 1)[0]

            # line 8-17: insert q at the relevant levels; W is a candidate list
            layer0 = graphs[0]
            W = [(neg_dist, ep)]  ## 추가

            for lc in reversed(graphs[:l]):
                M_layer = M if lc is not layer0 else self._Mmax

                # line 9: update W with the closest nodes found in the graph
                W = self._search_layer(q, W, lc, efConstruction)  ## 변경

                # line 10: insert the best neighbors for q at this layer
                lc[idx] = layer_idx = {}
                self._select(layer_idx, W, M_layer, lc, heap=True)

                # line 11-13: insert bidirectional links to the new node
                for j, dist in layer_idx.items():
                    self._select(lc[j], (idx, dist), M_layer, lc)

        # line 18: create empty graphs for all new levels
        for _ in range(len(graphs), l):
            graphs.append({idx: {}})
            self._enter_point = idx

    ### Algorithm 5: K-NN-SEARCH
    def search(self, q, K=5, efSearch=20):
        """Find the K points closest to q."""

        distance = self.distance_func
        graphs = self._graphs
        ep = self._enter_point
        self.visited_count = 0

        if ep is None:
            raise ValueError("Empty graph")

        neg_dist = -distance(q, self.data[ep])

        # line 1-5: search from top layers down to the second level
        for lc in reversed(graphs[1:]):
            neg_dist, ep = self._search_layer(q, [(neg_dist, ep)], lc, 1)[0]

        ##########
        self.visited_per_hop = []
        self.ann_per_hop = []
        ##########

        # line 6: search with efSearch neighbors at the bottom level
        W = self._search_layer(q, [(neg_dist, ep)], graphs[0], efSearch)

        if K is not None:
            W = nlargest(K, W)
        else:
            W.sort(reverse=True)

        return [(idx, -md) for md, idx in W]

    ### Algorithm 2: SEARCH-LAYER
    def _search_layer(self, q, W, lc, ef):

        vectorized_distance = self.vectorized_distance
        data = self.data

        # Step 1: Initialize candidate list and visited set
        C = [(-neg_dist, idx) for neg_dist, idx in W]
        heapify(C)
        heapify(W)
        visited = set(idx for _, idx in W)

        # Step 4-17: Explore neighbors until candidate list is exhausted
        while C:
            dist, c = heappop(C)
            furthest = -W[0][0]
            if dist > furthest:
                break
            neighbors = [e for e in lc[c] if e not in visited]
            visited.update(neighbors)
            if neighbors:
                # data에서 한 번에 슬라이스하여 배열 생성 후 벡터화 계산
                ys = np.vstack([data[e] for e in neighbors])
                dists = vectorized_distance(q, ys)
                for e, dist in zip(neighbors, dists):
                    self.visited_count += 1
                    neg_dist = -float(dist)
                    if len(W) < ef:
                        heappush(C, (float(dist), e))
                        heappush(W, (neg_dist, e))
                        furthest = -W[0][0]
                    elif dist < furthest:
                        heappush(C, (float(dist), e))
                        heapreplace(W, (neg_dist, e))
                        furthest = -W[0][0]

            ##########
            self.visited_per_hop.append(len(visited))
            topk = nsmallest(min(ef, len(W)), ((-neg, idx) for neg, idx in W))  # (dist, id)
            self.ann_per_hop.append([idx for _, idx in topk])
            ##########

        return W

    ### Algorithm 3: SELECT-NEIGHBORS-SIMPLE
    def _select(self, R, C, M, lc, heap=False):

        if not heap:
            idx, dist = C
            if len(R) < M:
                R[idx] = dist
            else:
                max_idx, max_dist = max(R.items(), key=itemgetter(1))
                if dist < max_dist:
                    del R[max_idx]
                    R[idx] = dist
            return

        else:
            C = nlargest(M, C)
            R.update({idx: -neg_dist for neg_dist, idx in C})


    # ======== Upper-layer graph editing helpers ========
    def max_level_map(self):
        """Return dict: node -> highest level it appears in."""
        lvl = defaultdict(int)
        for li, lc in enumerate(self._graphs):
            for i in lc.keys():
                if i not in lvl or li > lvl[i]:
                    lvl[i] = li
        return dict(lvl)

    def node_in_level(self, level, idx):
        return idx in self._graphs[level]

    def ensure_node_in_level(self, level, idx):
        if idx not in self._graphs[level]:
            self._graphs[level][idx] = {}

    def add_undirected_edge(self, level, u, v):
        """Add bidirectional edge (u,v) at given level using current distance func and M cap."""
        lc = self._graphs[level]
        # ensure nodes exist
        if u not in lc:
            lc[u] = {}
        if v not in lc:
            lc[v] = {}
        d = self.distance_func(self.data[u], self.data[v])
        # add to u
        self._select(lc[u], (v, d), self._M, lc, heap=False)
        # add to v
        self._select(lc[v], (u, d), self._M, lc, heap=False)

    def drop_undirected_edge(self, level, u, v):
        lc = self._graphs[level]
        if u in lc and v in lc[u]:
            del lc[u][v]
        if v in lc and u in lc[v]:
            del lc[v][u]

    # ======== Upper-layer robust swap helpers (M 제한을 반드시 지킬수 있도록) ========
    def _distance_idx(self, i, j):
        return self.distance_func(self.data[i], self.data[j])

    def _prune_node_to_M(self, level, u, candidate_ids):
        """Rebuild u's neighbor list at given level by pruning the candidate set down to M_layer using _select().
        Returns the set of kept neighbor ids. """
        lc = self._graphs[level]
        # ensure node exists
        if u not in lc:
            lc[u] = {}
        # decide cap per level
        M_layer = self._M if level != 0 else self._Mmax
        # build C as heap items (neg_dist, idx)
        C = []
        for v in set(candidate_ids):
            if v == u:
                continue
            d = self._distance_idx(u, v)
            C.append((-d, v))
        # prune
        R = {}
        self._select(R, C, M_layer, lc, heap=True)
        lc[u] = dict(R)
        return set(lc[u].keys())

    def apply_swaps_with_cap(self, level, drops, adds):
        """Apply batched drops/adds at a level while enforcing degree cap M per node and reciprocity.
        - drops: list of (u, v)
        - adds : list of (u, v)
        """
        lc = self._graphs[level]
        # build per-node to_drop/to_add
        to_drop = defaultdict(set)
        to_add  = defaultdict(set)
        for (u, v) in drops:
            to_drop[u].add(v)
            to_drop[v].add(u)
        for (u, v) in adds:
            to_add[u].add(v)
            to_add[v].add(u)
        # compute pools and prune per node
        affected = set(list(to_drop.keys()) + list(to_add.keys()))
        # ensure nodes exist
        for u in affected:
            if u not in lc:
                lc[u] = {}
        # gather pools
        new_neighbors = {}
        for u in affected:
            cur = set(lc.get(u, {}).keys())
            pool = (cur - to_drop[u]) | to_add[u]
            kept = self._prune_node_to_M(level, u, pool)
            new_neighbors[u] = kept
        # reciprocity fix: ensure symmetry
        for u in affected:
            for v in list(new_neighbors[u]):
                if u not in self._graphs[level].get(v, {}):
                    # add back with distance
                    d = self._distance_idx(u, v)
                    self._select(self._graphs[level][v], (u, d), self._M if level != 0 else self._Mmax, self._graphs[level], heap=False)
            # remove broken reciprocals
            curv = set(self._graphs[level][u].keys())
            for v in list(curv):
                if u not in self._graphs[level].get(v, {}):
                    # remove u from v if needed
                    if v in self._graphs[level] and u in self._graphs[level][v]:
                        del self._graphs[level][v][u]

### Data Preparation

In [45]:
def read_fvecs(filename):
    """Reads .fvecs binary file into np.ndarray of shape (n, d)."""
    with open(filename, 'rb') as f:
        data = f.read()
    dim = struct.unpack('i', data[:4])[0]
    vecs = np.frombuffer(data, dtype=np.float32)
    vecs = vecs.reshape(-1, dim + 1)[:, 1:]  # drop the leading 'dim'
    return vecs

def read_ivecs(filename):
    """Reads .ivecs binary file into np.ndarray of shape (n, k)."""
    with open(filename, 'rb') as f:
        data = f.read()
    dim = struct.unpack('i', data[:4])[0]
    vecs = np.frombuffer(data, dtype=np.int32)
    vecs = vecs.reshape(-1, dim + 1)[:, 1:]
    return vecs

def compute_true_neighbors(data, query, k, distance_func):
    """Compute true k-nearest neighbors for query vectors from data."""
    true_neighbors = []
    for q in query:
        dists = np.array([distance_func(q, x) for x in data])
        nn_indices = np.argsort(dists)[:k]
        true_neighbors.append(nn_indices)
    return np.array(true_neighbors)

def exact_topk(train_subset, queries, K, distance_type='l2'):
    out = np.empty((len(queries), K), dtype=np.int32)
    for i, q in enumerate(queries):
        if distance_type == 'l2':
            d = np.sum((train_subset - q) ** 2, axis=1)
        elif distance_type == 'angular' or distance_type == 'cosine':
            # L2 정규화 필요
            q_norm = q / (np.linalg.norm(q) + 1e-12)
            train_norm = train_subset / (np.linalg.norm(train_subset, axis=1, keepdims=True) + 1e-12)
            d = 1.0 - np.dot(train_norm, q_norm)
        else:
            raise ValueError("distance_type은 'l2' 또는 'angular'이어야 합니다.")
        idx = np.argpartition(d, K)[:K]
        idx = idx[np.argsort(d[idx])]
        out[i] = idx
    return out
# 데이터셋 경로 (현재 구조에 맞춰 수정)
base_path = "./datasets"
# 데이터셋 경로
file_path = base_path + "/glove-200-angular.hdf5"

# h5py를 사용하여 파일 열기
with h5py.File(file_path, 'r') as f:
    # HDF5 파일 내의 데이터셋 키 확인 (어떤 데이터가 있는지 모를 경우 유용)
    print(f"Keys in HDF5 file: {list(f.keys())}")

    # 각 데이터셋을 numpy 배열로 불러오기
    train = np.array(f['train'])
    test = np.array(f['test'])
    neighbors = np.array(f['neighbors'])
    # distances 데이터셋이 있다면 같이 로드할 수 있습니다.
    # distances = np.array(f['distances'])

# random sample 100,000 from train
K_value = 10
seed = 42
n_target = 10_000
rng = np.random.RandomState(seed)
idx = rng.choice(train.shape[0], n_target, replace=False)
train = train[idx]
# only use first 1,000 test vectors for quick trial
test = test[:1000]
neighbor_subset = exact_topk(train, test, max(100, K_value), distance_type='cosine')

dim = train.shape[1]
efConstruction = 50
paramM = 16
distance_method = 'cosine'

# (1) Cosine ~= Inner Product 를 위해 L2 정규화
train_norm = train.copy()

# (3-A) Naive(랜덤 삽입 순서)
rng_vis = np.random.RandomState(42)
naive_order = rng_vis.permutation(n_target)

# (3-B) Cluster-wise(클러스터 순서로 삽입)
kmeans = KMeans(n_clusters=10, n_init='auto', random_state=21).fit(train_norm)
cluster_data = defaultdict(list)
for i, lbl in enumerate(kmeans.labels_):
    cluster_data[int(lbl)].append(i)

clustered_order = []
for c in sorted(cluster_data.keys()):
    clustered_order.extend(cluster_data[c])

Keys in HDF5 file: ['distances', 'neighbors', 'test', 'train']


In [46]:
### Build custom HNSW (naive / cluster-wise) — prototype

def l2_normalize_inplace(X):
    n = np.linalg.norm(X, axis=1, keepdims=True) + 1e-12
    X /= n

# cosine 기반이면 정규화
if distance_method == 'cosine':
    l2_normalize_inplace(train)

# 인덱스 파라미터
custom_M = 8
custom_ef = 128

# (A) naive custom HNSW
naive_custom = HNSW(distance_method, M=custom_M, efConstruction=custom_ef)
for i, idx0 in enumerate(naive_order):
    if i % 1000 == 0:
        print(f"[naive_custom] inserting {i}/{len(naive_order)}")
    naive_custom.insert(train[idx0])

# (B) cluster-wise custom HNSW
cluster_custom = HNSW(distance_method, M=custom_M, efConstruction=custom_ef)
for i, idx0 in enumerate(clustered_order):
    if i % 1000 == 0:
        print(f"[cluster_custom] inserting {i}/{len(clustered_order)}")
    cluster_custom.insert(train[idx0])

print("done: custom HNSW builds")

[naive_custom] inserting 0/10000
[naive_custom] inserting 1000/10000
[naive_custom] inserting 2000/10000
[naive_custom] inserting 3000/10000
[naive_custom] inserting 4000/10000
[naive_custom] inserting 5000/10000
[naive_custom] inserting 6000/10000
[naive_custom] inserting 7000/10000
[naive_custom] inserting 8000/10000
[naive_custom] inserting 9000/10000
[cluster_custom] inserting 0/10000
[cluster_custom] inserting 1000/10000
[cluster_custom] inserting 2000/10000
[cluster_custom] inserting 3000/10000
[cluster_custom] inserting 4000/10000
[cluster_custom] inserting 5000/10000
[cluster_custom] inserting 6000/10000
[cluster_custom] inserting 7000/10000
[cluster_custom] inserting 8000/10000
[cluster_custom] inserting 9000/10000
done: custom HNSW builds


In [47]:
from typing import List, Tuple

def ulbar_single_level(hnsw: HNSW, UL: int, q_quant=0.7, C_min=2, r_min=0.10,
                        L_neigh=3, K_per_comm=3, Bdrop=10, Badd=10,
                        retarget_entry=False, verbose=True) -> Tuple[int,int]:
    """Apply ULBAR at one level UL. Returns (#drops,#adds).
    Uses apply_swaps_with_cap() to enforce M cap and reciprocity.
    """
    lc = hnsw._graphs[UL]
    G = nx.Graph()
    for u, nbrs in lc.items():
        for v in nbrs.keys():
            if u < v:
                G.add_edge(u, v)
    if G.number_of_nodes() == 0:
        return (0, 0)
    # Retarget entry at the first (highest) level if asked
    if retarget_entry:
        deg_map = dict(G.degree())
        new_ep = max(deg_map, key=deg_map.get)
        if verbose:
            print(f"[ULBAR L{UL}] set entry_point -> {new_ep}")
        hnsw._enter_point = new_ep
    # Communities
    try:
        from networkx.algorithms.community import louvain_communities
        comms = louvain_communities(G, seed=42)
    except Exception:
        from networkx.algorithms.community import greedy_modularity_communities
        comms = greedy_modularity_communities(G)
    comm_id = {}
    for cid, C in enumerate(comms):
        for x in C:
            comm_id[int(x)] = cid
    # helpers
    def coverage_and_ratio(u):
        vs = list(lc.get(u, {}).keys())
        if not vs:
            return 0, 0.0
        cov = len({comm_id.get(v, -1) for v in vs})
        ic  = sum(1 for v in vs if comm_id.get(v, -1) != comm_id.get(u, -1)) / len(vs)
        return cov, ic
    use_l2 = (hnsw.distance_func == hnsw.l2_distance)
    def dist_idx(a,b):
        if use_l2:
            return float(np.linalg.norm(hnsw.data[a] - hnsw.data[b]))
        va, vb = hnsw.data[a], hnsw.data[b]
        na = np.linalg.norm(va) + 1e-12
        nb = np.linalg.norm(vb) + 1e-12
        return 1.0 - float(np.dot(va, vb) / (na * nb))
    def jaccard_lvl(a,b):
        A = set(lc.get(a, {}).keys()); B = set(lc.get(b, {}).keys())
        if not A and not B: return 0.0
        return len(A & B) / max(len(A | B), 1)
    # Isolation
    iso_nodes = []
    for u in lc.keys():
        cov, ic = coverage_and_ratio(u)
        if cov < C_min or ic < r_min:
            iso_nodes.append((u, cov, ic))
    iso_nodes.sort(key=lambda x:(x[1], x[2]))
    # long-edge tau
    edge_lens = []
    seen = set()
    for u, nbrs in lc.items():
        for v in nbrs.keys():
            a,b = (u,v) if u < v else (v,u)
            if (a,b) in seen: continue
            seen.add((a,b))
            edge_lens.append(dist_idx(a,b))
    edge_lens = np.array(edge_lens) if len(edge_lens) else np.array([0.0])
    tau = float(np.quantile(edge_lens, q_quant)) if edge_lens.size>0 else 0.0
    # bad edges (inter-comm & long & redundant)
    bad_edges = []
    seen.clear()
    for u, nbrs in lc.items():
        for v in nbrs.keys():
            a,b = (u,v) if u < v else (v,u)
            if (a,b) in seen: continue
            seen.add((a,b))
            if comm_id.get(a,-1) == comm_id.get(b,-1):
                continue
            d = dist_idx(a,b)
            if d < tau:
                continue
            if jaccard_lvl(a,b) > 0.5:
                bad_edges.append((a,b,d))
    # fallback drops if none
    if len(bad_edges) == 0:
        intra_long = []
        seen.clear()
        for u, nbrs in lc.items():
            for v in nbrs.keys():
                a,b = (u,v) if u < v else (v,u)
                if (a,b) in seen: continue
                seen.add((a,b))
                if comm_id.get(a,-1) != comm_id.get(b,-1):
                    continue
                intra_long.append((a,b,dist_idx(a,b)))
        intra_long.sort(key=lambda x:-x[2])
        bad_edges = intra_long[:Bdrop]
    # hubs per community (degree)
    deg = dict(G.degree())
    per_comm = defaultdict(list)
    for v,dv in deg.items():
        per_comm[comm_id.get(v,-1)].append((dv,v))
    for c in per_comm:
        per_comm[c].sort(reverse=True)
    # medoids
    def medoid_of_comm(C):
        nodes = list(C)
        if not nodes: return None
        sub = np.asarray([hnsw.data[i] for i in nodes])
        if use_l2:
            dsum = ((sub[:,None,:]-sub[None,:,:])**2).sum(axis=2).sum(axis=1)
        else:
            subn = sub / (np.linalg.norm(sub,axis=1,keepdims=True)+1e-12)
            S = subn @ subn.T
            dsum = (1.0 - S).sum(axis=1)
        return nodes[int(np.argmin(dsum))]
    medoids = {}
    for cid,C in enumerate(comms):
        m = medoid_of_comm(C)
        if m is not None:
            medoids[cid] = m
    def nearest_comms_to_node(u, L=3):
        cu = comm_id.get(u,-1)
        pairs = []
        for cid,m in medoids.items():
            if cid == cu: continue
            pairs.append((dist_idx(u,m), cid))
        pairs.sort()
        return [cid for _,cid in pairs[:L]]
    # ADD proposals
    add_props = []
    for (u,_,_) in iso_nodes[:min(10,len(iso_nodes))]:
        for cid in nearest_comms_to_node(u, L=L_neigh):
            hubs = [v for _,v in per_comm.get(cid, [])[:K_per_comm]]
            for v in hubs:
                add_props.append((u,v,dist_idx(u,v)))
    # build batches
    drops_batch = [(a,b) for (a,b,_) in bad_edges[:Bdrop]]
    adds_batch  = [(u,v) for (u,v,_) in sorted(add_props, key=lambda x:x[2])[:Badd]]
    hnsw.apply_swaps_with_cap(UL, drops_batch, adds_batch)
    if verbose:
        print(f"[ULBAR L{UL}] cap-enforced swaps: drop={len(drops_batch)}, add={len(adds_batch)}")
    return (len(drops_batch), len(adds_batch))


def ulbar_multi_levels(hnsw: HNSW, mode='top_half', q_quant=0.7, budgets=(10,10), verbose=True):
    """Run ULBAR across multiple levels (excluding level 0).
    mode: 'all_upper' (levels max..1), 'top_half' (from max down to max//2).
    budgets: (Bdrop, Badd)
    """
    lvl_map = hnsw.max_level_map()
    if not lvl_map:
        return []
    Lmax = max(lvl_map.values())
    if mode == 'all_upper':
        target_lvls = list(range(Lmax, 0, -1))
    else:  # top_half default
        start = Lmax
        stop  = max(1, Lmax//2)
        target_lvls = list(range(start, stop-1, -1))
    if verbose:
        print(f"[ULBAR multi] target levels: {target_lvls}")
    ops = []
    for i, UL in enumerate(target_lvls):
        drops, adds = ulbar_single_level(
            hnsw, UL, q_quant=q_quant, Bdrop=budgets[0], Badd=budgets[1],
            retarget_entry=(i==0), verbose=verbose)
        ops.append((UL, drops, adds))
    return ops

In [49]:
'''
ULBAR (Upper-Layer Bridge Audit & Repair):
	•	TOP layer 그래프 생성 → 커뮤니티 분할
	•	entry point를 상층의 최대 허브로 재설정
	•	isolated node 탐지: coverage<2 또는 inter-cluster<0.25
	•	bad long edges: inter-comm ∧ 길이 quantile 상위(0.7) ∧ redundancy(Jaccard)>0.5
	•	fallback: bad_edges가 없으면 intra-comm 장거리 상위 일부 drop
	•	ADD proposals: isolated 상위 10개 × 가까운 타 커뮤니티 3개 × 허브 3개
	•	스왑 적용(drop/add): drop_undirected_edge / add_undirected_edge
'''
naive_internal_to_orig = np.array(naive_order)
cluster_internal_to_orig = np.array(clustered_order)

def map_internal_to_orig(idxs_internal, order_map):
    return [int(order_map[i]) for i in idxs_internal]

def calculate_recall(gt, pred, k=10):
    s = set(gt[:k])
    return sum(1 for x in pred[:k] if x in s) / float(k)

def last_or_zero(seq):
    return int(seq[-1]) if isinstance(seq, (list, tuple)) and len(seq) > 0 else 0

# 기존 recall & visited 측정
### Quick eval (custom HNSW naive_custom) — recall & visited
efQuick = [60, 100]
for ef in efQuick:
    legacy_recs = []
    legacy_visits = []
    for i in range(min(test.shape[0], 200)):
        q = test[i]
        res = naive_custom.search(q, K=K_value, efSearch=ef)
        internal_idxs = [ix for ix, _ in res]
        orig_idxs = map_internal_to_orig(internal_idxs, naive_internal_to_orig)
        legacy_recs.append(calculate_recall(neighbor_subset[i], orig_idxs, k=K_value))
        legacy_visits.append(last_or_zero(naive_custom.visited_per_hop))
    print(f"[custom-naive before TOP repair] ef={ef}: recall@10={np.mean(legacy_recs):.4f}, visited={np.mean(legacy_visits):.1f}")

# === Run multi-layer ULBAR (top half by default) ===
ops = ulbar_multi_levels(naive_custom, mode='top_half', q_quant=0.6, budgets=(12,12), verbose=True)
print("[ULBAR multi] ops summary:", ops)

# Verify whether all nodes in upper layer meets degree cap M, that is, no node has more than M neighbors
lvl_map = naive_custom.max_level_map()
Lmax = max(lvl_map.values())
start = Lmax
stop  = max(1, Lmax//2)
for layer in range(start,  stop-1, -1):
    lc_after = naive_custom._graphs[layer]
    violations = []
    for u, nbrs in lc_after.items():
        deg_u = len(nbrs)
        if deg_u > naive_custom._M:
            violations.append((u, deg_u))
    if violations:
        print(f"[ULBAR] degree cap violations@L{layer}: {len(violations)} (examples) ->", violations[:5])
        throw_error("M Degree cap violations detected after ULBAR!")
    else:
        print(f"[ULBAR] all nodes meet degree cap@L{layer} (M={naive_custom._M})")

### Quick eval (custom HNSW naive_custom) — recall & visited (after TOP repair)
efQuick = [60, 100]
for ef in efQuick:
    recs = []
    visits = []
    for i in range(min(test.shape[0], 200)):
        q = test[i]
        if distance_method == 'cosine':
            q = q / (np.linalg.norm(q) + 1e-12)
        res = naive_custom.search(q, K=K_value, efSearch=ef)
        internal_idxs = [ix for ix, _ in res]
        orig_idxs = map_internal_to_orig(internal_idxs, naive_internal_to_orig)
        recs.append(calculate_recall(neighbor_subset[i], orig_idxs, k=K_value))
        visits.append(last_or_zero(naive_custom.visited_per_hop))
    print(f"[custom-naive TOP repair] ef={ef}: recall@10={np.mean(recs):.4f}, visited={np.mean(visits):.1f}")


[custom-naive before TOP repair] ef=60: recall@10=0.6725, visited=503.6
[custom-naive before TOP repair] ef=100: recall@10=0.7460, visited=768.8
[ULBAR multi] target levels: [4, 3, 2]
[ULBAR L3] cap-enforced swaps: drop=12, add=0
[ULBAR L2] cap-enforced swaps: drop=12, add=12
[ULBAR multi] ops summary: [(4, 0, 0), (3, 12, 0), (2, 12, 12)]
[ULBAR] all nodes meet degree cap@L4 (M=8)
[ULBAR] all nodes meet degree cap@L3 (M=8)
[ULBAR] all nodes meet degree cap@L2 (M=8)
[custom-naive TOP repair] ef=60: recall@10=0.6680, visited=501.8
[custom-naive TOP repair] ef=100: recall@10=0.7455, visited=767.7
