In [5]:
import numpy as np

NEG = -1e12  # finite stand-in for -inf, so sums of scores never become nan


class TSPHypercubeSimplified_PatchA:
    """
    Iterative max-sum TSP solver on a (step, visited-mask, last-city) trellis.

    Patch A:
      - Two trellises are maintained:
          * psi_nb : without beta (no-beta)   -> used only to compute alpha_t(a)
          * psi_wb : with beta    (with-beta) -> used only for back-pointers/decoding
      - At t=1 the score is s(depot, a) (+ beta[0, a] in psi_wb only).
      - A transition never stays on the same city (the new city is always
        outside the visited mask).
      - The back-pointer of the first step stores the sentinel prev_last = -1,
        so the depot never appears in the middle of a decoded path.
      - The final choice is psi_wb[T, full, last] + s(last, depot) (closure).
    Simplified messages:
      - phi~, eta~, rho~, lambda, beta, zeta, delta~, gamma~, omega~
        (beta only enters the psi_wb transitions; alpha comes from psi_nb).

    The start city is swapped to internal index C-1 (the "depot"); the other
    N = C-1 cities occupy internal indices 0..N-1.  The trellis arrays have
    shape (T+1, 2**N, N), so memory and time grow exponentially with N.

    Parameters
    ----------
    D : square array-like of pairwise distances (copied as float).
    start_city : index of the tour's start/end city (default 0).
    damping : weight of the new value when updating gamma~ / omega~.
    iters : maximum number of message-passing iterations.
    verbose : print cost and route after every iteration.
    tiny_tiebreak : add tiny noise to gamma~ (and a tiny index ramp in the
        greedy fallback decoder) to break ties.
    seed : seed for the tie-break noise generator.
    patience_no_cost_change : stop once the decoded cost has stayed within
        ``cost_tol`` for this many consecutive iterations.
    cost_tol : tolerance used by the plateau test.
    """

    def __init__(self, D, start_city=None,
                 damping=0.3, iters=200, verbose=False,
                 tiny_tiebreak=False, seed=0,
                 patience_no_cost_change=10, cost_tol=1e-12):

        D = np.array(D, dtype=float)
        assert D.shape[0] == D.shape[1], "D must be square"
        C = D.shape[0]

        start_city = 0 if start_city is None else int(start_city)
        assert 0 <= start_city < C

        # Permute so that the start city becomes the last index (internal depot).
        perm = np.arange(C)
        if start_city != C - 1:
            perm[start_city], perm[C - 1] = perm[C - 1], perm[start_city]
        inv_perm = np.empty(C, dtype=int)
        inv_perm[perm] = np.arange(C)

        self.orig_D = D
        self.D = D[perm][:, perm]
        self.perm = perm
        self.inv_perm = inv_perm

        self.C = C
        self.N = C - 1
        self.depot = C - 1
        self.verbose = verbose
        self.damp = float(damping)
        self.iters = int(iters)
        self.tiny_tiebreak = bool(tiny_tiebreak)
        self.rng = np.random.default_rng(seed)
        self.patience_no_cost_change = int(patience_no_cost_change)
        self.cost_tol = float(cost_tol)

        # Similarity (bigger is better): largest distance minus distance.
        self.s = np.max(self.D) - self.D

        # Trellis dimensions: T steps, M visited-set bitmasks.
        self.T = self.N
        self.M = 1 << self.N

        # Masks grouped by number of visited cities.  Built once so that the
        # trellis passes do not rescan all 2**N masks at every step, and
        # without int.bit_count() so Python < 3.10 keeps working.
        self._masks_by_size = [[] for _ in range(self.N + 1)]
        for mask in range(self.M):
            self._masks_by_size[bin(mask).count("1")].append(mask)

        # psi (no-beta / with-beta) and back-pointers (prev_mask, prev_last).
        self.psi_nb = np.full((self.T + 1, self.M, self.N), NEG)  # no beta -> alpha
        self.psi_wb = np.full((self.T + 1, self.M, self.N), NEG)  # with beta -> decoding
        self.backptr = np.full((self.T + 1, self.M, self.N, 2), -1, dtype=int)

        # alpha_t(a), taken from psi_nb.
        self.alpha = np.full((self.T, self.N), NEG)

        # Simplified messages, indexed [city, step].
        self.gamma_tilde = np.zeros((self.N, self.T))
        self.omega_tilde = np.zeros((self.N, self.T))
        self.phi_tilde   = np.zeros((self.N, self.T))
        self.eta_tilde   = np.zeros((self.N, self.T))
        self.rho_tilde   = np.zeros((self.N, self.T))
        self.delta_tilde = np.zeros((self.N, self.T))

        # lambda_t(i, a), zeta_t(i, a), beta_t(a).
        self.lambda_ = np.zeros((self.T, self.N, self.N))
        self.zeta    = np.zeros((self.T, self.N, self.N))
        self.beta    = np.zeros((self.T, self.N))

        # Previous-iteration values used for damping.
        self._L_prev    = [np.zeros((self.N, self.N)) for _ in range(self.T)]
        self._beta_prev = [np.zeros(self.N)           for _ in range(self.T)]
        self._zeta_prev = [np.zeros((self.N, self.N)) for _ in range(self.T)]

    # ===================== Public =====================
    def run(self):
        """Iterate message passing and return ``(best_route, best_cost)``.

        Each iteration runs the forward trellis, updates all messages, decodes
        a route from psi_wb and evaluates it on the original distance matrix.
        The cheapest route seen so far is kept.  Iteration stops after
        ``iters`` rounds, or earlier once the decoded cost has not changed by
        more than ``cost_tol`` for ``patience_no_cost_change`` consecutive
        rounds.  Returns ``(None, None)`` when ``iters`` is 0.
        """
        stable = 0
        last_cost = None
        best_route, best_cost = None, None

        for it in range(self.iters):
            self._trellis_forward_dual_and_alpha()   # psi_nb, psi_wb, alpha
            self._update_phi_eta_rho()               # phi~, eta~, rho~
            self._update_lambda_beta_zeta_delta()    # lambda, beta, zeta, delta~
            self._update_gamma_omega()               # gamma~, omega~

            if self.tiny_tiebreak:
                self.gamma_tilde += 1e-12 * self.rng.standard_normal(self.gamma_tilde.shape)

            route = self.estimate_route()            # psi_wb based + closure
            cost = self._route_cost(route)

            if self.verbose:
                print(f"[{it+1:03d}] cost={cost:.12f} route={route}")

            # Track the best primal solution.
            if best_cost is None or cost < best_cost:
                best_cost, best_route = cost, route

            # Plateau early-stop.
            if last_cost is not None and abs(cost - last_cost) <= self.cost_tol:
                stable += 1
            else:
                stable = 0
            last_cost = cost
            if stable >= self.patience_no_cost_change:
                break

        return best_route, best_cost

    # ===================== Trellis (dual) & alpha =====================
    def _trellis_forward_dual_and_alpha(self):
        """Fill psi_nb, psi_wb, backptr and alpha by a forward max-sum pass.

        State (t, mask, a): t cities visited, ``mask`` is the visited set and
        ``a`` the city visited last.  psi_wb adds beta[t-1, a] on every
        arrival, psi_nb does not.  alpha[t-1, a] is the maximum of
        psi_nb[t, mask, a] over all masks (floored at NEG).
        """
        N, T, depot = self.N, self.T, self.depot
        s, beta = self.s, self.beta
        psi_nb, psi_wb = self.psi_nb, self.psi_wb
        backptr, alpha = self.backptr, self.alpha

        psi_nb.fill(NEG)
        psi_wb.fill(NEG)
        backptr.fill(-1)
        alpha.fill(NEG)

        # t = 1: depot -> a
        for a in range(N):
            m = 1 << a
            psi_nb[1, m, a] = s[depot, a]
            psi_wb[1, m, a] = s[depot, a] + beta[0, a]
            backptr[1, m, a] = (0, -1)  # sentinel: no predecessor city
            alpha[0, a] = max(alpha[0, a], psi_nb[1, m, a])

        # t = 2..T
        for t in range(2, T + 1):
            for mask in self._masks_by_size[t - 1]:
                # Ascending order, so ties resolve to the lowest-index city.
                members = [b for b in range(N) if (mask >> b) & 1]
                for a in range(N):
                    if (mask >> a) & 1:
                        continue  # already visited
                    new_mask = mask | (1 << a)
                    best_nb = NEG
                    best_wb = NEG
                    best_last = -1

                    for last in members:
                        # no-beta transition (for alpha)
                        cand_nb = psi_nb[t - 1, mask, last] + s[last, a]
                        if cand_nb > best_nb:
                            best_nb = cand_nb

                        # with-beta transition (for decoding)
                        cand_wb = psi_wb[t - 1, mask, last] + s[last, a] + beta[t - 1, a]
                        if cand_wb > best_wb:
                            best_wb = cand_wb
                            best_last = last

                    if best_nb > psi_nb[t, new_mask, a]:
                        psi_nb[t, new_mask, a] = best_nb
                    if best_wb > psi_wb[t, new_mask, a]:
                        psi_wb[t, new_mask, a] = best_wb
                        backptr[t, new_mask, a] = (mask, best_last)

                    # (new_mask, a) is produced by exactly one predecessor
                    # mask, so a running maximum here equals the maximum of
                    # psi_nb[t, :, a] over all masks.
                    if best_nb > alpha[t - 1, a]:
                        alpha[t - 1, a] = best_nb

    # ===================== Messages =====================
    def _update_phi_eta_rho(self):
        """Update phi~, eta~ and rho~ from the current gamma~ and omega~.

        phi~[i, t] = -max_{i' != i} gamma~[i', t]
        eta~[i, t] = -max_{t' != t} omega~[i, t']
        rho~       = eta~ + phi~
        Both maxima are defined as 0 when there is nothing else to compare.
        """
        N, T = self.N, self.T
        for t in range(T):
            col = self.gamma_tilde[:, t]
            for i in range(N):
                self.phi_tilde[i, t] = -np.max(np.delete(col, i)) if N > 1 else 0.0
        for i in range(N):
            row = self.omega_tilde[i, :]
            for t in range(T):
                self.eta_tilde[i, t] = -np.max(np.delete(row, t)) if T > 1 else 0.0
        self.rho_tilde = self.eta_tilde + self.phi_tilde

    def _update_lambda_beta_zeta_delta(self, kappa=0.0, llr_t=None,
                                       damp_L=0.5, damp_beta=0.5, damp_zeta=0.5):
        """Update lambda, beta, zeta and delta~ for every step t.

        Parameters
        ----------
        kappa : weight of the optional soft information; 0 disables it.
        llr_t : optional soft information added to the diagonal of zeta.
            ``llr_t[t]`` may be a scalar (same value for every city) or a
            length-N vector (one value per city), i.e. ``llr_t`` may have
            shape (T,) or (T, N).
        damp_L, damp_beta, damp_zeta : weight of the new value when blending
            with the previous iteration's lambda / beta / zeta.

        NOTE(review): L_new is column-centred in step (3) and the damping
        cache starts at zero, so the column sums taken for beta in step (5)
        are zero up to floating-point rounding.  beta therefore stays ~0 and
        psi_wb ~= psi_nb.  Confirm that this is intended.
        """
        T, N = self.T, self.N
        diag = np.arange(N)

        for t in range(T):
            # (1) Recover (rho0, rho1) from rho~.
            r = self.rho_tilde[:, t]      # shape (N,)
            rho0 = -r / N                 # off-diagonal value of row i
            rho1 = rho0 + r               # diagonal value of row i

            # (2) L_new(i, a) = rho1[i] if a == i else rho0[i].
            L_new = np.repeat(rho0[:, np.newaxis], N, axis=1)
            L_new[diag, diag] = rho1

            # (3) Double centring: row means to zero, then column means to zero.
            L_new -= L_new.mean(axis=1, keepdims=True)
            L_new -= L_new.mean(axis=0, keepdims=True)

            # (4) lambda damping.
            L = damp_L * L_new + (1 - damp_L) * self._L_prev[t]
            self.lambda_[t] = L

            # (5) beta_t(a) = sum_i lambda_it(a), re-centred against rounding.
            beta_new = L.sum(axis=0)
            beta_new -= beta_new.mean()

            # (6) beta damping.
            beta_t = damp_beta * beta_new + (1 - damp_beta) * self._beta_prev[t]
            self.beta[t, :] = beta_t

            # (7) zeta_it(a) = alpha_t(a) + beta_t(a) - lambda_it(a).
            a_t = self.alpha[t, :]  # from psi_nb
            Z_new = a_t[np.newaxis, :] + beta_t[np.newaxis, :] - L

            # (8) Optional soft information, injected on the diagonal only.
            #     Broadcasting handles both scalar and per-city llr_t[t].
            if kappa and llr_t is not None:
                Z_new[diag, diag] += kappa * np.asarray(llr_t[t], dtype=float)

            # (9) zeta damping.
            Z = damp_zeta * Z_new + (1 - damp_zeta) * self._zeta_prev[t]
            self.zeta[t] = Z

            # (10) delta~_it = zeta_it(i) - max_{a != i} zeta_it(a).
            for i in range(N):
                zi = Z[i, :]
                self.delta_tilde[i, t] = 0.0 if N == 1 else (zi[i] - np.max(np.delete(zi, i)))

        # Refresh the damping caches.
        self._L_prev    = [self.lambda_[t].copy() for t in range(T)]
        self._beta_prev = [self.beta[t].copy()    for t in range(T)]
        self._zeta_prev = [self.zeta[t].copy()    for t in range(T)]

    def _update_gamma_omega(self):
        """Damped update: gamma~ <- eta~ + delta~, omega~ <- phi~ + delta~."""
        gamma_new = self.eta_tilde + self.delta_tilde
        omega_new = self.phi_tilde + self.delta_tilde
        self.gamma_tilde = self.damp * gamma_new + (1 - self.damp) * self.gamma_tilde
        self.omega_tilde = self.damp * omega_new + (1 - self.damp) * self.omega_tilde

    # ===================== Decode =====================
    def estimate_route(self):
        """Decode a closed tour from psi_wb and return it in original indices.

        The last city maximises psi_wb[T, full_mask, last] + s(last, depot);
        the rest of the tour follows the back-pointers.  If no final state is
        reachable, a greedy pick on alpha (one unused city per step) is used
        instead.  The returned list starts and ends with the start city.
        """
        N, T, depot = self.N, self.T, self.depot
        full_mask = (1 << N) - 1
        best_val = NEG
        best_last = -1

        for last in range(N):
            base = self.psi_wb[T, full_mask, last]
            if base <= NEG / 2:
                continue  # unreachable final state
            val = base + self.s[last, depot]  # closure
            if val > best_val:
                best_val = val
                best_last = last

        if best_last < 0:
            # Fallback: greedy on alpha.
            route_internal = [depot]
            used = set()
            for t in range(T):
                sc = self.alpha[t].copy()
                for u in used:
                    sc[u] = NEG
                if self.tiny_tiebreak:
                    sc += 1e-15 * np.arange(N)
                a = int(np.argmax(sc))
                used.add(a)
                route_internal.append(a)
            route_internal.append(depot)
        else:
            # Backtrack until the sentinel (-1) written at t = 1 is reached.
            route_inner = []
            mask = full_mask
            last = best_last
            t = T
            while t > 0 and 0 <= last < N:
                route_inner.append(last)
                prev_mask, prev_last = self.backptr[t, mask, last]
                mask, last = prev_mask, prev_last
                t -= 1
            route_inner.reverse()
            route_internal = [depot] + route_inner + [depot]

        # perm is a single swap, hence its own inverse, so inv_perm also maps
        # internal indices back to original ones.
        return [int(self.inv_perm[c]) for c in route_internal]

    def _route_cost(self, route):
        """Return the total length of ``route`` under the original distances."""
        return float(sum(self.orig_D[route[k], route[k + 1]] for k in range(len(route) - 1)))


In [6]:
import numpy as np

NEG = -1e12  # finite stand-in for -inf, so sums of scores never become nan


class TSPHypercubeSimplified_PatchA_BCJR:
    """
    TSPHypercubeSimplified_PatchA extended with a SOVA-style BCJR pass.

    Key ideas:
      - Still max-sum (log domain).
      - Two forward trellises:
          * psi_nb : without beta (no-beta)   -> used only for alpha_t(a) (dual)
          * psi_wb : with beta    (with-beta) -> decoding and BCJR (primal)
      - Plus a backward trellis:
          * bwd_wb : best future metric from state (t, mask, last) to the end
      - BCJR-style state metric:
          * Gamma[t, mask, last] = psi_wb[t, mask, last] + bwd_wb[t, mask, last]
      - SOVA-style soft output:
          * llr[t, i] ~=
              max_{mask, last=i,  |mask|=t} Gamma(t, mask, i)
            - max_{mask, last!=i, |mask|=t} Gamma(t, mask, last)
      - The LLR is injected on the diagonal of zeta to strengthen the exchange
        of soft information between the dual and primal sides.

    The start city is swapped to internal index C-1 (the "depot"); the other
    N = C-1 cities occupy internal indices 0..N-1.  The trellis arrays have
    shape (T+1, 2**N, N), so memory and time grow exponentially with N.

    Parameters
    ----------
    D : square array-like of pairwise distances (copied as float).
    start_city : index of the tour's start/end city (default 0).
    damping : weight of the new value when updating gamma~ / omega~.
    iters : maximum number of message-passing iterations.
    verbose : print cost and route after every iteration.
    tiny_tiebreak : add tiny noise to gamma~ (and a tiny index ramp in the
        greedy fallback decoder) to break ties.
    seed : seed for the tie-break noise generator.
    patience_no_cost_change : stop once the decoded cost has stayed within
        ``cost_tol`` for this many consecutive iterations.
    cost_tol : tolerance used by the plateau test.
    kappa_bcjr : weight of the LLR injected into zeta (0 disables it).
    damp_L, damp_beta, damp_zeta : weight of the new value when blending
        lambda / beta / zeta with the previous iteration.
    """

    def __init__(self, D, start_city=None,
                 damping=0.3, iters=200, verbose=False,
                 tiny_tiebreak=False, seed=0,
                 patience_no_cost_change=10, cost_tol=1e-12,
                 kappa_bcjr=1.0,
                 damp_L=0.5, damp_beta=0.5, damp_zeta=0.5):

        D = np.array(D, dtype=float)
        assert D.shape[0] == D.shape[1], "D must be square"
        C = D.shape[0]

        start_city = 0 if start_city is None else int(start_city)
        assert 0 <= start_city < C

        # Permute so that the start city becomes the last index (internal depot).
        perm = np.arange(C)
        if start_city != C - 1:
            perm[start_city], perm[C - 1] = perm[C - 1], perm[start_city]
        inv_perm = np.empty(C, dtype=int)
        inv_perm[perm] = np.arange(C)

        self.orig_D = D
        self.D = D[perm][:, perm]
        self.perm = perm
        self.inv_perm = inv_perm

        self.C = C
        self.N = C - 1
        self.depot = C - 1

        self.verbose = verbose
        self.damp = float(damping)
        self.iters = int(iters)
        self.tiny_tiebreak = bool(tiny_tiebreak)
        self.rng = np.random.default_rng(seed)
        self.patience_no_cost_change = int(patience_no_cost_change)
        self.cost_tol = float(cost_tol)

        # BCJR / SOVA parameters.
        self.kappa_bcjr = float(kappa_bcjr)
        self.damp_L = float(damp_L)
        self.damp_beta = float(damp_beta)
        self.damp_zeta = float(damp_zeta)

        # Similarity (bigger is better): largest distance minus distance.
        self.s = np.max(self.D) - self.D

        # Trellis dimensions: T steps, M visited-set bitmasks.
        self.T = self.N
        self.M = 1 << self.N

        # Masks grouped by number of visited cities.  Built once so that the
        # trellis passes do not rescan all 2**N masks at every step, and
        # without int.bit_count() so Python < 3.10 keeps working.
        self._masks_by_size = [[] for _ in range(self.N + 1)]
        for mask in range(self.M):
            self._masks_by_size[bin(mask).count("1")].append(mask)

        # psi (no-beta / with-beta) and back-pointers (prev_mask, prev_last).
        self.psi_nb = np.full((self.T + 1, self.M, self.N), NEG)  # no beta -> alpha
        self.psi_wb = np.full((self.T + 1, self.M, self.N), NEG)  # with beta -> decoding & BCJR
        self.backptr = np.full((self.T + 1, self.M, self.N, 2), -1, dtype=int)

        # Backward trellis (with beta, closure included).
        self.bwd_wb = np.full((self.T + 1, self.M, self.N), NEG)

        # alpha_t(a), taken from psi_nb.
        self.alpha = np.full((self.T, self.N), NEG)

        # Simplified messages, indexed [city, step].
        self.gamma_tilde = np.zeros((self.N, self.T))
        self.omega_tilde = np.zeros((self.N, self.T))
        self.phi_tilde   = np.zeros((self.N, self.T))
        self.eta_tilde   = np.zeros((self.N, self.T))
        self.rho_tilde   = np.zeros((self.N, self.T))
        self.delta_tilde = np.zeros((self.N, self.T))

        # lambda_t(i, a), zeta_t(i, a), beta_t(a).
        self.lambda_ = np.zeros((self.T, self.N, self.N))
        self.zeta    = np.zeros((self.T, self.N, self.N))
        self.beta    = np.zeros((self.T, self.N))

        # Previous-iteration values used for damping.
        self._L_prev    = [np.zeros((self.N, self.N)) for _ in range(self.T)]
        self._beta_prev = [np.zeros(self.N)           for _ in range(self.T)]
        self._zeta_prev = [np.zeros((self.N, self.N)) for _ in range(self.T)]

    # ===================== Public =====================
    def run(self):
        """Iterate message passing and return ``(best_route, best_cost)``.

        Each iteration runs the forward and backward trellises, derives the
        SOVA-style LLR, updates all messages, decodes a route from psi_wb and
        evaluates it on the original distance matrix.  The cheapest route seen
        so far is kept.  Iteration stops after ``iters`` rounds, or earlier
        once the decoded cost has not changed by more than ``cost_tol`` for
        ``patience_no_cost_change`` consecutive rounds.  Returns
        ``(None, None)`` when ``iters`` is 0.
        """
        stable = 0
        last_cost = None
        best_route, best_cost = None, None

        for it in range(self.iters):
            # (1) forward trellis (dual/primal) & alpha
            self._trellis_forward_dual_and_alpha()

            # (2) backward trellis (primal with beta + closure)
            self._trellis_backward_bcjr()

            # (3) BCJR-style SOVA LLR, shape (T, N)
            llr_t = self._compute_sova_llr_from_bcjr()

            # (4) message updates
            self._update_phi_eta_rho()
            self._update_lambda_beta_zeta_delta(
                kappa=self.kappa_bcjr,
                llr_t=llr_t,
                damp_L=self.damp_L,
                damp_beta=self.damp_beta,
                damp_zeta=self.damp_zeta,
            )
            self._update_gamma_omega()

            if self.tiny_tiebreak:
                self.gamma_tilde += 1e-12 * self.rng.standard_normal(self.gamma_tilde.shape)

            # (5) route & cost
            route = self.estimate_route()
            cost = self._route_cost(route)

            if self.verbose:
                print(f"[{it+1:03d}] cost={cost:.12f} route={route}")

            # Track the best primal solution.
            if best_cost is None or cost < best_cost:
                best_cost, best_route = cost, route

            # Plateau early-stop.
            if last_cost is not None and abs(cost - last_cost) <= self.cost_tol:
                stable += 1
            else:
                stable = 0
            last_cost = cost

            if stable >= self.patience_no_cost_change:
                break

        return best_route, best_cost

    # ===================== Trellis (dual) & alpha =====================
    def _trellis_forward_dual_and_alpha(self):
        """Fill psi_nb, psi_wb, backptr and alpha by a forward max-sum pass.

        State (t, mask, a): t cities visited, ``mask`` is the visited set and
        ``a`` the city visited last.  psi_wb adds beta[t-1, a] on every
        arrival, psi_nb does not.  alpha[t-1, a] is the maximum of
        psi_nb[t, mask, a] over all masks (floored at NEG).
        """
        N, T, depot = self.N, self.T, self.depot
        s, beta = self.s, self.beta
        psi_nb, psi_wb = self.psi_nb, self.psi_wb
        backptr, alpha = self.backptr, self.alpha

        psi_nb.fill(NEG)
        psi_wb.fill(NEG)
        backptr.fill(-1)
        alpha.fill(NEG)

        # t = 1: depot -> a
        for a in range(N):
            m = 1 << a
            psi_nb[1, m, a] = s[depot, a]
            psi_wb[1, m, a] = s[depot, a] + beta[0, a]
            backptr[1, m, a] = (0, -1)  # sentinel: no predecessor city
            alpha[0, a] = max(alpha[0, a], psi_nb[1, m, a])

        # t = 2..T
        for t in range(2, T + 1):
            for mask in self._masks_by_size[t - 1]:
                # Ascending order, so ties resolve to the lowest-index city.
                members = [b for b in range(N) if (mask >> b) & 1]
                for a in range(N):
                    if (mask >> a) & 1:
                        continue  # already visited
                    new_mask = mask | (1 << a)
                    best_nb = NEG
                    best_wb = NEG
                    best_last = -1

                    for last in members:
                        # no-beta transition (for alpha)
                        cand_nb = psi_nb[t - 1, mask, last] + s[last, a]
                        if cand_nb > best_nb:
                            best_nb = cand_nb

                        # with-beta transition (for decoding)
                        cand_wb = psi_wb[t - 1, mask, last] + s[last, a] + beta[t - 1, a]
                        if cand_wb > best_wb:
                            best_wb = cand_wb
                            best_last = last

                    if best_nb > psi_nb[t, new_mask, a]:
                        psi_nb[t, new_mask, a] = best_nb
                    if best_wb > psi_wb[t, new_mask, a]:
                        psi_wb[t, new_mask, a] = best_wb
                        backptr[t, new_mask, a] = (mask, best_last)

                    # (new_mask, a) is produced by exactly one predecessor
                    # mask, so a running maximum here equals the maximum of
                    # psi_nb[t, :, a] over all masks.
                    if best_nb > alpha[t - 1, a]:
                        alpha[t - 1, a] = best_nb

    # ===================== Backward trellis (for BCJR) =====================
    def _trellis_backward_bcjr(self):
        """Fill bwd_wb, the best future metric from every trellis state.

        bwd_wb[t, mask, last] is the maximum score of visiting all remaining
        cities from state (mask, last) at step t and returning to the depot:
          - t < T:
              bwd[t, mask, last] =
                max_{a not in mask} s[last, a] + beta[t, a] + bwd[t+1, mask|{a}, a]
          - t = T:
              only for the full mask: the closure s[last, depot]
        """
        N, T = self.N, self.T
        s, beta, bwd = self.s, self.beta, self.bwd_wb
        bwd.fill(NEG)
        full_mask = (1 << N) - 1

        # t = T: every city is visited, only the closure edge remains.
        for last in range(N):
            bwd[T, full_mask, last] = s[last, self.depot]

        # t = T-1 .. 1, in reverse.
        for t in range(T - 1, 0, -1):
            for mask in self._masks_by_size[t]:
                unvisited = [a for a in range(N) if not ((mask >> a) & 1)]
                for last in range(N):
                    if not ((mask >> last) & 1):
                        continue  # the last city must belong to the mask
                    best = NEG
                    for a in unvisited:
                        cand = s[last, a] + beta[t, a] + bwd[t + 1, mask | (1 << a), a]
                        if cand > best:
                            best = cand
                    bwd[t, mask, last] = best

    # ===================== SOVA-style LLR from BCJR =====================
    def _compute_sova_llr_from_bcjr(self):
        """Return the (T, N) SOVA-style LLR derived from psi_wb + bwd_wb.

        llr[t, i] ~=
            max_{mask, last=i,  |mask|=t+1} psi_wb[t+1, mask, i]    + bwd_wb[t+1, mask, i]
          - max_{mask, last!=i, |mask|=t+1} psi_wb[t+1, mask, last] + bwd_wb[t+1, mask, last]
        The internal time index runs over 1..T, the returned one over 0..T-1.

        If either side of the difference is unreachable (at or below NEG/2),
        the entry is set to 0.  This covers the single-city case, where no
        competing hypothesis exists and the raw difference would be about
        1e12.
        """
        T, N = self.T, self.N
        llr = np.zeros((T, N), dtype=float)

        for t in range(1, T + 1):
            masks = self._masks_by_size[t]
            # Gamma for all states with |mask| = t; rows are masks, columns
            # are last cities.  Entries whose last city is outside the mask
            # are about 2*NEG and are removed by the NEG floor below.
            gamma = self.psi_wb[t, masks] + self.bwd_wb[t, masks]
            best_by_last = np.maximum(gamma.max(axis=0), NEG)

            for i in range(N):
                best_with = best_by_last[i]
                rivals = np.delete(best_by_last, i)
                best_without = rivals.max() if rivals.size else NEG

                if best_with <= NEG / 2 or best_without <= NEG / 2:
                    llr[t - 1, i] = 0.0
                else:
                    llr[t - 1, i] = best_with - best_without

        return llr

    # ===================== Messages =====================
    def _update_phi_eta_rho(self):
        """Update phi~, eta~ and rho~ from the current gamma~ and omega~.

        phi~[i, t] = -max_{i' != i} gamma~[i', t]
        eta~[i, t] = -max_{t' != t} omega~[i, t']
        rho~       = eta~ + phi~
        Both maxima are defined as 0 when there is nothing else to compare.
        """
        N, T = self.N, self.T
        for t in range(T):
            col = self.gamma_tilde[:, t]
            for i in range(N):
                self.phi_tilde[i, t] = -np.max(np.delete(col, i)) if N > 1 else 0.0
        for i in range(N):
            row = self.omega_tilde[i, :]
            for t in range(T):
                self.eta_tilde[i, t] = -np.max(np.delete(row, t)) if T > 1 else 0.0
        self.rho_tilde = self.eta_tilde + self.phi_tilde

    def _update_lambda_beta_zeta_delta(self, kappa=0.0, llr_t=None,
                                       damp_L=0.5, damp_beta=0.5, damp_zeta=0.5):
        """Update lambda, beta, zeta and delta~ for every step t.

        Parameters
        ----------
        kappa : weight of the optional soft information; 0 disables it.
        llr_t : optional soft information added to the diagonal of zeta.
            ``llr_t[t]`` may be a length-N vector (one value per city, the
            layout produced by ``_compute_sova_llr_from_bcjr``) or a scalar.
        damp_L, damp_beta, damp_zeta : weight of the new value when blending
            with the previous iteration's lambda / beta / zeta.

        NOTE(review): L_new is column-centred in step (3) and the damping
        cache starts at zero, so the column sums taken for beta in step (5)
        are zero up to floating-point rounding.  beta therefore stays ~0 and
        psi_wb ~= psi_nb.  Confirm that this is intended.
        """
        T, N = self.T, self.N
        diag = np.arange(N)

        for t in range(T):
            # (1) Recover (rho0, rho1) from rho~.
            r = self.rho_tilde[:, t]  # shape (N,)
            rho0 = -r / N             # off-diagonal value of row i
            rho1 = rho0 + r           # diagonal value of row i

            # (2) L_new(i, a) = rho1[i] if a == i else rho0[i].
            L_new = np.repeat(rho0[:, np.newaxis], N, axis=1)
            L_new[diag, diag] = rho1

            # (3) Double centring: row means to zero, then column means to zero.
            L_new -= L_new.mean(axis=1, keepdims=True)
            L_new -= L_new.mean(axis=0, keepdims=True)

            # (4) lambda damping.
            L = damp_L * L_new + (1 - damp_L) * self._L_prev[t]
            self.lambda_[t] = L

            # (5) beta_t(a) = sum_i lambda_it(a), re-centred against rounding.
            beta_new = L.sum(axis=0)
            beta_new -= beta_new.mean()

            # (6) beta damping.
            beta_t = damp_beta * beta_new + (1 - damp_beta) * self._beta_prev[t]
            self.beta[t, :] = beta_t

            # (7) zeta_it(a) = alpha_t(a) + beta_t(a) - lambda_it(a).
            a_t = self.alpha[t, :]  # from psi_nb
            Z_new = a_t[np.newaxis, :] + beta_t[np.newaxis, :] - L

            # (8) Optional: inject the SOVA-style LLR on the diagonal.
            #     Broadcasting handles both per-city and scalar llr_t[t].
            if kappa and llr_t is not None:
                Z_new[diag, diag] += kappa * np.asarray(llr_t[t], dtype=float)

            # (9) zeta damping.
            Z = damp_zeta * Z_new + (1 - damp_zeta) * self._zeta_prev[t]
            self.zeta[t] = Z

            # (10) delta~_it = zeta_it(i) - max_{a != i} zeta_it(a).
            for i in range(N):
                zi = Z[i, :]
                self.delta_tilde[i, t] = 0.0 if N == 1 else (zi[i] - np.max(np.delete(zi, i)))

        # Refresh the damping caches.
        self._L_prev    = [self.lambda_[t].copy() for t in range(T)]
        self._beta_prev = [self.beta[t].copy()    for t in range(T)]
        self._zeta_prev = [self.zeta[t].copy()    for t in range(T)]

    def _update_gamma_omega(self):
        """Damped update: gamma~ <- eta~ + delta~, omega~ <- phi~ + delta~."""
        gamma_new = self.eta_tilde + self.delta_tilde
        omega_new = self.phi_tilde + self.delta_tilde
        self.gamma_tilde = self.damp * gamma_new + (1 - self.damp) * self.gamma_tilde
        self.omega_tilde = self.damp * omega_new + (1 - self.damp) * self.omega_tilde

    # ===================== Decode (same as PatchA) =====================
    def estimate_route(self):
        """Decode a closed tour from psi_wb and return it in original indices.

        The last city maximises psi_wb[T, full_mask, last] + s(last, depot);
        the rest of the tour follows the back-pointers.  If no final state is
        reachable, a greedy pick on alpha (one unused city per step) is used
        instead.  The returned list starts and ends with the start city.
        """
        N, T, depot = self.N, self.T, self.depot
        full_mask = (1 << N) - 1
        best_val = NEG
        best_last = -1

        for last in range(N):
            base = self.psi_wb[T, full_mask, last]
            if base <= NEG / 2:
                continue  # unreachable final state
            val = base + self.s[last, depot]  # closure
            if val > best_val:
                best_val = val
                best_last = last

        if best_last < 0:
            # Fallback: greedy on alpha.
            route_internal = [depot]
            used = set()
            for t in range(T):
                sc = self.alpha[t].copy()
                for u in used:
                    sc[u] = NEG
                if self.tiny_tiebreak:
                    sc += 1e-15 * np.arange(N)
                a = int(np.argmax(sc))
                used.add(a)
                route_internal.append(a)
            route_internal.append(depot)
        else:
            # Backtrack until the sentinel (-1) written at t = 1 is reached.
            route_inner = []
            mask = full_mask
            last = best_last
            t = T
            while t > 0 and 0 <= last < N:
                route_inner.append(last)
                prev_mask, prev_last = self.backptr[t, mask, last]
                mask, last = prev_mask, prev_last
                t -= 1
            route_inner.reverse()
            route_internal = [depot] + route_inner + [depot]

        # perm is a single swap, hence its own inverse, so inv_perm also maps
        # internal indices back to original ones.
        return [int(self.inv_perm[c]) for c in route_internal]

    def _route_cost(self, route):
        """Return the total length of ``route`` under the original distances."""
        return float(sum(self.orig_D[route[k], route[k + 1]] for k in range(len(route) - 1)))


In [7]:
import numpy as np

class TSPBitmask:
    """Exact bitmask dynamic programme for the TSP.

    The last index (n - 1) is the depot where the tour starts and ends; the
    remaining n - 1 cities are visited exactly once in between.

    Parameters
    ----------
    dist : (n, n) array with ``dist[u][v]`` the cost of going from u to v
        (``dist.shape`` is read, so a NumPy-like array is required).
    max : city-to-city edges with ``dist[u][v] >= max`` are not used.  Edges
        that leave or enter the depot are not filtered.
    verbose : print the tour (1-based) and its cost after solving.
    """

    def __init__(self, dist, max=50, verbose=False):
        self.dist = dist
        self.verbose = verbose
        self.n = dist.shape[0]
        self.dp = {}
        self.tour = []
        self.min_cost = np.inf
        self.max = max

    def run(self):
        """Solve the instance and return ``(tour, min_cost)``.

        ``tour`` is a list of city indices that starts and ends at the depot.
        If the edge limit leaves no feasible tour, ``([], inf)`` is returned.
        For a single-city instance the tour is ``[0, 0]`` with cost
        ``dist[0][0]``.  The result is also stored on ``self.tour`` /
        ``self.min_cost`` and the DP table on ``self.dp``.
        """
        n = self.n
        depot = n - 1
        cities = list(range(n - 1))  # every city except the depot
        dist = self.dist

        # (visited_mask, current_city) -> (cost, previous_city)
        dp = {}

        # Initialise: depot -> i
        for i in cities:
            dp[(1 << i, i)] = (dist[depot][i], depot)

        # Masks are processed in increasing order; every transition adds a
        # bit, so a state is final before it is expanded.
        for visited in range(1 << (n - 1)):
            for u in cities:
                state = dp.get((visited, u))
                if state is None:
                    continue  # u not in visited, or state unreachable
                prev_cost = state[0]
                for v in cities:
                    if visited & (1 << v):
                        continue
                    if dist[u][v] >= self.max:
                        continue  # edge excluded by the limit
                    new_visited = visited | (1 << v)
                    new_cost = prev_cost + dist[u][v]
                    known = dp.get((new_visited, v))
                    if known is None or new_cost < known[0]:
                        dp[(new_visited, v)] = (new_cost, u)

        if not cities:
            # Only the depot exists: the tour never leaves it.
            tour = [depot, depot]
            min_cost = dist[depot][depot]
        else:
            end_mask = (1 << (n - 1)) - 1
            min_cost = np.inf
            last_city = -1
            for u in cities:
                state = dp.get((end_mask, u))
                if state is None:
                    continue
                cost_to_depot = state[0] + dist[u][depot]
                if cost_to_depot < min_cost:
                    min_cost = cost_to_depot
                    last_city = u

            if last_city < 0:
                # No feasible tour: nothing to reconstruct.
                tour = []
            else:
                # Walk the predecessor links back from the last city.
                tour = [depot]
                mask = end_mask
                curr = last_city
                for _ in cities:
                    tour.append(curr)
                    mask, curr = mask ^ (1 << curr), dp[(mask, curr)][1]
                tour.append(depot)
                tour.reverse()

        self.dp = dp
        self.tour = tour
        self.min_cost = min_cost

        if self.verbose:
            print(f"Tour (1-based): {[x+1 for x in tour]}")
            print(f"Total Cost: {min_cost:.4f}")

        return tour, min_cost

    def get_path(self):
        """Return the tour found by the last ``run()`` (empty before that)."""
        return self.tour

    def get_cost(self):
        """Return the cost found by the last ``run()`` (inf before that)."""
        return self.min_cost

In [8]:
# Benchmark: solve one random (asymmetric) instance with the exact bitmask DP
# and with both message-passing solvers, printing route, cost and elapsed time.
N = 13
d = np.random.rand(N,N)
import time

# time.perf_counter() is a monotonic, high-resolution clock, so the measured
# intervals are not affected by system clock adjustments.

# Exact DP; it uses the last city (N-1) as depot.
start_time1 = time.perf_counter()
solver_1 = TSPBitmask(d, verbose=False)
path1, cost1 = solver_1.run()
end_time1 = time.perf_counter()
print(path1, cost1, end_time1-start_time1)

# Message passing, started from the same depot so the routes are comparable.
start_time2 = time.perf_counter()
solver_2 = TSPHypercubeSimplified_PatchA(d, iters=200, start_city=N-1, verbose=False)
path2, cost2 = solver_2.run()
end_time2 = time.perf_counter()
print(path2, cost2, end_time2-start_time2)

# Message passing with the BCJR/SOVA extension.
start_time3 = time.perf_counter()
solver_3 = TSPHypercubeSimplified_PatchA_BCJR(d, iters=200, start_city=N-1, verbose=False)
path3, cost3 = solver_3.run()
end_time3 = time.perf_counter()
print(path3, cost3, end_time3-start_time3)

[12, 5, 1, 10, 6, 11, 4, 3, 7, 9, 2, 0, 8, 12] 1.567148170234359 0.22808337211608887
[12, 5, 1, 10, 6, 11, 4, 3, 7, 9, 2, 0, 8, 12] 1.567148170234359 3.2323782444000244
[12, 5, 1, 10, 6, 11, 4, 3, 7, 9, 2, 0, 8, 12] 1.567148170234359 8.61917757987976


In [None]:
import tarfile
# Unpack the archive.
# NOTE(review): extractall() is called without a member filter; confirm that
# ALL_tsp.tar.gz comes from a trusted source.
with tarfile.open("ALL_tsp.tar.gz", "r:gz") as tar:
    tar.extractall(path="ALL_tsp")  # contents are unpacked into the ALL_tsp directory
!pip install tsplib95
import tsplib95

import gzip
import shutil
import os

def load_tsplib(name, dir_path="ALL_tsp"):
    """Load the TSPLIB instance ``name`` from ``dir_path`` via ``tsplib95``.

    If ``<dir_path>/<name>.tsp`` does not exist but ``<name>.tsp.gz`` does,
    the gzip file is decompressed next to it first.

    Args:
        name: Instance name without extension (e.g. ``"burma14"``).
        dir_path: Directory that holds the ``.tsp`` / ``.tsp.gz`` files.

    Returns:
        Whatever ``tsplib95.load`` returns for the ``.tsp`` path.

    Raises:
        FileNotFoundError: Neither the ``.tsp`` nor the ``.tsp.gz`` exists.
        OSError, EOFError: The ``.gz`` file is unreadable or corrupt; in that
            case no ``.tsp`` file is left behind.
    """
    tsp_path = os.path.join(dir_path, f"{name}.tsp")
    gz_path = tsp_path + ".gz"

    if not os.path.exists(tsp_path):
        if not os.path.exists(gz_path):
            raise FileNotFoundError(f"{tsp_path} 또는 {gz_path} 가 존재하지 않습니다.")

        # Decompress into a temporary sibling and rename only on success, so
        # an interrupted or corrupt decompression never leaves a truncated
        # .tsp file that later calls would treat as valid.
        tmp_path = tsp_path + ".part"
        try:
            with gzip.open(gz_path, "rb") as f_in, open(tmp_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.replace(tmp_path, tsp_path)
        finally:
            # After a successful replace the temp file is gone; after a
            # failure this removes the partial output.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    return tsplib95.load(tsp_path)

# Lookup table: TSPLIB instance name -> optimal tour length.
# Used below as the reference value (OPT) when computing the optimality gap.
tsp_dict = {
    "a280": 2579,
    "ali535": 202339,
    "att48": 10628,
    "att532": 27686,
    "bayg29": 1610,
    "bays29": 2020,
    "berlin52": 7542,
    "bier127": 118282,
    "brazil58": 25395,
    "brd14051": 469385,
    "brg180": 1950,
    "burma14": 3323,
    "ch130": 6110,
    "ch150": 6528,
    "d198": 15780,
    "d493": 35002,
    "d657": 48912,
    "d1291": 50801,
    "d1655": 62128,
    "d2103": 80450,
    "d15112": 1573084,
    "d18512": 645238,
    "dantzig42": 699,
    "dsj1000": 18659688,
    "eil51": 426,
    "eil76": 538,
    "eil101": 629,
    "fl417": 11861,
    "fl1400": 20127,
    "fl1577": 22249,
    "fl3795": 28772,
    "fnl4461": 182566,
    "fri26": 937,
    "gil262": 2378,
    "gr17": 2085,
    "gr21": 2707,
    "gr24": 1272,
    "gr48": 5046,
    "gr96": 55209,
    "gr120": 6942,
    "gr137": 69853,
    "gr202": 40160,
    "gr229": 134602,
    "gr431": 171414,
    "gr666": 294358,
    "hk48": 11461,
    "kroA100": 21282,
    "kroB100": 22141,
    "kroC100": 20749,
    "kroD100": 21294,
    "kroE100": 22068,
    "kroA150": 26524,
    "kroB150": 26130,
    "kroA200": 29368,
    "kroB200": 29437,
    "lin105": 14379,
    "lin318": 42029,
    "linhp318": 41345,
    "nrw1379": 56638,
    "p654": 34643,
    "pa561": 2763,
    "pcb442": 50778,
    "pcb1173": 56892,
    "pcb3038": 137694,
    "pla7397": 23260728,
    "pla33810": 66048945,
    "pla85900": 142382641,
    "pr76": 108159,
    "pr107": 44303,
    "pr124": 59030,
    "pr136": 96772,
    "pr144": 58537,
    "pr152": 73682,
    "pr226": 80369,
    "pr264": 49135,
    "pr299": 48191,
    "pr439": 107217,
    "pr1002": 259045,
    "pr2392": 378032,
    "rat99": 1211,
    "rat195": 2323,
    "rat575": 6773,
    "rat783": 8806,
    "rd100": 7910,
    "rd400": 15281,
    "rl1304": 252948,
    "rl1323": 270199,
    "rl1889": 316536,
    "rl5915": 565530,
    "rl5934": 556045,
    "rl11849": 923288,
    "si175": 21407,
    "si535": 48450,
    "si1032": 92650,
    "st70": 675,
    "swiss42": 1273,
    "ts225": 126643,
    "tsp225": 3916,
    "u159": 42080,
    "u574": 36905,
    "u724": 41910,
    "u1060": 224094,
    "u1432": 152970,
    "u1817": 57201,
    "u2152": 64253,
    "u2319": 234256,
    "ulysses16": 6859,
    "ulysses22": 7013,
    "usa13509": 19982859,
    "vm1084": 239297,
    "vm1748": 336556,
}

  tar.extractall(path="ALL_tsp")  # output_dir에 풀림




In [None]:
import numpy as np

def project_to_permutation(path_raw, N, start):
    """Project a raw solver path onto an ordering of the cities 0..N-1.

    The raw path may contain duplicates, omissions, or out-of-range values.
    No greedy repair is performed; the rules are fully deterministic:

    - values outside ``[0, N-1]`` are dropped;
    - only the first occurrence of each city is kept;
    - ``start`` is inserted at the front if it never appears;
    - cities that are still missing are appended in ascending order;
    - the sequence is rotated so that ``start`` comes first;
    - the result is cut to at most ``N`` entries as a safety net.

    Args:
        path_raw: Iterable of city indices as produced by a solver.
        N: Number of cities.
        start: City that must appear first in the result.

    Returns:
        A list of city indices beginning with ``start``.
    """
    # In-range cities, first occurrence only (dict keys keep insertion order).
    tour = list(dict.fromkeys(city for city in path_raw if 0 <= city < N))
    present = set(tour)

    # Make sure the start city is part of the tour.
    if start not in present:
        tour.insert(0, start)
        present.add(start)

    # Append every city the solver left out, smallest index first.
    tour.extend(city for city in range(N) if city not in present)

    # Rotate so the start city leads; a pivot of 0 leaves the order unchanged.
    pivot = tour.index(start)
    rotated = tour[pivot:] + tour[:pivot]

    return rotated[:N]

def tsplib_cycle_cost(problem, order_idx, idx2lab):
    """Return the closed-tour cost of ``order_idx`` under TSPLIB weights.

    Args:
        problem: Object exposing ``get_weight(a, b)`` for two node labels.
        order_idx: Sequence of 0-based city indices in visiting order.
        idx2lab: Mapping from 0-based index to the problem's node label.

    Returns:
        Sum of ``problem.get_weight`` over consecutive pairs, including the
        closing edge from the last city back to the first (0 for an empty
        order).
    """
    labels = [idx2lab[i] for i in order_idx]
    # Pair each label with its successor; the last one wraps to the first.
    successors = labels[1:] + labels[:1]
    return sum(problem.get_weight(a, b) for a, b in zip(labels, successors))

# --- 1) Load the TSPLIB instance --------------------------------------------
name = "burma14"
problem = load_tsplib(name)
nodes = list(problem.get_nodes())  # TSPLIB node labels (usually 1..N)
N = len(nodes)

# Label <-> 0-based index maps.
idx2lab = dict(enumerate(nodes))
lab2idx = {lab: i for i, lab in idx2lab.items()}

# --- 2) Integer distance matrix built with the TSPLIB weight function -------
D = np.zeros((N, N), dtype=np.int32)
for a in nodes:
    for b in nodes:
        ia, ib = lab2idx[a], lab2idx[b]
        if ia != ib:  # the diagonal stays 0
            D[ia, ib] = problem.get_weight(a, b)

# --- 3) Solver input: float copy + tie-breaking jitter + scaling ------------
rng = np.random.default_rng(0)  # fixed seed -> reproducible jitter
start = 0

# The jitter is added to every entry (diagonal included), then the whole
# matrix is scaled by 1.1. Only the solver sees this perturbed matrix; the
# final score below is computed from the exact TSPLIB weights.
jitter = 1e-6 * rng.standard_normal(D.shape)
D = (D.astype(float) + jitter) * 1.1

# --- 4) Run the solver directly on D (no extra depot node is added) ---------
solver = TSPHypercubeSimplified_PatchA(D)
# The raw path may contain duplicates or omit cities; it is repaired below.
path_idx_raw, _ = solver.run()

# --- 5) Project onto a valid ordering (deterministic, no greedy repair) -----
path_idx = project_to_permutation(path_idx_raw, N, start)

# --- 6) Score with exact TSPLIB weights (closing edge included) -------------
final_cost = tsplib_cycle_cost(problem, path_idx, idx2lab)
OPT = tsp_dict[name]
gap = (final_cost - OPT) / OPT * 100.0

print("order:", path_idx)
print("TSPLIB-accurate cost:", final_cost)
print("OPT:", OPT)
print("gap [%]:", f"{gap:.2f}")


order: [0, 1, 13, 2, 3, 4, 5, 11, 6, 12, 7, 10, 8, 9]
TSPLIB-accurate cost: 3323
OPT: 3323
gap [%]: 0.00
