In [1]:
# ============================================================
# üß† Dual Neural Expert v3.0 ‚Äî InfoNCE + True ST Embedding
#  - M·ª•c ti√™u:
#    ‚Ä¢ Anchor@1 cao h∆°n nh·ªù in-batch hard negatives (InfoNCE)
#    ‚Ä¢ Context@1 ·ªïn ƒë·ªãnh, kh√¥ng l·ªách so v·ªõi encoder g·ªëc
#    ‚Ä¢ Kh√¥ng ƒë·ª•ng internal .auto_model, d√πng SentenceTransformer ƒë√∫ng chu·∫©n
# ============================================================

import os
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from sentence_transformers import SentenceTransformer

# ------------------------------------------------------------
# ‚öôÔ∏è CONFIGURATION
# ------------------------------------------------------------
DATA_FILE = "Advice.csv"                      # synthetic Advice dataset
MODEL_NAME = "google/embeddinggemma-300m"     # base encoder
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

EPOCHS = 20
BATCH_SIZE = 32
LR = 2e-5
SEED = 42

# InfoNCE temperature
TEMP_ANCHOR = 0.07
TEMP_CONTEXT = 0.07

# Loss weights
LAMBDA_ANCHOR  = 1.0
LAMBDA_CONTEXT = 2.0   # boost context
LAMBDA_HASCTX  = 0.5

SAVE_PATH = "models_anchor/dual_expert_Advice_v3_0"
os.makedirs(SAVE_PATH, exist_ok=True)

random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)


# ------------------------------------------------------------
# üß© Dataset
#   Expect columns: question, expert_label, anchor, has_context, context_name
# ------------------------------------------------------------
class DualAnchorDataset(Dataset):
    def __init__(self, df):
        self.data = df.to_dict("records")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data[idx]

        q = str(row["question"])
        anchor = str(row["anchor"])

        has_context = int(row.get("has_context", 0))

        if has_context == 1 and pd.notna(row.get("context_name", "")):
            context = str(row["context_name"])
        else:
            context = ""

        return q, anchor, context, has_context


# ------------------------------------------------------------
# üß† Dual Expert Adapter
#   - Nh·∫≠n embedding t·ª´ SentenceTransformer
#   - Tr·∫£ ra anchor_emb, context_emb, has_context_logit
# ------------------------------------------------------------
class DualExpertAdapter(nn.Module):
    def __init__(self, input_dim=768, hidden_dim=256):
        super().__init__()

        self.anchor_adapter = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

        self.context_adapter = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )

        self.has_context_head = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        # x: (B, D) from SentenceTransformer
        anchor_emb = F.normalize(self.anchor_adapter(x), p=2, dim=1)
        context_emb = F.normalize(self.context_adapter(x), p=2, dim=1)
        has_ctx_logit = self.has_context_head(x).squeeze(-1)
        return anchor_emb, context_emb, has_ctx_logit


# ------------------------------------------------------------
# üîß Helper: encode text with SentenceTransformer
#   - ƒê·∫£m b·∫£o d√πng pooling chu·∫©n c·ªßa SentenceTransformer
# ------------------------------------------------------------
def encode_texts(encoder: SentenceTransformer, texts, batch_size=64):
    # texts: list[str]
    # Tr·∫£ v·ªÅ: Tensor (N, D) tr√™n DEVICE
    with torch.no_grad():
        emb = encoder.encode(
            texts,
            batch_size=batch_size,
            convert_to_tensor=True,
            device=DEVICE,
            show_progress_bar=False
        )
    return emb


# ------------------------------------------------------------
# üöÄ Training loop ‚Äî Dual Expert v3.0
# ------------------------------------------------------------
def train_dual_expert_v3():
    print("\n=== üß© Training Dual Expert v3.0 ‚Äî InfoNCE Edition ===")

    # -----------------------------
    # Load & clean data
    # -----------------------------
    df = pd.read_csv(DATA_FILE)
    df = df[df["expert_label"] == "Advice"].dropna(subset=["question", "anchor"])

    if "context_name" not in df.columns:
        df["context_name"] = ""
    if "has_context" not in df.columns:
        df["has_context"] = 0

    df["has_context"] = df["has_context"].fillna(0).astype(int)

    # ƒê·∫£m b·∫£o: n·∫øu has_context = 0 th√¨ context_name = ""
    df["context_name"] = df.apply(
        lambda x: str(x["context_name"]) if x["has_context"] == 1 and pd.notna(x["context_name"]) else "",
        axis=1
    )

    print(f"‚úÖ Loaded {len(df)} Advice samples")

    dataset = DualAnchorDataset(df)
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

    # -----------------------------
    # Base encoder (frozen)
    # -----------------------------
    base_encoder = SentenceTransformer(MODEL_NAME)
    base_encoder.to(DEVICE)
    base_encoder.eval()
    for p in base_encoder.parameters():
        p.requires_grad = False

    # L·∫•y dim t·ª´ SentenceTransformer (an to√†n)
    try:
        input_dim = base_encoder.get_sentence_embedding_dimension()
    except Exception:
        # fallback n·∫øu model kh√¥ng c√≥ h√†m n√†y
        test_emb = encode_texts(base_encoder, ["test"])
        input_dim = test_emb.size(1)

    adapter = DualExpertAdapter(input_dim=input_dim, hidden_dim=256).to(DEVICE)

    optimizer = torch.optim.AdamW(adapter.parameters(), lr=LR)
    bce_loss = nn.BCEWithLogitsLoss()

    use_amp = DEVICE == "cuda"
    scaler = torch.amp.GradScaler("cuda") if use_amp else None

    # -----------------------------
    # Training epochs
    # -----------------------------
    for epoch in range(EPOCHS):
        adapter.train()
        total_loss = 0.0
        total_anchor_loss = 0.0
        total_ctx_loss = 0.0
        total_hasctx_loss = 0.0

        pbar = tqdm(loader, desc=f"Epoch {epoch+1}/{EPOCHS}")

        for batch in pbar:
            q_texts, a_texts, c_texts, has_ctx = batch
            has_ctx = torch.tensor(has_ctx, dtype=torch.float32, device=DEVICE)

            if use_amp:
                ctx_mgr = torch.amp.autocast("cuda")
            else:
                # no-op context manager
                from contextlib import nullcontext
                ctx_mgr = nullcontext()

            with ctx_mgr:
                # 1) Encode texts -> embeddings (SentenceTransformer pooling chu·∫©n)
                q_emb = encode_texts(base_encoder, list(q_texts))
                a_emb = encode_texts(base_encoder, list(a_texts))

                # Context embeddings: encode c·∫£ batch (k·ªÉ c·∫£ ch·ªó r·ªóng, s·∫Ω mask sau)
                c_emb = encode_texts(base_encoder, list(c_texts))

                # 2) Adapter forward
                q_anchor, q_ctx, has_ctx_logit = adapter(q_emb)
                a_anchor, _, _ = adapter(a_emb)
                _, c_ctx, _ = adapter(c_emb)

                # ========== Anchor InfoNCE Loss ==========
                # sim(q_i, a_j) cho to√†n batch
                sim_anchor = torch.matmul(q_anchor, a_anchor.T) / TEMP_ANCHOR   # (B, B)
                target = torch.arange(sim_anchor.size(0), device=DEVICE)       # m·ªói q kh·ªõp anchor c√πng index
                loss_anchor = F.cross_entropy(sim_anchor, target)

                # ========== Context InfoNCE Loss ==========
                mask_ctx = has_ctx > 0.5
                if mask_ctx.sum() > 1:
                    q_ctx_pos = q_ctx[mask_ctx]
                    c_ctx_pos = c_ctx[mask_ctx]

                    # in-batch similarity
                    sim_ctx = torch.matmul(q_ctx_pos, c_ctx_pos.T) / TEMP_CONTEXT
                    target_ctx = torch.arange(sim_ctx.size(0), device=DEVICE)
                    loss_ctx = F.cross_entropy(sim_ctx, target_ctx)
                else:
                    loss_ctx = torch.tensor(0.0, device=DEVICE)

                # ========== HasContext BCE ==========
                loss_hasctx = bce_loss(has_ctx_logit, has_ctx)

                # ========== Total loss ==========
                loss = (
                    LAMBDA_ANCHOR  * loss_anchor +
                    LAMBDA_CONTEXT * loss_ctx +
                    LAMBDA_HASCTX  * loss_hasctx
                )

            optimizer.zero_grad()
            if use_amp:
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            total_anchor_loss += loss_anchor.item()
            total_ctx_loss += loss_ctx.item()
            total_hasctx_loss += loss_hasctx.item()

            step = pbar.n + 1
            pbar.set_postfix(
                loss=f"{total_loss/step:.4f}",
                anchor=f"{total_anchor_loss/step:.4f}",
                ctx=f"{total_ctx_loss/step:.4f}",
                hasctx=f"{total_hasctx_loss/step:.4f}"
            )

        n_steps = len(loader)
        print(
            f"‚úÖ Epoch {epoch+1} | "
            f"Loss={total_loss/n_steps:.4f} | "
            f"Anchor={total_anchor_loss/n_steps:.4f} | "
            f"Context={total_ctx_loss/n_steps:.4f} | "
            f"HasCtx={total_hasctx_loss/n_steps:.4f}"
        )

    # -----------------------------
    # Save adapter + encoder
    # -----------------------------
    torch.save(adapter.state_dict(), os.path.join(SAVE_PATH, "dual_adapter_v3.pt"))
    base_encoder.save(SAVE_PATH)

    print(f"\nüíæ Saved model & adapter to: {SAVE_PATH}")


# ------------------------------------------------------------
if __name__ == "__main__":
    train_dual_expert_v3()


  from .autonotebook import tqdm as notebook_tqdm



=== üß© Training Dual Expert v3.0 ‚Äî InfoNCE Edition ===
‚úÖ Loaded 20622 Advice samples


  has_ctx = torch.tensor(has_ctx, dtype=torch.float32, device=DEVICE)
Epoch 1/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:05<00:00,  9.89it/s, anchor=0.9520, ctx=2.1182, hasctx=0.5373, loss=5.4571]


‚úÖ Epoch 1 | Loss=5.4486 | Anchor=0.9505 | Context=2.1149 | HasCtx=0.5365


Epoch 2/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:02<00:00, 10.23it/s, anchor=0.0984, ctx=1.7833, hasctx=0.3746, loss=3.8522]


‚úÖ Epoch 2 | Loss=3.8522 | Anchor=0.0984 | Context=1.7833 | HasCtx=0.3746


Epoch 3/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:03<00:00, 10.14it/s, anchor=0.0782, ctx=1.7693, hasctx=0.2975, loss=3.7656]


‚úÖ Epoch 3 | Loss=3.7656 | Anchor=0.0782 | Context=1.7693 | HasCtx=0.2975


Epoch 4/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:03<00:00, 10.21it/s, anchor=0.0673, ctx=1.7606, hasctx=0.2345, loss=3.7056]


‚úÖ Epoch 4 | Loss=3.7056 | Anchor=0.0673 | Context=1.7606 | HasCtx=0.2345


Epoch 5/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:01<00:00, 10.42it/s, anchor=0.0644, ctx=1.7593, hasctx=0.1887, loss=3.6774]


‚úÖ Epoch 5 | Loss=3.6774 | Anchor=0.0644 | Context=1.7593 | HasCtx=0.1887


Epoch 6/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:03<00:00, 10.19it/s, anchor=0.0609, ctx=1.7587, hasctx=0.1583, loss=3.6575]


‚úÖ Epoch 6 | Loss=3.6575 | Anchor=0.0609 | Context=1.7587 | HasCtx=0.1583


Epoch 7/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:04<00:00, 10.05it/s, anchor=0.0615, ctx=1.7550, hasctx=0.1381, loss=3.6406]


‚úÖ Epoch 7 | Loss=3.6406 | Anchor=0.0615 | Context=1.7550 | HasCtx=0.1381


Epoch 8/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:02<00:00, 10.28it/s, anchor=0.0573, ctx=1.7603, hasctx=0.1239, loss=3.6397]


‚úÖ Epoch 8 | Loss=3.6397 | Anchor=0.0573 | Context=1.7603 | HasCtx=0.1239


Epoch 9/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:02<00:00, 10.29it/s, anchor=0.0591, ctx=1.7572, hasctx=0.1135, loss=3.6302]


‚úÖ Epoch 9 | Loss=3.6246 | Anchor=0.0590 | Context=1.7544 | HasCtx=0.1133


Epoch 10/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:02<00:00, 10.29it/s, anchor=0.0529, ctx=1.7545, hasctx=0.1049, loss=3.6143]


‚úÖ Epoch 10 | Loss=3.6143 | Anchor=0.0529 | Context=1.7545 | HasCtx=0.1049


Epoch 11/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:03<00:00, 10.22it/s, anchor=0.0563, ctx=1.7493, hasctx=0.0979, loss=3.6038]


‚úÖ Epoch 11 | Loss=3.6038 | Anchor=0.0563 | Context=1.7493 | HasCtx=0.0979


Epoch 12/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:04<00:00, 10.02it/s, anchor=0.0514, ctx=1.7474, hasctx=0.0918, loss=3.5921]


‚úÖ Epoch 12 | Loss=3.5921 | Anchor=0.0514 | Context=1.7474 | HasCtx=0.0918


Epoch 13/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:04<00:00,  9.95it/s, anchor=0.0544, ctx=1.7580, hasctx=0.0868, loss=3.6138]


‚úÖ Epoch 13 | Loss=3.6138 | Anchor=0.0544 | Context=1.7580 | HasCtx=0.0868


Epoch 14/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:05<00:00,  9.83it/s, anchor=0.0507, ctx=1.7470, hasctx=0.0821, loss=3.5858]


‚úÖ Epoch 14 | Loss=3.5858 | Anchor=0.0507 | Context=1.7470 | HasCtx=0.0821


Epoch 15/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:01<00:00, 10.45it/s, anchor=0.0488, ctx=1.7528, hasctx=0.0783, loss=3.5937]


‚úÖ Epoch 15 | Loss=3.5937 | Anchor=0.0488 | Context=1.7528 | HasCtx=0.0783


Epoch 16/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:03<00:00, 10.11it/s, anchor=0.0497, ctx=1.7502, hasctx=0.0747, loss=3.5874]


‚úÖ Epoch 16 | Loss=3.5818 | Anchor=0.0496 | Context=1.7474 | HasCtx=0.0746


Epoch 17/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:02<00:00, 10.28it/s, anchor=0.0501, ctx=1.7521, hasctx=0.0713, loss=3.5899]


‚úÖ Epoch 17 | Loss=3.5899 | Anchor=0.0501 | Context=1.7521 | HasCtx=0.0713


Epoch 18/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:02<00:00, 10.27it/s, anchor=0.0475, ctx=1.7521, hasctx=0.0682, loss=3.5858]


‚úÖ Epoch 18 | Loss=3.5858 | Anchor=0.0475 | Context=1.7521 | HasCtx=0.0682


Epoch 19/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:02<00:00, 10.28it/s, anchor=0.0459, ctx=1.7495, hasctx=0.0655, loss=3.5776]


‚úÖ Epoch 19 | Loss=3.5776 | Anchor=0.0459 | Context=1.7495 | HasCtx=0.0655


Epoch 20/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 644/644 [01:03<00:00, 10.21it/s, anchor=0.0526, ctx=1.7556, hasctx=0.0628, loss=3.5953]


‚úÖ Epoch 20 | Loss=3.5897 | Anchor=0.0525 | Context=1.7529 | HasCtx=0.0627

üíæ Saved model & adapter to: models_anchor/dual_expert_Advice_v3_0


In [10]:
# ============================================================
# üîç Dual Expert v3.0 ‚Äî TEST FILE (FIXED VERSION)
# ============================================================

import os
import torch
import torch.nn.functional as F
import pandas as pd
from sentence_transformers import SentenceTransformer


# ============================================================
# üß† Dual Expert Adapter (same as training)
# ============================================================

class DualExpertAdapter(torch.nn.Module):
    def __init__(self, input_dim=768, hidden_dim=256):
        super().__init__()

        self.anchor_adapter = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, input_dim)
        )

        self.context_adapter = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, input_dim)
        )

        self.has_context_head = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        anchor_emb = F.normalize(self.anchor_adapter(x), p=2, dim=1)
        context_emb = F.normalize(self.context_adapter(x), p=2, dim=1)
        has_ctx_logit = self.has_context_head(x).squeeze(-1)
        return anchor_emb, context_emb, has_ctx_logit



# ============================================================
# ‚öô CONFIG
# ============================================================

MODEL_DIR = "models_anchor/dual_expert_Advice_v3_0"   # folder model b·∫°n l∆∞u
ADAPTER_PATH = f"{MODEL_DIR}/dual_adapter_v3.pt"
DATA_FILE = "Advice.csv"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TOP_K = 5



# ============================================================
# üöÄ LOAD MODEL
# ============================================================

print("üîÑ Loading SentenceTransformer encoder...")
encoder = SentenceTransformer(MODEL_DIR).to(DEVICE)
encoder.eval()

try:
    emb_dim = encoder.get_sentence_embedding_dimension()
except:
    emb_dim = encoder.encode(["hi"], convert_to_tensor=True).shape[1]

print(f"üìê Embedding dimension = {emb_dim}")


print("üîÑ Loading DualExpertAdapter...")
adapter = DualExpertAdapter(input_dim=emb_dim)
adapter.load_state_dict(torch.load(ADAPTER_PATH, map_location=DEVICE))
adapter = adapter.to(DEVICE)
adapter.eval()



# ============================================================
# üì• LOAD ANCHORS + CONTEXTS
# ============================================================

df = pd.read_csv(DATA_FILE)
df = df[df["expert_label"] == "Advice"]

anchors = sorted(df["anchor"].unique())
contexts = sorted([c for c in df["context_name"].unique() if isinstance(c, str) and c.strip() != ""])

print(f"üìå Loaded {len(anchors)} anchors")
print(f"üìå Loaded {len(contexts)} contexts")



# ============================================================
# ‚ö° PRE-ENCODE ANCHORS + CONTEXTS (FIXED)
# ============================================================

print("\n‚ö° Encoding anchors...")
anchor_emb = encoder.encode(anchors, convert_to_tensor=True, device=DEVICE)
anchor_emb = anchor_emb.detach().clone()
anchor_emb = adapter.anchor_adapter(anchor_emb)
anchor_emb = F.normalize(anchor_emb, p=2, dim=1)


print("‚ö° Encoding contexts...")
if len(contexts) > 0:
    context_emb = encoder.encode(contexts, convert_to_tensor=True, device=DEVICE)
    context_emb = context_emb.detach().clone()
    context_emb = adapter.context_adapter(context_emb)
    context_emb = F.normalize(context_emb, p=2, dim=1)
else:
    context_emb = None



# ============================================================
# üîç PREDICT FUNCTION
# ============================================================

def predict(question, top_k=TOP_K):
    print("\n==========================")
    print(f"üîé QUESTION: {question}")
    print("==========================")

    # Encode question
    q_emb = encoder.encode([question], convert_to_tensor=True, device=DEVICE)
    q_emb = q_emb.detach().clone()

    q_anchor, q_ctx, q_hasctx = adapter(q_emb)

    # ---- ANCHOR SIM ----
    sim_anchor = (q_anchor @ anchor_emb.T)[0]
    top_anchor_idx = torch.topk(sim_anchor, top_k).indices.tolist()
    top_anchors = [(anchors[i], float(sim_anchor[i])) for i in top_anchor_idx]

    # ---- HAS-CONTEXT ----
    has_ctx_prob = torch.sigmoid(q_hasctx).item()

    # ---- CONTEXT SIM ----
    if context_emb is not None:
        sim_ctx = (q_ctx @ context_emb.T)[0]
        top_ctx_idx = torch.topk(sim_ctx, top_k).indices.tolist()
        top_contexts = [(contexts[i], float(sim_ctx[i])) for i in top_ctx_idx]
    else:
        top_contexts = []

    # PRINT
    print("\nüéØ TOP ANCHORS:")
    for name, score in top_anchors:
        print(f"  ‚Ä¢ {name:<30} ‚Üí {score:.4f}")

    print(f"\nüîÆ Has-Context Probability: {has_ctx_prob:.4f}")

    print("\nüåç TOP CONTEXTS:")
    for name, score in top_contexts:
        print(f"  ‚Ä¢ {name:<30} ‚Üí {score:.4f}")

    return {
        "anchors": top_anchors,
        "contexts": top_contexts,
        "has_context": has_ctx_prob
    }


# ============================================================
# üß™ INTERACTIVE CHAT MODE (ENTER LI√äN T·ª§C)
# ============================================================

print("\nüí¨ Ready! Nh·∫≠p c√¢u h·ªèi ƒë·ªÉ test (g√µ 'exit' ƒë·ªÉ tho√°t).")

while True:
    q = input("\n‚ùì C√¢u h·ªèi: ").strip()
    if q.lower() == "exit":
        print("üëã Bye!")
        break
    if q == "":
        continue   # enter r·ªóng th√¨ b·ªè qua
    
    predict(q)


üîÑ Loading SentenceTransformer encoder...


The tokenizer you are loading from 'models_anchor/dual_expert_Advice_v3_0' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


üìê Embedding dimension = 768
üîÑ Loading DualExpertAdapter...
üìå Loaded 474 anchors
üìå Loaded 5 contexts

‚ö° Encoding anchors...
‚ö° Encoding contexts...

üí¨ Ready! Nh·∫≠p c√¢u h·ªèi ƒë·ªÉ test (g√µ 'exit' ƒë·ªÉ tho√°t).

üîé QUESTION: T√¥i b·ªã HIV trong thai k·ª≥ th√¨ n√™n l√†m g√¨?

üéØ TOP ANCHORS:
  ‚Ä¢ HIV                            ‚Üí 0.7663
  ‚Ä¢ AIDS                           ‚Üí 0.5159
  ‚Ä¢ ƒêau khi quan h·ªá t√¨nh d·ª•c       ‚Üí 0.3026
  ‚Ä¢ Virus Zika                     ‚Üí 0.2919
  ‚Ä¢ Vi√™m gan si√™u vi B             ‚Üí 0.2770

üîÆ Has-Context Probability: 0.9866

üåç TOP CONTEXTS:
  ‚Ä¢ Thai k·ª≥                        ‚Üí 0.7867
  ‚Ä¢ Tr∆∞·ªõc thai k·ª≥                  ‚Üí -0.0606
  ‚Ä¢ Cu·ªëi thai k·ª≥                   ‚Üí -0.0759
  ‚Ä¢ Qu√° tr√¨nh sinh n·ªü              ‚Üí -0.3710
  ‚Ä¢ Chuy·ªÉn d·∫°                      ‚Üí -0.3936

üîé QUESTION: T√¥i b·ªã hiv trong thai k·ª≥ th√¨ n√™n l√†m g√¨

üéØ TOP ANCHORS:
  ‚Ä¢ HIV                      

KeyboardInterrupt: Interrupted by user

In [None]:
# ============================================================
# üìä Dual Expert v3.0 ‚Äî EVALUATION SCRIPT
# ============================================================

import os
import torch
import torch.nn.functional as F
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics import f1_score


# ============================================================
# üß† Load Adapter (same as training)
# ============================================================

class DualExpertAdapter(torch.nn.Module):
    def __init__(self, input_dim=768, hidden_dim=256):
        super().__init__()

        self.anchor_adapter = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, input_dim)
        )

        self.context_adapter = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, input_dim)
        )

        self.has_context_head = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        anchor_emb = F.normalize(self.anchor_adapter(x), p=2, dim=1)
        context_emb = F.normalize(self.context_adapter(x), p=2, dim=1)
        has_ctx_logit = self.has_context_head(x).squeeze(-1)
        return anchor_emb, context_emb, has_ctx_logit


# ============================================================
# ‚öô CONFIG
# ============================================================

MODEL_DIR = "models_anchor/dual_expert_Advice_v3_0"
ADAPTER_PATH = f"{MODEL_DIR}/dual_adapter_v3.pt"
DATA_FILE = "Advice_test.csv"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TOP_K = [1, 3, 5]


# ============================================================
# üöÄ Load Encoder + Adapter
# ============================================================

print("üîÑ Loading SentenceTransformer...")
encoder = SentenceTransformer(MODEL_DIR).to(DEVICE)
encoder.eval()

try:
    emb_dim = encoder.get_sentence_embedding_dimension()
except:
    emb_dim = encoder.encode(["hi"], convert_to_tensor=True).shape[1]

print(f"üìê Embedding Dimension = {emb_dim}")

print("üîÑ Loading Adapter...")
adapter = DualExpertAdapter(input_dim=emb_dim)
adapter.load_state_dict(torch.load(ADAPTER_PATH, map_location=DEVICE))
adapter = adapter.to(DEVICE)
adapter.eval()


# ============================================================
# üì• Load Dataset Anchors + Contexts
# ============================================================

df = pd.read_csv(DATA_FILE)
df = df[df["expert_label"] == "Advice"]

anchors = sorted(df["anchor"].unique())
contexts = sorted([c for c in df["context_name"].unique() if isinstance(c, str) and c.strip() != ""])

print(f"üìå Found {len(anchors)} anchors")
print(f"üìå Found {len(contexts)} contexts")


# ============================================================
# ‚ö° Pre-encode Anchors + Contexts
# ============================================================

print("‚ö° Encoding anchors...")
anchor_emb = encoder.encode(anchors, convert_to_tensor=True, device=DEVICE)
anchor_emb = anchor_emb.detach().clone()
anchor_emb = adapter.anchor_adapter(anchor_emb)
anchor_emb = F.normalize(anchor_emb, p=2, dim=1)

print("‚ö° Encoding contexts...")
if len(contexts) > 0:
    context_emb = encoder.encode(contexts, convert_to_tensor=True, device=DEVICE)
    context_emb = context_emb.detach().clone()
    context_emb = adapter.context_adapter(context_emb)
    context_emb = F.normalize(context_emb, p=2, dim=1)
else:
    context_emb = None


# ============================================================
# üìä Evaluation Functions
# ============================================================

def topk_check(name_list, sim_scores, gold_name, k):
    top_k_idx = torch.topk(sim_scores, k).indices.tolist()
    top_k_names = [name_list[i] for i in top_k_idx]
    return gold_name in top_k_names


# ============================================================
# üìä Main Evaluation
# ============================================================

def evaluate():
    print("\nüöÄ Starting Evaluation...\n")

    anchor_correct = {1: 0, 3: 0, 5: 0}
    context_correct = {1: 0, 3: 0, 5: 0}
    hasctx_true = []
    hasctx_pred = []

    total = len(df)
    ctx_total = len(df[df["has_context"] == 1])

    for idx, row in df.iterrows():
        q = row["question"]
        gold_anchor = row["anchor"]
        gold_ctx = row["context_name"] if row["has_context"] == 1 else None
        gold_has_ctx = int(row["has_context"])

        # Encode question
        q_emb = encoder.encode([q], convert_to_tensor=True, device=DEVICE)
        q_emb = q_emb.detach().clone()

        q_anchor, q_ctx, q_hasctx = adapter(q_emb)

        # ---------- ANCHOR CHECK ----------
        sim_anchor = (q_anchor @ anchor_emb.T)[0]

        for k in TOP_K:
            if topk_check(anchors, sim_anchor, gold_anchor, k):
                anchor_correct[k] += 1

        # ---------- CONTEXT CHECK ----------
        if gold_ctx and context_emb is not None:
            sim_ctx = (q_ctx @ context_emb.T)[0]

            for k in TOP_K:
                if topk_check(contexts, sim_ctx, gold_ctx, k):
                    context_correct[k] += 1

        # ---------- HAS-CONTEXT ----------
        hasctx_pred_prob = torch.sigmoid(q_hasctx).item()
        hasctx_pred_bin = 1 if hasctx_pred_prob >= 0.5 else 0

        hasctx_true.append(gold_has_ctx)
        hasctx_pred.append(hasctx_pred_bin)

    # =======================================================
    # üìà Final Metrics
    # =======================================================

    print("\n===============================")
    print("üìä FINAL EVALUATION RESULTS")
    print("===============================")

    print("\nüéØ Anchor Accuracy:")
    for k in TOP_K:
        print(f"  Top-{k}: {anchor_correct[k] / total:.4f}")

    print("\nüåç Context Accuracy (only samples with context):")
    for k in TOP_K:
        print(f"  Top-{k}: {context_correct[k] / ctx_total:.4f}")

    print("\nüîÆ Has-Context F1 Score:")
    print("  F1:", f"{f1_score(hasctx_true, hasctx_pred):.4f}")


if __name__ == "__main__":
    evaluate()


üîÑ Loading SentenceTransformer...


The tokenizer you are loading from 'models_anchor/dual_expert_Advice_v3_0' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.


üìê Embedding Dimension = 768
üîÑ Loading Adapter...
üìå Found 137 anchors
üìå Found 5 contexts
‚ö° Encoding anchors...
‚ö° Encoding contexts...

üöÄ Starting Evaluation...


üìä FINAL EVALUATION RESULTS

üéØ Anchor Accuracy:
  Top-1: 0.9948
  Top-3: 1.0000
  Top-5: 1.0000

üåç Context Accuracy (only samples with context):
  Top-1: 1.0000
  Top-3: 1.0000
  Top-5: 1.0000

üîÆ Has-Context F1 Score:
  F1: 0.9967


In [14]:
# ============================================================
# üß™ Test Dual Expert v3.0 (Anchor + Context + HasContext)
# ============================================================

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sentence_transformers import SentenceTransformer


# ------------------------------------------------------------
# ‚öôÔ∏è CONFIGURATION
# ------------------------------------------------------------
EXPERT_NAME = "Advice"

MODEL_PATH = f"models_anchor/dual_expert_{EXPERT_NAME}_v3_0"
ADAPTER_PATH = os.path.join(MODEL_PATH, "dual_adapter_v3.pt")

ANCHOR_FILE = "anchors.csv"
CONTEXT_FILE = "contexts.csv"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

TOP_K = 5


# ------------------------------------------------------------
# üß© Load Anchors + Contexts
# ------------------------------------------------------------
df_anchor = pd.read_csv(ANCHOR_FILE)
ANCHORS = df_anchor["name"].dropna().tolist()

if os.path.exists(CONTEXT_FILE):
    CONTEXTS = pd.read_csv(CONTEXT_FILE)["name"].dropna().tolist()
else:
    CONTEXTS = []

print(f"‚úÖ Loaded {len(ANCHORS)} anchors, {len(CONTEXTS)} contexts.")


# ------------------------------------------------------------
# üß† Dual Expert Adapter v3.0
# ------------------------------------------------------------
class DualExpertAdapter(nn.Module):
    def __init__(self, input_dim=768, hidden_dim=256):
        super().__init__()

        self.anchor_adapter = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, input_dim)
        )
        self.context_adapter = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, input_dim)
        )
        self.has_context_head = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        a = F.normalize(self.anchor_adapter(x), p=2, dim=1)
        c = F.normalize(self.context_adapter(x), p=2, dim=1)
        h = self.has_context_head(x).squeeze(-1)
        return a, c, h


# ------------------------------------------------------------
# üöÄ Load model + adapter
# ------------------------------------------------------------
print("\nüîÑ Loading SentenceTransformer encoder...")
encoder = SentenceTransformer(MODEL_PATH).to(DEVICE)

try:
    EMB_DIM = encoder.get_sentence_embedding_dimension()
except:
    EMB_DIM = encoder.encode(["hi"], convert_to_tensor=True).shape[1]

adapter = DualExpertAdapter(input_dim=EMB_DIM).to(DEVICE)
adapter.load_state_dict(torch.load(ADAPTER_PATH, map_location=DEVICE))
adapter.eval()


# ------------------------------------------------------------
# üßÆ Helper ‚Äî Encode function (v3.0)
# ------------------------------------------------------------
def encode_embed(texts):
    emb = encoder.encode(texts, convert_to_tensor=True, device=DEVICE)
    emb = emb.detach().clone()  # FIX inference mode
    return adapter(emb)          # anchor_emb, context_emb, hasctx


# ============================================================
# üß™ INTERACTIVE TEST MODE
# ============================================================

print("\nüí¨ Ready! Nh·∫≠p c√¢u h·ªèi ƒë·ªÉ test (g√µ 'exit' ƒë·ªÉ tho√°t).")

while True:
    QUESTION = input("\n‚ùì C√¢u h·ªèi: ").strip()
    if QUESTION.lower() == "exit":
        print("\nüëã Bye!")
        break
    if QUESTION == "":
        continue

    print(f"\nüß™ Testing expert '{EXPERT_NAME}' on question:\n‚û°Ô∏è  \"{QUESTION}\"")

    # Encode c√¢u h·ªèi
    q_a, q_c, q_h = encode_embed([QUESTION])

    # Encode anchors
    a_embs, _, _ = encode_embed(ANCHORS)

    # Encode contexts
    if len(CONTEXTS) > 0:
        _, c_embs, _ = encode_embed(CONTEXTS)
    else:
        c_embs = None

    # ----- Compute similarity -----
    sim_anchor = F.cosine_similarity(q_a, a_embs)
    sim_context = F.cosine_similarity(q_c, c_embs) if c_embs is not None else None
    has_ctx_prob = torch.sigmoid(q_h).item()

    # ----- Best anchor -----
    best_anchor = ANCHORS[torch.argmax(sim_anchor).item()]

    # ----- Best context -----
    if c_embs is not None and has_ctx_prob > 0.35:
        best_context = CONTEXTS[torch.argmax(sim_context).item()]
    else:
        best_context = "(no context)"

    # --------------------------------------------------------
    # üîé PRINT RESULTS
    # --------------------------------------------------------
    print("\n=== üîé RESULTS ===")
    print(f"üéØ Predicted Anchor : {best_anchor}")
    print(f"üåê Predicted Context: {best_context}")
    print(f"üìà Has-Context prob : {has_ctx_prob:.4f}")

    print("\nTop 5 anchors:")
    for name, score in sorted(zip(ANCHORS, sim_anchor.tolist()), key=lambda x: x[1], reverse=True)[:TOP_K]:
        print(f"{name:<40} ‚Üí {score:.4f}")

    if c_embs is not None:
        print("\nTop 5 contexts:")
        for name, score in sorted(zip(CONTEXTS, sim_context.tolist()), key=lambda x: x[1], reverse=True)[:TOP_K]:
            print(f"{name:<40} ‚Üí {score:.4f}")


‚úÖ Loaded 409 anchors, 5 contexts.

üîÑ Loading SentenceTransformer encoder...


The tokenizer you are loading from 'models_anchor/dual_expert_Advice_v3_0' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.



üí¨ Ready! Nh·∫≠p c√¢u h·ªèi ƒë·ªÉ test (g√µ 'exit' ƒë·ªÉ tho√°t).

üß™ Testing expert 'Advice' on question:
‚û°Ô∏è  "n√™n l√†m g√¨ khi b·ªã cao huy·∫øt √°p trong thai k·ª≥"

=== üîé RESULTS ===
üéØ Predicted Anchor : Cao huy·∫øt √°p
üåê Predicted Context: Thai k·ª≥
üìà Has-Context prob : 0.9959

Top 5 anchors:
Cao huy·∫øt √°p                             ‚Üí 0.6793
Cao huy·∫øt √°p m√£n t√≠nh                    ‚Üí 0.4766
TƒÉng huy·∫øt √°p                            ‚Üí 0.4111
Huy·∫øt √°p cao                             ‚Üí 0.3806
TƒÉng huy·∫øt √°p m√£n t√≠nh                   ‚Üí 0.3542

Top 5 contexts:
Thai k·ª≥                                  ‚Üí 0.8825
Cu·ªëi thai k·ª≥                             ‚Üí -0.0568
Tr∆∞·ªõc khi thai k·ª≥                        ‚Üí -0.3305
Qu√° tr√¨nh sinh n·ªü                        ‚Üí -0.3572
Chuy·ªÉn d·∫°                                ‚Üí -0.3789


KeyboardInterrupt: Interrupted by user

In [None]:
# ============================================================
# üß™ Test Dual Expert v3.0 (with Anchor Layer Output)
# ============================================================

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sentence_transformers import SentenceTransformer


# ------------------------------------------------------------
# ‚öôÔ∏è CONFIGURATION
# ------------------------------------------------------------
EXPERT_NAME = "Advice"

MODEL_PATH = f"models_anchor/dual_expert_{EXPERT_NAME}_v3_0"
ADAPTER_PATH = os.path.join(MODEL_PATH, "dual_adapter_v3.pt")

ANCHOR_FILE = "anchors.csv"
CONTEXT_FILE = "contexts.csv"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

TOP_K = 5


# ------------------------------------------------------------
# üß© Load Anchors + Layers + Contexts
# ------------------------------------------------------------
df_anchor = pd.read_csv(ANCHOR_FILE)

ANCHORS = df_anchor["name"].tolist()
ANCHOR_LAYERS = df_anchor["layer"].tolist()   # ‚¨ÖÔ∏è LAYER HERE

if os.path.exists(CONTEXT_FILE):
    CONTEXTS = pd.read_csv(CONTEXT_FILE)["name"].dropna().tolist()
else:
    CONTEXTS = []

print(f"‚úÖ Loaded {len(ANCHORS)} anchors, {len(CONTEXTS)} contexts.")


# ------------------------------------------------------------
# üß† Dual Expert Adapter v3.0
# ------------------------------------------------------------
class DualExpertAdapter(nn.Module):
    def __init__(self, input_dim=768, hidden_dim=256):
        super().__init__()

        self.anchor_adapter = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, input_dim)
        )
        self.context_adapter = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, input_dim)
        )
        self.has_context_head = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, 1)
        )

    def forward(self, x):
        a = F.normalize(self.anchor_adapter(x), p=2, dim=1)
        c = F.normalize(self.context_adapter(x), p=2, dim=1)
        h = self.has_context_head(x).squeeze(-1)
        return a, c, h


# ------------------------------------------------------------
# üöÄ Load model + adapter
# ------------------------------------------------------------
print("\nüîÑ Loading SentenceTransformer encoder...")
encoder = SentenceTransformer(MODEL_PATH).to(DEVICE)

try:
    EMB_DIM = encoder.get_sentence_embedding_dimension()
except:
    EMB_DIM = encoder.encode(["hi"], convert_to_tensor=True).shape[1]

adapter = DualExpertAdapter(input_dim=EMB_DIM).to(DEVICE)
adapter.load_state_dict(torch.load(ADAPTER_PATH, map_location=DEVICE))
adapter.eval()


# ------------------------------------------------------------
# üßÆ Helper ‚Äî Encode function (v3.0)
# ------------------------------------------------------------
def encode_embed(texts):
    emb = encoder.encode(texts, convert_to_tensor=True, device=DEVICE)
    emb = emb.detach().clone()  # FIX inference mode
    return adapter(emb)          # anchor_emb, context_emb, hasctx


# ============================================================
# üß™ INTERACTIVE TEST MODE
# ============================================================

print("\nüí¨ Ready! Nh·∫≠p c√¢u h·ªèi ƒë·ªÉ test (g√µ 'exit' ƒë·ªÉ tho√°t).")

while True:
    QUESTION = input("\n‚ùì C√¢u h·ªèi: ").strip()
    if QUESTION.lower() == "exit":
        print("\nüëã Bye!")
        break
    if QUESTION == "":
        continue

    print(f"\nüß™ Testing expert '{EXPERT_NAME}' on question:\n‚û°Ô∏è  \"{QUESTION}\"")

    # Encode c√¢u h·ªèi
    q_a, q_c, q_h = encode_embed([QUESTION])

    # Encode anchors & contexts
    a_embs, _, _ = encode_embed(ANCHORS)
    if len(CONTEXTS) > 0:
        _, c_embs, _ = encode_embed(CONTEXTS)
    else:
        c_embs = None

    # ----- Similarity -----
    sim_anchor = F.cosine_similarity(q_a, a_embs)
    sim_context = F.cosine_similarity(q_c, c_embs) if c_embs is not None else None
    has_ctx_prob = torch.sigmoid(q_h).item()

    # ----- Best anchor -----
    best_idx = torch.argmax(sim_anchor).item()
    best_anchor = ANCHORS[best_idx]
    best_layer = ANCHOR_LAYERS[best_idx]  # ‚¨ÖÔ∏è PRINT LAYER

    # ----- Best context -----
    if c_embs is not None and has_ctx_prob > 0.35:
        best_context = CONTEXTS[torch.argmax(sim_context).item()]
    else:
        best_context = "(no context)"

    # --------------------------------------------------------
    # üîé PRINT RESULTS (now with layer!)
    # --------------------------------------------------------
    print("\n=== üîé RESULTS ===")
    print(f"üéØ Predicted Anchor : {best_anchor}   ({best_layer})")
    print(f"üåê Predicted Context: {best_context}")
    print(f"üìà Has-Context prob : {has_ctx_prob:.4f}")

    print("\nTop 5 anchors:")
    top5 = sorted(
        list(zip(ANCHORS, ANCHOR_LAYERS, sim_anchor.tolist())),
        key=lambda x: x[2], reverse=True
    )[:TOP_K]

    for name, layer, score in top5:
        print(f"{name:<35} ({layer:<12}) ‚Üí {score:.4f}")

    if c_embs is not None:
        print("\nTop 5 contexts:")
        top5_ctx = sorted(
            list(zip(CONTEXTS, sim_context.tolist())),
            key=lambda x: x[1], reverse=True
        )[:TOP_K]

        for name, score in top5_ctx:
            print(f"{name:<40} ‚Üí {score:.4f}")


‚úÖ Loaded 408 anchors, 5 contexts.

üîÑ Loading SentenceTransformer encoder...


The tokenizer you are loading from 'models_anchor/dual_expert_Advice_v3_0' with an incorrect regex pattern: https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503/discussions/84#69121093e8b480e709447d5e. This will lead to incorrect tokenization. You should set the `fix_mistral_regex=True` flag when loading this tokenizer to fix this issue.



üí¨ Ready! Nh·∫≠p c√¢u h·ªèi ƒë·ªÉ test (g√µ 'exit' ƒë·ªÉ tho√°t).

üß™ Testing expert 'Advice' on question:
‚û°Ô∏è  "T√¥i b·ªã Ti·ªÅn ti·ªÉu ƒë∆∞·ªùng th√¨ n√™n l√†m g√¨"

=== üîé RESULTS ===
üéØ Predicted Anchor : Ti·ªÅn ti·ªÉu ƒë∆∞·ªùng   (Disease)
üåê Predicted Context: (no context)
üìà Has-Context prob : 0.0004

Top 5 anchors:
Ti·ªÅn ti·ªÉu ƒë∆∞·ªùng                     (Disease     ) ‚Üí 0.7076
Ti·ªÉu ƒë∆∞·ªùng                          (Disease     ) ‚Üí 0.4100
Ti·ªÅn s·∫£n gi·∫≠t                       (Disease     ) ‚Üí 0.3636
Ti·ªÅn s·∫£n gi·∫≠t                       (Topic       ) ‚Üí 0.3636
Ch·∫©n ƒëo√°n ti·ªÅn s·∫£n                  (Topic       ) ‚Üí 0.2764

Top 5 contexts:
Thai k·ª≥                                  ‚Üí 0.3170
Cu·ªëi thai k·ª≥                             ‚Üí 0.2872
Chuy·ªÉn d·∫°                                ‚Üí 0.1193
Qu√° tr√¨nh sinh n·ªü                        ‚Üí -0.2940
Tr∆∞·ªõc khi thai k·ª≥                        ‚Üí -0.4054
