In [None]:
!pip install bert-score

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert-score
Successfully installed bert-score-0.3.13


MiniLM Morph Segmented dataset

In [None]:
# @title
# ============================================================
# SEG_AS_BASE — MORPH_ONLY vs DUAL_VIEW_FUSED (clean+morph)  ✅
# - Loads ONLY segmented dataset (contains @@)
# - One deterministic split (seed)
#
# MODE A: MORPH_ONLY
#   - Index questions = MORPH_MARKER(question) (keeps @@)
#
# MODE B: DUAL_VIEW_FUSED(alpha)
#   - q_clean = CLEAN(question)         (removes @@)
#   - q_morph = MORPH_MARKER(question)  (keeps @@)
#   - q_fused = normalize(alpha*q_clean + (1-alpha)*q_morph)
#
# Answers:
#   - Stored answers = MORPH_MARKER(answer)
#   - Answer embeddings also MORPH_MARKER
#
# Interactive QA: uses DUAL_VIEW_FUSED by default (can switch)
# Auto metrics AFTER exit:
#   Exact@1, TokenF1@1, MeanCos@1(QSim),
#   Semantic@1(ans_cos>=thr), BERTScore(optional)
# + CSV export per mode
#
# ✅ ONLY CHANGE REQUESTED:
#   Interactive output answer is CLEANED at the end (removes @@ only for printing).
#   Model/index/retrieval/eval logic unchanged.
# ============================================================

import os
os.environ["TRANSFORMERS_NO_TORCHVISION"] = "1"

import json, re, time, glob, random, csv
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Any, Tuple, Optional

import numpy as np
from sklearn.model_selection import train_test_split
from sentence_transformers import SentenceTransformer

try:
    from bert_score import score as bert_score
except Exception:
    bert_score = None


# ---------------------------
# CONFIG
# ---------------------------
DATA_PATH   = "kazakh_segmented_15000.json"  # dataset file; main() resolves it via find_data_path()
MODEL_NAME  = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"  # encoder passed to SentenceTransformer

SEED        = 42    # seeds random, numpy and the train/test split
TEST_SIZE   = 0.10  # fraction of records held out as the test split
SEM_THR     = 0.85  # answer-cosine threshold counted as a Semantic@1 hit
DEVICE      = None  # None -> auto

# Dual-view fusion weight: alpha for CLEAN part
ALPHA       = 0.50  # try 0.3 / 0.5 / 0.7

EXPORT_CSV  = True  # when True, main() writes per-question details to the CSV paths below
CSV_PATH_MORPH = "seg_morph_only_details.csv"  # details of the MORPH_ONLY evaluation
CSV_PATH_DUAL  = "seg_dual_fused_details.csv"  # details of the DUAL_FUSED evaluation


# ---------------------------
# Utility: file auto-find
# ---------------------------
def find_data_path(p: str) -> str:
    """Resolve the dataset location, trying a few well-known places.

    Lookup order: the path exactly as given, then ``/content/<p>`` and
    ``/content/drive/MyDrive/<p>``, and finally a recursive search below
    the current working directory for a file with the same base name.

    Args:
        p: Path (or bare file name) of the dataset.

    Returns:
        The first location that exists.

    Raises:
        FileNotFoundError: Nothing matched. The message lists up to 30
            ``.json`` files found below the working directory.
    """
    for location in (p, f"/content/{p}", f"/content/drive/MyDrive/{p}"):
        if Path(location).exists():
            return location

    # Last resort: look for the same file name anywhere under the CWD.
    matches = glob.glob(f"**/{Path(p).name}", recursive=True)
    if matches:
        return matches[0]

    nearby_json = glob.glob("**/*.json", recursive=True)
    listing = "\n".join(nearby_json[:30])
    raise FileNotFoundError(
        f"❌ File not found: {p}\nPWD: {Path.cwd()}\n"
        f"Found .json (first 30):\n" + listing
    )


# ---------------------------
# Robust loader (JSON array / JSONL / brace-scan)
# ---------------------------
def load_qa_records(path: str) -> List[Dict[str, str]]:
    """Load question/answer records from a JSON-like file.

    Three parsing strategies are tried in order, each falling through to
    the next on failure:

    1. the whole text as one JSON document (only when it starts with ``[``);
    2. JSON Lines, one object per non-blank line (trailing commas stripped);
    3. a character scan that cuts out every balanced top-level ``{...}``
       span and parses each one on its own, skipping spans that fail.

    Args:
        path: File to read. It is decoded as UTF-8 and undecodable bytes
            are dropped.

    Returns:
        The parsed records after passing through ``_normalize_records``.

    Raises:
        ValueError: The file is empty after stripping, or the brace scan
            recovers no object. ``_normalize_records`` is called outside
            any ``try`` for strategies 2 and 3, so whatever it raises
            propagates from there.
    """
    text = Path(path).read_text(encoding="utf-8", errors="ignore").strip()
    if not text:
        raise ValueError(f"Файл бос: {path}")

    # Strategy 1: a regular JSON array.
    if text[0] == "[":
        try:
            return _normalize_records(json.loads(text))
        except Exception:
            # Covers both a parse failure and a rejection by
            # _normalize_records; either way the fallbacks below get a try.
            pass

    # Strategy 2: JSON Lines. Trailing commas are stripped so that lines
    # copied out of an array body still parse individually.
    lines = [ln.strip().rstrip(",") for ln in text.splitlines() if ln.strip()]
    if lines and lines[0].startswith("{"):
        recs, ok = [], True
        for ln in lines:
            try:
                recs.append(json.loads(ln))
            except Exception:
                # One bad line invalidates the whole JSONL interpretation.
                ok = False
                break
        if ok and recs:
            return _normalize_records(recs)

    # Strategy 3: brace scan.
    #   objs    - successfully parsed objects
    #   buf     - characters of the object currently being collected
    #   depth   - brace nesting depth inside the current object
    #   in_str  - True while inside a double-quoted string
    #   esc     - True when the previous character inside a string was a backslash
    #   started - True once the opening "{" of an object has been seen
    objs, buf, depth = [], [], 0
    in_str, esc, started = False, False, False

    for ch in text:
        if not started:
            # Everything between objects is skipped until the next "{".
            if ch == "{":
                started = True
                depth = 1
                buf = ["{"]
            continue

        buf.append(ch)

        if in_str:
            # Braces inside strings must not affect the depth counter.
            if esc:
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
        else:
            if ch == '"':
                in_str = True
            elif ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    # A complete top-level object has been collected.
                    obj_txt = "".join(buf)
                    buf = []
                    started = False
                    try:
                        objs.append(json.loads(obj_txt))
                    except Exception:
                        # Unparseable span: drop it and keep scanning.
                        pass

    if not objs:
        raise ValueError(f"JSON оқу мүмкін болмады. Файл форматын тексеріңіз: {path}")
    return _normalize_records(objs)


def _normalize_records(data: Any) -> List[Dict[str, str]]:
    """Reduce raw parsed items to ``{"question", "answer"}`` dictionaries.

    Non-dict items are ignored. The question is read from ``"question"``
    (falling back to ``"instruction"``) and the answer from ``"answer"``
    (falling back to ``"response"``). Both are converted with ``str`` and
    stripped, and pairs with an empty side are dropped.

    Raises:
        ValueError: ``data`` is not a list, or no usable pair remains.
    """
    if not isinstance(data, list):
        raise ValueError("Дерек list болуы керек.")

    def _pick(item: dict, primary: str, fallback: str) -> str:
        # First truthy value among the two keys, as stripped text.
        return str(item.get(primary) or item.get(fallback) or "").strip()

    pairs = (
        (_pick(item, "question", "instruction"), _pick(item, "answer", "response"))
        for item in data
        if isinstance(item, dict)
    )
    records = [{"question": q, "answer": a} for q, a in pairs if q and a]

    if not records:
        raise ValueError("question/answer табылмады немесе бос.")
    return records


# ---------------------------
# Text normalization
# ---------------------------
# Whitespace that precedes closing punctuation / closing brackets.
_punct_space_left  = re.compile(r"\s+([.,!?;:%)\]\}])")
# Whitespace that follows an opening bracket.
_punct_space_right = re.compile(r"([(\[\{])\s+")
# Any run of whitespace.
_multi_space       = re.compile(r"\s+")

def _norm_space_punct(t: str) -> str:
    """Normalize spacing around hyphens, punctuation and brackets.

    Steps, in order: ``" - "`` becomes ``"-"``; whitespace before closing
    punctuation is removed; whitespace after an opening bracket is removed;
    remaining whitespace runs collapse to one space and the ends are trimmed.
    """
    joined_hyphens = t.replace(" - ", "-")
    tightened = _punct_space_right.sub(
        r"\1", _punct_space_left.sub(r"\1", joined_hyphens)
    )
    return _multi_space.sub(" ", tightened).strip()

def morph_marker_view(text: str) -> str:
    """Return the morpheme-marked view of ``text``.

    The ``@@`` markers are left untouched; only spacing and punctuation are
    normalized. ``None`` is treated as an empty string and any other value
    is converted with ``str``.
    """
    raw = str(text) if text is not None else ""
    return _norm_space_punct(raw)

def clean_view(text: str) -> str:
    """Return the clean view of ``text``: ``@@`` markers removed, then normalized.

    ``"@@ "`` is removed first so that the morphemes it separates are joined
    without a gap; any remaining bare ``"@@"`` is removed afterwards.
    ``None`` is treated as an empty string.
    """
    raw = str(text) if text is not None else ""
    for marker in ("@@ ", "@@"):
        raw = raw.replace(marker, "")
    return _norm_space_punct(raw)

def norm_for_exact(text: str) -> str:
    """Canonical form used for exact-match comparison.

    Takes the morph-marker view, lower-cases it, collapses whitespace runs
    to a single space and trims the ends.
    """
    lowered = morph_marker_view(text).lower()
    collapsed = re.sub(r"\s+", " ", lowered)
    return collapsed.strip()

def tokens(text: str) -> List[str]:
    """Split ``text`` into lower-cased alphanumeric tokens.

    The text is first put through ``morph_marker_view`` and lower-cased.
    A token is a maximal run of Latin letters, Cyrillic letters (including
    the Kazakh-specific ones) or digits; everything else, ``@@`` markers
    included, acts as a separator.

    Returns:
        The tokens in order of appearance.
    """
    t = morph_marker_view(text).lower()
    # "ё"/"Ё" (U+0451/U+0401) lie outside the а-я / А-Я ranges and must be
    # listed explicitly, otherwise words containing them are cut in two.
    # The upper-case Kazakh set uses Cyrillic "Ұ" (the previous pattern had
    # the look-alike Latin "Ú" in that position).
    return re.findall(r"[a-zA-Zа-яА-ЯёЁәғқңөұүһіӘҒҚҢӨҰҮҺІ0-9]+", t)

def token_f1(pred: str, gold: str) -> float:
    """Token-level F1 between a predicted and a reference answer.

    Both strings are tokenized with ``tokens``; overlap is counted with
    multiplicity. Two empty token lists score 1.0, exactly one empty list
    scores 0.0, and no overlap scores 0.0.
    """
    from collections import Counter

    pred_toks = tokens(pred)
    gold_toks = tokens(gold)

    if not pred_toks and not gold_toks:
        return 1.0
    if not (pred_toks and gold_toks):
        return 0.0

    # Multiset intersection: each token counts at most min(pred, gold) times.
    overlap = sum((Counter(pred_toks) & Counter(gold_toks)).values())
    if overlap == 0:
        return 0.0

    precision = overlap / max(1, len(pred_toks))
    recall = overlap / max(1, len(gold_toks))
    return (2 * precision * recall) / (precision + recall + 1e-12)


# ---------------------------
# ✅ ONLY ADDITION: output cleaning for interactive answer printing
# ---------------------------
def clean_out_answer(text: str) -> str:
    """Prepare a stored answer for display only.

    Removes the ``@@`` markers and normalizes spacing/punctuation. This is
    exactly the transformation ``clean_view`` performs, so it is reused here.
    """
    return clean_view(text)


# ---------------------------
# Embedding helpers
# ---------------------------
def _l2norm(x: np.ndarray) -> np.ndarray:
    """Scale each row of a 2-D array to unit L2 length.

    A small constant is added to every norm so that all-zero rows do not
    cause a division by zero.
    """
    row_norms = np.linalg.norm(x, axis=1, keepdims=True)
    return x / (row_norms + 1e-12)

def fuse_embeddings(e_clean: np.ndarray, e_morph: np.ndarray, alpha: float) -> np.ndarray:
    """Blend the clean-view and morph-view embeddings into one unit vector.

    Computes ``alpha * e_clean + (1 - alpha) * e_morph`` and renormalizes,
    because a weighted sum of unit vectors is generally not unit length.
    A 1-D input yields a single normalized vector; otherwise every row is
    normalized separately.
    """
    blended = alpha * e_clean + (1.0 - alpha) * e_morph
    if blended.ndim != 1:
        return _l2norm(blended)
    # Single vector: normalize in place of the row-wise helper.
    return blended / (np.linalg.norm(blended) + 1e-12)


# ---------------------------
# Retrieval index
# ---------------------------
@dataclass
class QAIndex:
    """Retrieval index: question embeddings to search plus the answers they map to."""
    mode: str  # label of how the index was built, e.g. "MORPH_ONLY" or "DUAL_FUSED(alpha=...)"
    q_text: List[str]  # text of each indexed question
    q_emb: np.ndarray  # question embeddings that retrieve_top1 scores against
    ans_text: List[str]  # stored answers, in morph-marker view
    a_emb: np.ndarray  # MORPH answer embeddings

def build_index_morph_only(model: SentenceTransformer, train_rows: List[Dict[str,str]]) -> QAIndex:
    """Build the MORPH_ONLY index from the training rows.

    Questions and answers are both taken in morph-marker view (``@@`` kept)
    and encoded with normalized embeddings.
    """
    encode_opts = dict(convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=False)

    questions = [morph_marker_view(row["question"]) for row in train_rows]
    answers = [morph_marker_view(row["answer"]) for row in train_rows]

    question_vecs = model.encode(questions, **encode_opts)
    answer_vecs = model.encode(answers, **encode_opts)

    return QAIndex(
        mode="MORPH_ONLY",
        q_text=questions,
        q_emb=question_vecs,
        ans_text=answers,
        a_emb=answer_vecs,
    )

def build_index_dual_fused(model: SentenceTransformer, train_rows: List[Dict[str,str]], alpha: float) -> QAIndex:
    """Build the DUAL_FUSED index from the training rows.

    Each question is encoded twice, once in clean view and once in
    morph-marker view, and the two embeddings are blended with
    ``fuse_embeddings`` using ``alpha`` as the clean-view weight. Answers
    are stored and encoded in morph-marker view.
    """
    encode_opts = dict(convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=False)

    clean_questions = [clean_view(row["question"]) for row in train_rows]
    morph_questions = [morph_marker_view(row["question"]) for row in train_rows]
    answers = [morph_marker_view(row["answer"]) for row in train_rows]

    clean_vecs = model.encode(clean_questions, **encode_opts)
    morph_vecs = model.encode(morph_questions, **encode_opts)
    fused_vecs = fuse_embeddings(clean_vecs, morph_vecs, alpha)

    answer_vecs = model.encode(answers, **encode_opts)

    # Keep both textual views side by side for inspection.
    labels = [
        f"CLEAN||MORPH: {qc} || {qm}"
        for qc, qm in zip(clean_questions, morph_questions)
    ]
    return QAIndex(
        mode=f"DUAL_FUSED(alpha={alpha})",
        q_text=labels,
        q_emb=fused_vecs,
        ans_text=answers,
        a_emb=answer_vecs,
    )

def retrieve_top1(index: QAIndex, q_vec: np.ndarray) -> Tuple[int, float]:
    """Return the position and score of the best-matching indexed question.

    The score is the dot product between ``q_vec`` and each row of
    ``index.q_emb``; the row with the highest score wins.
    """
    scores = index.q_emb.dot(q_vec)
    best = int(scores.argmax())
    return best, float(scores[best])


# ---------------------------
# BERTScore helper
# ---------------------------
def _bert_lang_try(preds: List[str], golds: List[str]) -> Optional[float]:
    """Compute the mean BERTScore F1, trying several language codes.

    The languages ``"kk"``, ``"tr"`` and ``"en"`` are tried in that order
    and the first one that succeeds determines the result. This is a
    best-effort metric: failures never propagate, but each one is reported
    so that a missing BERTScore value can be diagnosed.

    Args:
        preds: Predicted answers.
        golds: Reference answers, aligned with ``preds``.

    Returns:
        The mean F1 as a float, or ``None`` when ``bert_score`` is not
        available, ``preds`` is empty, or every language attempt failed.
    """
    if bert_score is None or not preds:
        return None
    for lang in ("kk", "tr", "en"):
        try:
            _, _, f1 = bert_score(preds, golds, lang=lang, rescale_with_baseline=True)
            # bert_score normally returns tensors; fall back to np.array for
            # anything that does not offer .numpy().
            arr = f1.numpy() if hasattr(f1, "numpy") else np.array(f1)
            return float(np.mean(arr))
        except Exception as exc:
            # Still best-effort, but no longer silent.
            print(f"[BERTScore] lang={lang} failed: {type(exc).__name__}: {exc}")
            continue
    return None


# ---------------------------
# Evaluation (generic for any index mode)
# ---------------------------
def eval_with_index(model: SentenceTransformer, index: QAIndex, test_rows: List[Dict[str,str]], alpha: float) -> Tuple[Dict[str,Any], List[Dict[str,Any]]]:
    """Evaluate top-1 retrieval of ``index`` on the held-out rows.

    Test questions are embedded the same way the index was built: plain
    morph-marker view when ``index.mode`` starts with ``"MORPH_ONLY"``,
    otherwise the clean and morph views fused with ``alpha``. For every
    test question the best-matching stored answer is compared with the
    gold answer (morph-marker view).

    Returns:
        A pair ``(summary, details)``. ``summary`` maps metric names to
        their mean over the test rows (plus ``"BERTScoreF1@1"`` when it
        could be computed); ``details`` holds one dictionary per test row.
    """
    encode_opts = dict(convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=False)

    # The morph text is what gets logged in both modes.
    morph_questions = [morph_marker_view(row["question"]) for row in test_rows]
    if index.mode.startswith("MORPH_ONLY"):
        query_vecs = model.encode(morph_questions, **encode_opts)
    else:
        clean_questions = [clean_view(row["question"]) for row in test_rows]
        clean_vecs = model.encode(clean_questions, **encode_opts)
        morph_vecs = model.encode(morph_questions, **encode_opts)
        query_vecs = fuse_embeddings(clean_vecs, morph_vecs, alpha)

    gold_answers = [morph_marker_view(row["answer"]) for row in test_rows]
    gold_vecs = model.encode(gold_answers, **encode_opts)

    details = []
    for n in range(len(test_rows)):
        hit, q_score = retrieve_top1(index, query_vecs[n])
        predicted = index.ans_text[hit]
        reference = gold_answers[n]

        exact = 1.0 if norm_for_exact(predicted) == norm_for_exact(reference) else 0.0
        f1 = token_f1(predicted, reference)
        q_sim = float(q_score)

        # Semantic hit: retrieved answer embedding vs. gold answer embedding.
        ans_cos = float(np.dot(index.a_emb[hit], gold_vecs[n]))
        sem_hit = 1.0 if ans_cos >= SEM_THR else 0.0

        details.append({
            "mode": index.mode,
            "test_question": morph_questions[n],
            "gold_answer": reference,
            "pred_answer": predicted,
            "QSim": q_sim,
            "Exact": exact,
            "TokenF1": f1,
            "AnsCos": ans_cos,
            "SemHit": sem_hit
        })

    def _mean_of(field: str) -> float:
        return float(np.mean([d[field] for d in details]))

    summary = {
        "Mode": index.mode,
        "Exact@1": _mean_of("Exact"),
        "TokenF1@1": _mean_of("TokenF1"),
        "MeanCos@1(QSim)": _mean_of("QSim"),
        f"Semantic@1(ans_cos≥{SEM_THR})": _mean_of("SemHit"),
    }

    if bert_score is not None and details:
        bert_f1 = _bert_lang_try(
            [d["pred_answer"] for d in details],
            [d["gold_answer"] for d in details],
        )
        if bert_f1 is not None:
            summary["BERTScoreF1@1"] = float(bert_f1)

    return summary, details


# ---------------------------
# Pretty print + export
# ---------------------------
def print_result_table(rows: List[Dict[str,Any]]):
    """Print one section per result row with right-aligned metric names.

    Metrics are listed in the order in which their keys first appear across
    all rows; a row simply skips keys it does not have. Float values are
    shown with six decimals, everything else as-is.
    """
    print("\n==================== RESULTS (MORPH_ONLY vs DUAL_FUSED) ====================")

    # Ordered union of all keys (dicts preserve insertion order).
    ordered_keys = list(dict.fromkeys(key for row in rows for key in row))

    for row in rows:
        print("\n---", row.get("Mode", "MODE"), "---")
        for key in ordered_keys:
            if key in row:
                value = row[key]
                shown = f"{value:.6f}" if isinstance(value, float) else f"{value}"
                print(f"{key:>28}: {shown}")

def export_csv(details: List[Dict[str,Any]], path: str):
    """Write the per-question detail rows to a UTF-8 CSV file.

    Column names and their order come from the first row. Nothing is
    written, and nothing is printed, when ``details`` is empty.
    """
    if not details:
        return

    columns = list(details[0])
    with open(path, "w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(details)

    print(f"\n✅ CSV exported: {path}  (rows={len(details)})")


# ---------------------------
# Interactive QA (uses DUAL by default)
# ---------------------------
def _answer_query(model: SentenceTransformer, index: QAIndex, query: str, alpha: float, fused: bool) -> Tuple[str, float]:
    """Retrieve the top-1 answer for ``query`` and return ``(display_text, similarity)``.

    With ``fused`` the query is embedded in clean and morph views and the
    two are blended with ``alpha``; otherwise only the morph view is used.
    The returned answer has its ``@@`` markers removed for display.
    """
    encode_opts = dict(convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=False)
    if fused:
        e_clean = model.encode([clean_view(query)], **encode_opts)[0]
        e_morph = model.encode([morph_marker_view(query)], **encode_opts)[0]
        q_vec = fuse_embeddings(e_clean, e_morph, alpha)
    else:
        q_vec = model.encode([morph_marker_view(query)], **encode_opts)[0]

    j, sim = retrieve_top1(index, q_vec)
    # Cleaning affects the printed text only, not retrieval.
    return clean_out_answer(index.ans_text[j]), sim


def interactive(model: SentenceTransformer, idx_morph: QAIndex, idx_dual: QAIndex, alpha: float):
    """Run the interactive question loop until the user exits.

    Starts in DUAL_FUSED mode. ``/morph`` and ``/dual`` switch the
    retrieval mode; ``exit``, ``quit`` or ``q`` leave the loop. Any other
    non-empty input is treated as a question and the top-1 answer is
    printed together with its question similarity.

    End of input (``EOFError`` from ``input``, e.g. closed or piped stdin)
    is treated like ``exit`` so that the caller can continue instead of
    crashing.
    """
    mode = "DUAL"  # default
    print("\n==================== INTERACTIVE QA (SEG_AS_BASE) ====================")
    print("Commands: /morph  -> MORPH_ONLY mode")
    print("          /dual   -> DUAL_FUSED mode")
    print("          exit    -> finish and run metrics\n")

    while True:
        try:
            q = input("Сұрақ: ").strip()
        except EOFError:
            # No more input available: behave as if the user typed "exit".
            break
        if not q:
            continue

        command = q.lower()
        if command in {"exit","quit","q"}:
            break
        if command == "/morph":
            mode = "MORPH"
            print("✅ Switched to MORPH_ONLY\n")
            continue
        if command == "/dual":
            mode = "DUAL"
            print(f"✅ Switched to DUAL_FUSED(alpha={alpha})\n")
            continue

        use_dual = mode != "MORPH"
        index = idx_dual if use_dual else idx_morph
        out_ans, sim = _answer_query(model, index, q, alpha, fused=use_dual)
        print(f"\n[{index.mode}] Top1 QSim={sim:.4f}\n{out_ans}\n")


# ---------------------------
# MAIN
# ---------------------------
def main():
    """Run the whole experiment: load, split, index, chat, evaluate, export.

    The dataset is split once with a fixed seed; both indices are built from
    the same training part. Evaluation of both modes on the same test part
    starts only after the interactive session ends.
    """
    random.seed(SEED)
    np.random.seed(SEED)

    resolved_path = find_data_path(DATA_PATH)
    records = load_qa_records(resolved_path)
    print(f"[SEG_AS_BASE] Loaded: {len(records)} | {resolved_path}")

    train_rows, test_rows = train_test_split(
        records, test_size=TEST_SIZE, random_state=SEED, shuffle=True
    )
    for line in (
        "\n==================== ONE SPLIT ====================",
        f"Total={len(records)} | Train={len(train_rows)} | Test={len(test_rows)} | seed={SEED} | test_size={TEST_SIZE}",
        "Mode A = MORPH_ONLY (keeps @@)",
        f"Mode B = DUAL_FUSED(clean+morph), alpha={ALPHA}",
    ):
        print(line)

    model = SentenceTransformer(MODEL_NAME, device=DEVICE)
    print(f"\nModel: {MODEL_NAME}")

    # Both indices are built once and shared by the chat loop and the evaluation.
    print("\n[1/3] Building MORPH_ONLY index...")
    idx_morph = build_index_morph_only(model, train_rows)

    print("[2/3] Building DUAL_FUSED index...")
    idx_dual = build_index_dual_fused(model, train_rows, ALPHA)

    interactive(model, idx_morph, idx_dual, ALPHA)

    # Automatic evaluation once the interactive session is over.
    print("\n[3/3] Running evaluation...")
    started = time.time()
    summaries, detail_sets = [], []
    for idx in (idx_morph, idx_dual):
        summary, rows = eval_with_index(model, idx, test_rows, ALPHA)
        summaries.append(summary)
        detail_sets.append(rows)
    elapsed = time.time() - started

    print_result_table(summaries)
    print(f"\nTime: {elapsed:.2f}s")
    if bert_score is None:
        print("Note: BERTScore орнатылмаған (pip install bert-score).")

    if EXPORT_CSV:
        for rows, csv_path in zip(detail_sets, (CSV_PATH_MORPH, CSV_PATH_DUAL)):
            export_csv(rows, csv_path)

    print("\n✅ DONE: MORPH_ONLY vs DUAL_FUSED finished (reviewer-proof, same split, same seed)")

if __name__ == "__main__":
    main()


[SEG_AS_BASE] Loaded: 14998 | kazakh_segmented_15000.json

Total=14998 | Train=13498 | Test=1500 | seed=42 | test_size=0.1
Mode A = MORPH_ONLY (keeps @@)
Mode B = DUAL_FUSED(clean+morph), alpha=0.5


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/526 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2

[1/3] Building MORPH_ONLY index...
[2/3] Building DUAL_FUSED index...

Commands: /morph  -> MORPH_ONLY mode
          /dual   -> DUAL_FUSED mode
          exit    -> finish and run metrics

Сұрақ: Python тілінің негізгі артықшылықтарын атаңыз.

[DUAL_FUSED(alpha=0.5)] Top1 QSim=0.8520
Python — оқуға жеңіл және интуитивті синтаксиске ие тіл. \n` \n` ` ` ` python \n`сан = 5 \n`if сан > 3: \n` print (\ ` Үлкен сан \ `) \n` ` ` `

Сұрақ: Python-дағы REPL ортасының практикалық пайдасы неде?

[DUAL_FUSED(alpha=0.5)] Top1 QSim=0.8003
REPL — Read-Eval-Print Loop. Python интерпретаторы интерактивті түрде осы жүйемен жұмыс істейді. \n` \n` ` ` ` bash \n` $ python \n` > > > 2 + 2 \n`4 \n` ` ` `

Сұрақ: CPython, PyPy сияқты интерпретаторлардың айырмашылығы қандай?

[DUAL_FUSED(alpha=0.5)] Top1 QSim=0.7364
Иә, Settings → Project → Python Interpreter арқылы бірнеше орта қосуға болады. \n` \n` ` ` ` python \n` # Әр жоба үшін жеке ор

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: bert-base-multilingual-cased
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
cls.seq_relationship.bias                  | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED |  | 
cls.predictions.bias                       | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED |  | 
cls.predictions.transform.dense.weight     | UNEXPECTED |  | 
cls.seq_relationship.weight                | UNEXPECTED |  | 
cls.predictions.transform.dense.bias       | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: bert-base-multilingual-cased
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
cls.seq_relationship.bias                  | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.bias   | UNEXPECTED |  | 
cls.predictions.bias                       | UNEXPECTED |  | 
cls.predictions.transform.LayerNorm.weight | UNEXPECTED |  | 
cls.predictions.transform.dense.weight     | UNEXPECTED |  | 
cls.seq_relationship.weight                | UNEXPECTED |  | 
cls.predictions.transform.dense.bias       | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.




--- MORPH_ONLY ---
                        Mode: MORPH_ONLY
                     Exact@1: 0.026667
                   TokenF1@1: 0.489012
             MeanCos@1(QSim): 0.943246
    Semantic@1(ans_cos≥0.85): 0.320667
               BERTScoreF1@1: 0.831895

--- DUAL_FUSED(alpha=0.5) ---
                        Mode: DUAL_FUSED(alpha=0.5)
                     Exact@1: 0.026000
                   TokenF1@1: 0.505285
             MeanCos@1(QSim): 0.921644
    Semantic@1(ans_cos≥0.85): 0.377333
               BERTScoreF1@1: 0.836318

Time: 59.77s

✅ CSV exported: seg_morph_only_details.csv  (rows=1500)

✅ CSV exported: seg_dual_fused_details.csv  (rows=1500)

✅ DONE: MORPH_ONLY vs DUAL_FUSED finished (reviewer-proof, same split, same seed)


