# ABI Trust + Question Quality Scoring Demo

This notebook demonstrates how to use `score.py` to evaluate a full conversation.

**Requirements:**

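- `middleware.py` with the ABI Trust Pipe implementation (TrustState / APIScorer / ABIEngine / Gate / GateInputs / GateResult / BasePipe). Note: the setup cell below checks for `abi_trust_pipeline.py` rather than `middleware.py`; confirm which module `score.py` actually imports.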
- `score.py` in the same directory as this notebook (the file that defines `score_conversation`).

The notebook will:
1. Construct a demo conversation.
2. Run `score_conversation` with `use_trust=True` (trust + ABI + question quality + gate).
3. Run `score_conversation` with `use_trust=False` (only question quality + gate).


In [1]:
from google.colab import drive
from pathlib import Path
# Attach Google Drive to the Colab runtime at /content/drive.
drive.mount('/content/drive')

# Project folder on Drive, made absolute via Path.resolve().
# NOTE: the next cell rebinds PROJ to a plain str holding the same location.
PROJ = Path('/content/drive/MyDrive/rag_bio_project')
PROJ = PROJ.resolve()
print(f'Project path: {PROJ}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Project path: /content/drive/MyDrive/rag_bio_project


In [2]:
# 挂载 + 切目录 + 修正 sys.path（Colab 一次性修好路径）
from google.colab import drive
drive.mount('/content/drive')

import os, sys, pathlib

PROJ = "/content/drive/MyDrive/rag_bio_project"
assert os.path.exists(PROJ), f"项目目录不存在: {PROJ}"

# 让 Python 能在该目录下找模块
if PROJ not in sys.path:
    sys.path.insert(0, PROJ)

# 切换工作目录（可选，但推荐）
os.chdir(PROJ)

# 检查模块是否真的在那儿
print("CWD:", os.getcwd())
print("Has module:", os.path.exists("abi_trust_pipeline.py"))
!ls -l abi_trust_pipeline.py


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
CWD: /content/drive/MyDrive/rag_bio_project
Has module: True
-rw------- 1 root root 33116 Nov 28 11:12 abi_trust_pipeline.py


In [3]:

import json
from typing import List, Dict

from score import score_conversation

def make_demo_conversation() -> List[Dict[str, str]]:
    """Build the fixed six-message demo dialogue in OpenAI messages format.

    The dialogue is one system prompt followed by alternating user and
    assistant turns, ending on an unanswered user question.

    Returns:
        A freshly built list of dicts, each with the keys "role" and
        "content" (in that order).
    """
    # (role, content) pairs in conversation order.
    turns = (
        ("system", "You are a careful financial advisor that answers in Chinese."),
        ("user", "请问如果我每个月收入一万块，大概应该存多少钱比较合适？"),
        ("assistant", "一般可以从20%-30%开始，根据你的风险偏好和现有负债做微调。"),
        ("user", "如果突然失业三个月，我现在应该准备多少紧急备用金？"),
        ("assistant", "通常建议准备3-6个月的基本生活支出作为紧急预备金。"),
        ("user", "那我应该先还信用卡还是先存钱做投资？"),
    )
    return [{"role": speaker, "content": text} for speaker, text in turns]

# Quick smoke check: build the demo conversation once and report its length.
print(f"Demo conversation constructed with {len(make_demo_conversation())} messages.")


Demo conversation constructed with 6 messages.


In [4]:

conv = make_demo_conversation()

# Score the same demo conversation twice with mock ABI scores (offline
# testing); only the use_trust flag differs between the two runs.
# Mode 1: include trust & ABI in the score.
res_trust = score_conversation(conv, use_mock_scores=True, use_trust=True)
# Mode 2: use only question quality + gate.
res_no_trust = score_conversation(conv, use_mock_scores=True, use_trust=False)

# Print each result as indented JSON under an 80-character banner. The second
# report is preceded by one blank line. ensure_ascii=False keeps non-ASCII
# text readable instead of \u-escaped.
banner = "=" * 80
reports = (
    ("", "use_trust = True (trust + ABI + question quality + gate)", res_trust),
    ("\n", "use_trust = False (question quality + gate only)", res_no_trust),
)
for lead, title, result in reports:
    print(lead + banner)
    print(title)
    print(banner)
    print(json.dumps(result, ensure_ascii=False, indent=2))


use_trust = True (trust + ABI + question quality + gate)
{
  "conversation_overall_0_100": 75.3,
  "use_trust": true,
  "final_stage": "knowledge",
  "final_trust_weighted_0_1": 0.691,
  "final_A": 0.664,
  "final_B": 0.594,
  "final_I": 1.0,
  "stage_trajectory": [
    "contract",
    "contract",
    "knowledge",
    "knowledge"
  ],
  "n_user_turns": 3,
  "turn_scores": [
    {
      "overall_0_100": 83.2,
      "use_trust": true,
      "weights_effective": {
        "trust": 0.4,
        "abi": 0.2,
        "question": 0.3,
        "gate": 0.1
      },
      "components": {
        "trust_long_0_1": 0.812,
        "abi_local_0_1": 0.767,
        "abi_delta_mag": 0.1272,
        "question_quality_0_1": 0.929,
        "question_habit_0_1": 0.929,
        "gate_p_answer_0_1": 0.748
      },
      "abi": {
        "A_local": 0.717,
        "B_local": 0.699,
        "I_local": 0.885,
        "A_after": 0.554,
        "B_after": 0.52,
        "I_after": 1.0,
        "dA": 0.0541,
        