# SGR Quality: Executive Notebook (C-Level)

Цель: показать **сильные/негативные кейсы** на уровне seller turn в стабильном продуктовом контракте.

- Фиксированный режим исполнения: bundled evaluator + bundled judge, full context, full LLM trace.
- Негативные кейсы: строго `judge_label = 0` в **latest successful run**.
- Бизнес-фокус: улучшение качества переписок как драйвер роста `retention`.


In [1]:
from __future__ import annotations

import sqlite3
from pathlib import Path

import pandas as pd

# Display settings for this notebook: no truncation of cell text
# (max_colwidth=None), up to 200 columns, and a 240-character render width.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 240)


def resolve_db_path() -> Path:
    """Locate ``dialogs.db`` in the current working directory or an ancestor.

    The search starts at the resolved current working directory and walks up
    towards the filesystem root; the nearest match wins.

    Returns:
        Path of the first ``dialogs.db`` found that is a regular file.

    Raises:
        FileNotFoundError: If neither the working directory nor any of its
            ancestors contains a ``dialogs.db`` file.
    """
    cwd = Path.cwd().resolve()
    # ``cwd.parents`` already begins with ``cwd.parent``, so a single pass over
    # ``cwd`` plus its parents probes every candidate exactly once.
    for directory in (cwd, *cwd.parents):
        candidate = directory / "dialogs.db"
        # ``is_file`` rather than ``exists``: a directory that happens to be
        # named ``dialogs.db`` must not be handed to ``sqlite3.connect``.
        if candidate.is_file():
            return candidate
    raise FileNotFoundError("dialogs.db not found. Run: make init-fresh && make scan")


# Resolve the database file once and open the connection that the query
# helpers in this notebook (qdf, latest_run_id) read from.
DB_PATH = resolve_db_path()
conn = sqlite3.connect(DB_PATH)
# sqlite3.Row allows reading columns by name, e.g. row["run_id"].
conn.row_factory = sqlite3.Row


def qdf(sql: str, params: tuple[object, ...] = ()) -> pd.DataFrame:
    """Run *sql* on the notebook connection and return the result as a DataFrame.

    Args:
        sql: Query text to execute.
        params: Values passed through to ``pandas.read_sql_query`` for the
            query's placeholders; empty by default.
    """
    frame = pd.read_sql_query(sql, con=conn, params=params)
    return frame


def latest_run_id() -> str:
    """Return the ``run_id`` of the most recently started successful scan run.

    Raises:
        ValueError: If ``scan_runs`` contains no row with ``status='success'``.
    """
    query = """
        SELECT run_id
        FROM scan_runs
        WHERE status='success'
        ORDER BY started_at_utc DESC
        LIMIT 1
        """
    newest = conn.execute(query).fetchone()
    if newest is not None:
        return str(newest["run_id"])
    raise ValueError("No successful run found. Run: make scan")


def as_yes_no(value: object) -> str:
    """Render a numeric flag as "Да"/"Нет"; missing values become "N/A".

    Any value whose integer conversion is non-zero counts as "Да".
    """
    missing = pd.isna(value)
    if missing:
        return "N/A"
    if int(value) != 0:
        return "Да"
    return "Нет"


def as_pct(value: object) -> str:
    """Format a fraction as a percentage with one decimal; missing values become "N/A"."""
    return "N/A" if pd.isna(value) else format(float(value), ".1%")


def style_business_table(df: pd.DataFrame, caption: str | None = None):
    """Display *df* as a left-aligned, text-wrapping table with the index hidden.

    An empty frame is not rendered; a short notice is printed instead and the
    function returns. A truthy *caption* is placed above the table in bold.

    Args:
        df: Table to show.
        caption: Optional caption text.
    """
    if df.empty:
        print("Таблица пустая для выбранного условия.")
        return

    # CSS applied to every data cell: wrap long text and align top-left.
    cell_css = {
        "white-space": "pre-wrap",
        "overflow-wrap": "anywhere",
        "word-break": "break-word",
        "vertical-align": "top",
        "text-align": "left",
    }
    # Header cells: left-aligned on a light grey background.
    header_rule = {
        "selector": "th",
        "props": [
            ("text-align", "left"),
            ("white-space", "normal"),
            ("background-color", "#f4f6f8"),
        ],
    }
    # Caption: bold, left-aligned, above the table.
    caption_rule = {
        "selector": "caption",
        "props": [
            ("caption-side", "top"),
            ("text-align", "left"),
            ("font-weight", "bold"),
        ],
    }

    styler = df.style.hide(axis="index")
    styler = styler.set_properties(**cell_css)
    styler = styler.set_table_styles([header_rule, caption_rule])
    if caption:
        styler = styler.set_caption(caption)
    display(styler)


# Recommendation text per rule key; the negative-cases cell maps the rule
# column through this dict and falls back to a generic text for other keys.
NEGATIVE_RECOMMENDATIONS = {
    "greeting": "Добавьте короткое персональное приветствие перед полезной частью ответа.",
    "upsell": "Добавьте релевантный следующий платный шаг, а не только статус/скидку.",
    "empathy": "Сначала признавайте ситуацию клиента, затем переходите к решению.",
}

# Resolve and print the run this report describes, plus the database in use.
RUN_ID = latest_run_id()
print(f"Используется последний успешный run_id: {RUN_ID}")
print(f"База: {DB_PATH}")
print("display.max_colwidth:", pd.get_option("display.max_colwidth"))
# Sanity check that the untruncated-text display option set above is active.
assert pd.get_option("display.max_colwidth") is None


Используется последний успешный run_id: scan_a638d3fc8b97
База: /Users/ablackman/go/src/github.com/tetraminz/sales_protocol/dialogs.db
display.max_colwidth: None


## 1) KPI latest run

Ниже сводка по объему проверки, покрытию judge и числу сильных/негативных turn-кейсов.


## Business Effect: Retention

Как читать этот отчет для бизнеса:
- зеленые зоны по правилам = более стабильное качество общения в критичных точках диалога;
- снижение judge-confirmed негативов = меньше коммуникационных срывов и выше шанс повторных покупок;
- цель руководства: управлять трендом качества как опережающим индикатором `retention`.


In [2]:
# Metadata of the latest successful run (model, volumes, timestamps, summary).
# The KPI cards below read row 0 of this frame.
run_snapshot = qdf(
    """
    WITH latest_run AS (
        SELECT run_id
        FROM scan_runs
        WHERE status='success'
        ORDER BY started_at_utc DESC
        LIMIT 1
    )
    SELECT
      run_id,
      model,
      selected_conversations,
      messages_count,
      started_at_utc,
      finished_at_utc,
      summary_json
    FROM scan_runs
    WHERE run_id=(SELECT run_id FROM latest_run)
    """
)

# One-row KPI aggregate over scan_results of the latest successful run.
#   final_score   - per seller turn, share of its checks with judge_label = 1
#                   (checks without a judge label count as 0).
#   strong_turns  - distinct seller turns with final_score >= 0.90.
#   judge_coverage - judged checks / all checks, NULL when there are no checks.
# SUM() yields NULL over zero rows, so the counters are wrapped in COALESCE to
# stay 0 (like the COUNT-based columns) when the run has no scan_results;
# otherwise the int(...) conversions in the KPI cards would fail.
kpi = qdf(
    """
    WITH latest_run AS (
        SELECT run_id
        FROM scan_runs
        WHERE status='success'
        ORDER BY started_at_utc DESC
        LIMIT 1
    ),
    base AS (
        SELECT
            seller_message_id,
            judge_label
        FROM scan_results sr
        WHERE sr.run_id=(SELECT run_id FROM latest_run)
    ),
    scored AS (
        SELECT
            b.*,
            AVG(CASE WHEN b.judge_label = 1 THEN 1.0 ELSE 0.0 END)
                OVER (PARTITION BY b.seller_message_id) AS final_score
        FROM base b
    )
    SELECT
      COUNT(*) AS total_rule_checks,
      COUNT(DISTINCT seller_message_id) AS seller_turns,
      COALESCE(SUM(CASE WHEN judge_label IS NOT NULL THEN 1 ELSE 0 END), 0) AS judged_checks,
      COALESCE(SUM(CASE WHEN judge_label = 1 THEN 1 ELSE 0 END), 0) AS judge_true_checks,
      COALESCE(SUM(CASE WHEN judge_label = 0 THEN 1 ELSE 0 END), 0) AS negative_checks,
      COUNT(DISTINCT CASE WHEN final_score >= 0.90 THEN seller_message_id END) AS strong_turns,
      CASE
        WHEN COUNT(*) = 0 THEN NULL
        ELSE 1.0 * SUM(CASE WHEN judge_label IS NOT NULL THEN 1 ELSE 0 END) / COUNT(*)
      END AS judge_coverage
    FROM scored
    """
)

# KPI cards: one (label, value) pair per row, taken from row 0 of the run
# snapshot and of the KPI aggregate.
cards = pd.DataFrame(
    [
        {"Показатель": title, "Значение": shown}
        for title, shown in (
            ("Run ID", str(run_snapshot.loc[0, "run_id"])),
            ("Модель", str(run_snapshot.loc[0, "model"])),
            ("Проверок по правилам", int(kpi.loc[0, "total_rule_checks"])),
            ("Seller turn", int(kpi.loc[0, "seller_turns"])),
            ("Judge coverage", as_pct(kpi.loc[0, "judge_coverage"])),
            ("Сильных turn (>=0.90)", int(kpi.loc[0, "strong_turns"])),
            ("Негативных проверок", int(kpi.loc[0, "negative_checks"])),
        )
    ]
)
style_business_table(cards, caption="KPI latest run")


Показатель,Значение
Run ID,scan_a638d3fc8b97
Модель,gpt-4.1-mini
Проверок по правилам,147
Seller turn,49
Judge coverage,100.0%
Сильных turn (>=0.90),44
Негативных проверок,5


## 2) Сильные кейсы (почему хорошо)

Контракт отбора:
- `judge_label = 1` (judge подтвердил решение evaluator, в том числе решение «Нет»)
- `final_score_for_turn >= 0.90`
- показываем пару `Customer -> Sales Rep`.


In [3]:
# Strong cases for the latest successful run.
#   base   - every rule check joined with the seller message text and, when a
#            customer_message_id is present, the customer message text
#            (empty string otherwise).
#   scored - adds per-seller-turn window aggregates: final_score (share of
#            checks with judge_label = 1), good_rules and total_rules.
# The final SELECT keeps checks with judge_label = 1 that belong to turns
# with final_score >= 0.90.
good_cases = qdf(
    """
    WITH latest_run AS (
        SELECT run_id
        FROM scan_runs
        WHERE status='success'
        ORDER BY started_at_utc DESC
        LIMIT 1
    ),
    base AS (
        SELECT
            sr.seller_message_id,
            sr.customer_message_id,
            sr.rule_key,
            sr.eval_hit,
            sr.judge_expected_hit,
            sr.judge_label,
            sr.eval_confidence,
            sr.judge_confidence,
            sr.eval_reason_code,
            sr.eval_reason,
            sr.judge_rationale,
            sr.evidence_quote,
            seller.text AS seller_text,
            COALESCE(customer.text, '') AS customer_text
        FROM scan_results sr
        JOIN messages seller ON seller.message_id = sr.seller_message_id
        LEFT JOIN messages customer ON customer.message_id = sr.customer_message_id
        WHERE sr.run_id = (SELECT run_id FROM latest_run)
    ),
    scored AS (
        SELECT
            b.*,
            AVG(CASE WHEN b.judge_label = 1 THEN 1.0 ELSE 0.0 END)
                OVER (PARTITION BY b.seller_message_id) AS final_score,
            SUM(CASE WHEN b.judge_label = 1 THEN 1 ELSE 0 END)
                OVER (PARTITION BY b.seller_message_id) AS good_rules,
            COUNT(*) OVER (PARTITION BY b.seller_message_id) AS total_rules
        FROM base b
    )
    SELECT
        customer_text,
        seller_text,
        rule_key,
        eval_hit,
        judge_expected_hit,
        ROUND(eval_confidence, 3) AS eval_conf,
        ROUND(judge_confidence, 3) AS judge_conf,
        eval_reason_code,
        eval_reason,
        judge_rationale,
        evidence_quote,
        ROUND(final_score, 3) AS final_score_for_turn,
        (good_rules || '/' || total_rules) AS good_rules_for_turn
    FROM scored
    WHERE final_score >= 0.90
      AND judge_label = 1
    ORDER BY final_score DESC, judge_conf DESC, seller_text, rule_key
    """
)

# Report how many rule checks satisfied the strong-case selection.
print(f"Найдено strong rule-checks: {len(good_cases)}")

# Replace the SQL column names with the headers shown in the report.
good_view = good_cases.rename(
    columns=dict(
        customer_text="Реплика покупателя",
        seller_text="Реплика продавца",
        rule_key="Правило (Rule)",
        eval_hit="Решение evaluator",
        judge_expected_hit="Ожидание judge",
        eval_conf="Уверенность evaluator",
        judge_conf="Уверенность judge",
        eval_reason_code="Код причины evaluator",
        eval_reason="Почему evaluator так решил",
        judge_rationale="Почему judge согласен/не согласен",
        evidence_quote="Дословная цитата",
        final_score_for_turn="Итоговая оценка turn",
        good_rules_for_turn="Сколько правил выполнено",
    )
)

# Show the two flag columns as Да/Нет; nothing to convert in an empty frame.
if not good_view.empty:
    good_view["Решение evaluator"] = good_view["Решение evaluator"].map(as_yes_no)
    good_view["Ожидание judge"] = good_view["Ожидание judge"].map(as_yes_no)

style_business_table(
    good_view,
    caption="Strong cases: полная таблица latest run",
)


Найдено strong rule-checks: 132


Реплика покупателя,Реплика продавца,Правило (Rule),Решение evaluator,Ожидание judge,Уверенность evaluator,Уверенность judge,Код причины evaluator,Почему evaluator так решил,Почему judge согласен/не согласен,Дословная цитата,Итоговая оценка turn,Сколько правил выполнено
,"** Good morning, this is Sarah from ModaMart. How can I assist you today?",empathy,Нет,Нет,1.0,1.0,courtesy_without_empathy,"Вежливое обращение присутствует, но нет признания ситуации клиента (эмпатии).",В реплике отсутствует признание ситуации клиента.,,1.0,3/3
,"** Good morning, this is Sarah from ModaMart. How can I assist you today?",greeting,Да,Да,1.0,1.0,greeting_present,В реплике продавца присутствует явное приветствие клиенту.,В реплике продавца есть явное приветствие.,"** Good morning, this is Sarah from ModaMart.",1.0,3/3
,"** Good morning, this is Sarah from ModaMart. How can I assist you today?",upsell,Нет,Нет,1.0,1.0,upsell_missing,В реплике отсутствует предложение следующего платного шага (допродажа).,В реплике нет предложения следующего платного шага.,,1.0,3/3
"** I think I’ll go with the NordicEdge Synthetic Insulated Jacket in large, like you mentioned.",** Great choice! I’ll get that set up for you with express shipping. You should receive it within 2-3 business days. Can I assist you with anything else today?,empathy,Нет,Нет,1.0,1.0,informational_without_empathy,Реплика содержит информативный ответ без признания или отражения ситуации клиента.,"В реплике нет проявления эмпатии, поэтому expected_hit = false.",You should receive it within 2-3 business days.,1.0,3/3
"** I think I’ll go with the NordicEdge Synthetic Insulated Jacket in large, like you mentioned.",** Great choice! I’ll get that set up for you with express shipping. You should receive it within 2-3 business days. Can I assist you with anything else today?,greeting,Нет,Нет,1.0,1.0,greeting_missing,В реплике отсутствует явное приветствие клиента.,"Реплика не содержит приветствия, ожидаемый hit = false.",,1.0,3/3
"** I think I’ll go with the NordicEdge Synthetic Insulated Jacket in large, like you mentioned.",** Great choice! I’ll get that set up for you with express shipping. You should receive it within 2-3 business days. Can I assist you with anything else today?,upsell,Да,Да,1.0,1.0,upsell_offer,Есть предложение дополнительной платной услуги — экспресс-доставка.,"Предложена экспресс-доставка, что является платным шагом.",I’ll get that set up for you with express shipping.,1.0,3/3
"** Well, I’m mainly looking for something warm and stylish for the winter. I noticed some of the jackets say they're insulated, but I’m not sure how warm they really are.",** I understand. Our insulated jackets are designed to keep you warm even in very cold temperatures. We use high-quality down and synthetic materials. Do you have any preferences between down or synthetic insulation?,greeting,Нет,Нет,1.0,1.0,greeting_missing,В этой реплике отсутствует явное приветствие клиента.,"В реплике продавца отсутствует приветствие, как и ожидалось.",,1.0,3/3
** I'd like to have it within the next week if possible.,"** In that case, I’d recommend our express shipping option. Let’s finalize your choice of jacket first. Are you leaning towards a specific model?",empathy,Нет,Нет,1.0,1.0,informational_without_empathy,"Реплика информативна, но не содержит признания ситуации клиента.",Реплика не содержит выражения признания ситуации клиента.,,1.0,3/3
** I'd like to have it within the next week if possible.,"** In that case, I’d recommend our express shipping option. Let’s finalize your choice of jacket first. Are you leaning towards a specific model?",greeting,Нет,Нет,1.0,1.0,greeting_missing,В реплике отсутствует приветствие клиенту.,В реплике продавца нет приветствия.,,1.0,3/3
** I'd like to have it within the next week if possible.,"** In that case, I’d recommend our express shipping option. Let’s finalize your choice of jacket first. Are you leaning towards a specific model?",upsell,Да,Да,1.0,1.0,upsell_offer,Предложена услуга экспресс-доставки как следующий платный шаг.,Реплика содержит предложение экспресс-доставки — платный шаг.,"In that case, I’d recommend our express shipping option.",1.0,3/3


## 3) Негативные кейсы (что исправить)

Источник: `latest_run` и `judge_label = 0`.


In [4]:
# Negative cases: every rule check of the latest successful run with
# judge_label = 0, joined with the seller text and (when present) the customer
# text. Ordered by judge confidence, highest first; a NULL confidence is
# treated as 0 for sorting.
negative_cases = qdf(
    """
    WITH latest_run AS (
        SELECT run_id
        FROM scan_runs
        WHERE status='success'
        ORDER BY started_at_utc DESC
        LIMIT 1
    )
    SELECT
      sr.seller_message_id,
      sr.customer_message_id,
      seller.text AS seller_text,
      COALESCE(customer.text, '') AS customer_text,
      sr.rule_key,
      sr.eval_hit,
      sr.judge_expected_hit,
      ROUND(sr.eval_confidence, 3) AS eval_conf,
      ROUND(sr.judge_confidence, 3) AS judge_conf,
      sr.eval_reason_code,
      sr.eval_reason,
      sr.judge_rationale,
      sr.evidence_quote
    FROM scan_results sr
    JOIN messages seller ON seller.message_id = sr.seller_message_id
    LEFT JOIN messages customer ON customer.message_id = sr.customer_message_id
    WHERE sr.run_id=(SELECT run_id FROM latest_run)
      AND sr.judge_label=0
    ORDER BY COALESCE(sr.judge_confidence, 0) DESC, sr.seller_message_id, sr.rule_key
    """
)

# Report the number of judge_label = 0 checks found in the latest run.
print(f"Негативных проверок в latest run: {len(negative_cases)}")

if not negative_cases.empty:
    # Replace the SQL column names with the headers shown in the report.
    negative_view = negative_cases.rename(
        columns=dict(
            customer_text="Реплика покупателя",
            seller_text="Реплика продавца",
            rule_key="Правило (Rule)",
            eval_hit="Решение evaluator",
            judge_expected_hit="Ожидание judge",
            eval_conf="Уверенность evaluator",
            judge_conf="Уверенность judge",
            eval_reason_code="Код причины evaluator",
            eval_reason="Почему evaluator так решил",
            judge_rationale="Почему judge согласен/не согласен",
            evidence_quote="Дословная цитата",
        )
    ).copy()

    # Show the two flag columns as Да/Нет.
    negative_view["Решение evaluator"] = negative_view["Решение evaluator"].map(as_yes_no)
    negative_view["Ожидание judge"] = negative_view["Ожидание judge"].map(as_yes_no)
    # Attach the per-rule recommendation; rules without one get a generic text.
    negative_view["Рекомендация для бизнеса"] = (
        negative_view["Правило (Rule)"]
        .map(NEGATIVE_RECOMMENDATIONS)
        .fillna("Разобрать кейс вручную и обновить playbook команды продаж.")
    )

    # Keep only the report columns (message IDs are dropped), in display order.
    negative_view = negative_view[
        [
            "Реплика покупателя",
            "Реплика продавца",
            "Правило (Rule)",
            "Решение evaluator",
            "Ожидание judge",
            "Уверенность evaluator",
            "Уверенность judge",
            "Код причины evaluator",
            "Почему evaluator так решил",
            "Почему judge согласен/не согласен",
            "Дословная цитата",
            "Рекомендация для бизнеса",
        ]
    ]

    style_business_table(negative_view, caption="Negative cases: latest run")
else:
    print("Негативных кейсов не найдено в последнем успешном запуске.")


Негативных проверок в latest run: 5


Реплика покупателя,Реплика продавца,Правило (Rule),Решение evaluator,Ожидание judge,Уверенность evaluator,Уверенность judge,Код причины evaluator,Почему evaluator так решил,Почему judge согласен/не согласен,Дословная цитата,Рекомендация для бизнеса
"No, I think that covers it.","Fantastic. I’ll email you the information, and if you have any more questions or need further assistance, feel free to reach out. We’re here to help. Thank you for your time and have a great day!",empathy,Нет,Да,1.0,1.0,informational_without_empathy,"Продавец выражает вежливость и готовность помочь, но не признаёт и не отражает состояние или ситуацию клиента.","Продавец выражает готовность помочь, что можно считать признанием ситуации клиента, но evaluator поставил false.",We’re here to help. Thank you for your time and have a great day!,"Сначала признавайте ситуацию клиента, затем переходите к решению."
That’s a nice touch. I’m also concerned about the pricing. I want good quality but at a reasonable price.,"We strive to offer quality products at competitive prices. Plus, if you sign up for our ModaMart Rewards program, you’ll receive exclusive discounts and early access to sales. As a new customer, we can offer you a 20% discount on your first purchase.",empathy,Нет,Да,1.0,1.0,informational_without_empathy,"Реплика информативна по цене и качеству, но не содержит признания или понимания беспокойств клиента.","Реплика не признаёт явно беспокойства клиента, хотя должна.",,"Сначала признавайте ситуацию клиента, затем переходите к решению."
"That sounds reassuring, but it's still quite a lot of money. Do you offer any payment plans?","Yes, we do offer financing options. You can break down the cost into manageable monthly payments at 0% interest if paid within 6 months. We also have a 30-day return policy if you’re not completely satisfied with your purchase.",empathy,Нет,Да,1.0,1.0,informational_without_empathy,"Продавец информирует о вариантах оплаты и политике возврата, но не признаёт непосредственно чувства или ситуацию клиента.","Продавец ранее признал проблему клиента, текущая реплика об оплате свидетельствует о признании ситуации.",,"Сначала признавайте ситуацию клиента, затем переходите к решению."
"No, I haven't really looked at the reviews yet.","I highly recommend it. They can be very informative. Plus, if you find that you're still not satisfied with the product, our customer service team is here to ensure you have a positive experience.",empathy,Нет,Да,0.95,0.95,informational_without_empathy,"Продавец не признает ситуацию или переживания клиента, а лишь предоставляет информацию и рекомендацию.","В реплике продавец выражает понимание (рекомендация отзывов и поддержка), значит есть эмпатия, оценка hit=false ошибочна.","I highly recommend it. They can be very informative. Plus, if you find that you're still not satisfied with the product, our customer service team is here to ensure you have a positive experience.","Сначала признавайте ситуацию клиента, затем переходите к решению."
"Well, I have been looking at some of your new fall collection, but I'm honestly not sure about a few things.",That's great to hear you've been checking out our new collection! What specifically has you unsure?,empathy,Нет,Да,0.9,0.9,informational_without_empathy,"Реплика позитивна и информативна, но нет признания чувств или ситуации клиента.","В контексте есть признание ситуации клиента (строка 3), но реплика не выражает эмпатию, поэтому hit=false неверно.",That's great to hear you've been checking out our new collection! What specifically has you unsure?,"Сначала признавайте ситуацию клиента, затем переходите к решению."


## 4) Глоссарий RU + EN


In [5]:
# Glossary rows as (field, Russian description, English description).
glossary = pd.DataFrame(
    [
        {"Поле": field, "RU": ru_text, "EN": en_text}
        for field, ru_text, en_text in (
            ("evaluator_hit_rate", "Доля eval_hit=1", "Evaluator hit rate"),
            ("judge_correctness", "Доля judge_label=1 среди judged", "Judge correctness on judged subset"),
            ("judge_coverage", "Доля judged среди всех eval", "Judge coverage"),
            ("full_llm_trace", "Полный audit след request/response/extracted сохраняется всегда", "Full LLM audit trace is always stored"),
            ("seller_message_id", "ID реплики продавца", "Seller turn message id"),
            ("customer_message_id", "ID последней релевантной реплики покупателя", "Relevant customer message id"),
            ("evidence_quote", "Дословная цитата из реплики продавца", "Evidence quote"),
        )
    ]
)
style_business_table(glossary, caption="Глоссарий ключевых полей")


Поле,RU,EN
evaluator_hit_rate,Доля eval_hit=1,Evaluator hit rate
judge_correctness,Доля judge_label=1 среди judged,Judge correctness on judged subset
judge_coverage,Доля judged среди всех eval,Judge coverage
full_llm_trace,Полный audit след request/response/extracted сохраняется всегда,Full LLM audit trace is always stored
seller_message_id,ID реплики продавца,Seller turn message id
customer_message_id,ID последней релевантной реплики покупателя,Relevant customer message id
evidence_quote,Дословная цитата из реплики продавца,Evidence quote


## 5) Technical Appendix

Здесь намеренно оставлены ID и метрики трассировки.


In [6]:
# Raw scan_results rows of the latest successful run, IDs included, ordered by
# conversation, seller message and rule.
appendix = qdf(
    """
    WITH latest_run AS (
        SELECT run_id
        FROM scan_runs
        WHERE status='success'
        ORDER BY started_at_utc DESC
        LIMIT 1
    )
    SELECT
      sr.conversation_id,
      sr.seller_message_id,
      sr.customer_message_id,
      sr.rule_key,
      sr.eval_hit,
      sr.eval_confidence,
      sr.judge_expected_hit,
      sr.judge_label,
      sr.judge_confidence,
      sr.eval_reason_code,
      sr.evidence_quote
    FROM scan_results sr
    WHERE sr.run_id=(SELECT run_id FROM latest_run)
    ORDER BY sr.conversation_id, sr.seller_message_id, sr.rule_key
    """
)

# Per-phase summary of llm_calls for the latest successful run: call count,
# calls with a non-empty error_message, total prompt/response characters,
# average and maximum latency, and the smallest trace_mode value in the group.
llm_stats = qdf(
    """
    WITH latest_run AS (
        SELECT run_id
        FROM scan_runs
        WHERE status='success'
        ORDER BY started_at_utc DESC
        LIMIT 1
    )
    SELECT
      phase,
      COUNT(*) AS calls,
      SUM(CASE WHEN error_message<>'' THEN 1 ELSE 0 END) AS errors,
      SUM(prompt_chars) AS prompt_chars,
      SUM(response_chars) AS response_chars,
      ROUND(AVG(latency_ms), 1) AS avg_latency_ms,
      MAX(latency_ms) AS max_latency_ms,
      MIN(trace_mode) AS trace_mode
    FROM llm_calls
    WHERE run_id=(SELECT run_id FROM latest_run)
    GROUP BY phase
    ORDER BY phase
    """
)

# Show both appendix frames as plain DataFrames (no business styling).
print("Appendix: scan_results (latest run)")
display(appendix)
print("\nAppendix: llm_calls summary")
display(llm_stats)


Appendix: scan_results (latest run)


Unnamed: 0,conversation_id,seller_message_id,customer_message_id,rule_key,eval_hit,eval_confidence,judge_expected_hit,judge_label,judge_confidence,eval_reason_code,evidence_quote
0,modamart__0_transcript,1,,empathy,0,1.0,0,1,1.0,courtesy_without_empathy,
1,modamart__0_transcript,1,,greeting,1,1.0,1,1,1.0,greeting_present,Hi there!
2,modamart__0_transcript,1,,upsell,0,1.0,0,1,1.0,upsell_missing,
3,modamart__0_transcript,3,2.0,empathy,0,0.9,0,1,0.9,informational_without_empathy,
4,modamart__0_transcript,3,2.0,greeting,0,0.9,0,1,0.9,greeting_missing,
...,...,...,...,...,...,...,...,...,...,...,...
142,modamart__4_transcript,93,92.0,greeting,0,1.0,0,1,1.0,greeting_missing,
143,modamart__4_transcript,93,92.0,upsell,1,1.0,1,1,1.0,upsell_offer,or you can order directly through our website at your convenience
144,modamart__4_transcript,95,94.0,empathy,0,1.0,0,1,1.0,courtesy_without_empathy,My pleasure! Feel free to reach out if you have any more questions. Have a great day!
145,modamart__4_transcript,95,94.0,greeting,0,1.0,0,1,1.0,greeting_missing,



Appendix: llm_calls summary


Unnamed: 0,phase,calls,errors,prompt_chars,response_chars,avg_latency_ms,max_latency_ms,trace_mode
0,evaluator,49,0,180935,30626,3510.5,6843,full
1,judge,49,0,148750,21638,2871.2,5794,full


In [7]:
# Close the SQLite connection opened at the top of the notebook.
conn.close()
