In [None]:
from sentence_transformers.util import cos_sim
import torch
import json
import numpy as np
from pathlib import Path


# Helpers from previous steps (assumed to already be defined in this session):
# load_question_cache(), save_question_entry()

# Load cache from disk. The two results are kept as module-level globals;
# get_cached_answer() pairs question_cache[i] with question_embs[i] by index.
question_cache, question_embs = load_question_cache()

def l2_distance(a, b):
    """Return the norm of ``a - b`` as computed by ``torch.norm`` defaults.

    With default arguments ``torch.norm`` yields the 2-norm of the
    difference, i.e. the Euclidean distance between the two tensors.
    """
    difference = a - b
    return torch.norm(difference)



def get_cached_answer(query, threshold=0.90):  # adjust threshold higher for stricter match
    """Look up an answer for ``query`` in the module-level question cache.

    The lookup runs in two passes over ``question_cache``:

    1. Exact pass: compares the stripped, lower-cased query text with each
       cached question. This pass never computes an embedding, and an exact
       match always wins over a merely similar entry wherever it sits in the
       cache.
    2. Similarity pass: embeds the query once with ``embedder`` and returns
       the cached entry with the highest cosine similarity, provided that
       similarity is at least ``threshold``.

    Args:
        query: The user's question text.
        threshold: Minimum cosine similarity for a cached entry to count as
            a match.

    Returns:
        ``(answer, "exact_match")`` or ``(answer, "similar_match")`` on a
        hit, ``(None, None)`` when nothing in the cache qualifies.
    """
    normalized_query = query.strip().lower()

    # Pass 1: exact text match. Done before any embedding work so that exact
    # hits are cheap and cannot be shadowed by an earlier fuzzy hit.
    for item in question_cache:
        if item["question"].strip().lower() == normalized_query:
            print(f"✅ Exact match for: {item['question']}")
            return item["answer"], "exact_match"

    # Nothing to compare against; skip the embedding call entirely.
    if not question_cache:
        return None, None

    # Pass 2: pick the most similar cached question, not just the first one
    # that happens to clear the threshold.
    q_emb = embedder.encode(query, convert_to_tensor=True)
    best_sim = float("-inf")
    best_item = None
    for i, item in enumerate(question_cache):
        sim = cos_sim(q_emb, question_embs[i]).item()
        if sim > best_sim:  # strict ">" keeps the earliest entry on ties
            best_sim, best_item = sim, item

    if best_item is not None and best_sim >= threshold:
        print(f"🤝 Similar match (cosine={best_sim:.3f}) for: {best_item['question']}")
        return best_item["answer"], "similar_match"

    return None, None



def _extract_generated_answer(resp, prompt):
    """Return only the model's continuation from a prompt-echoing response."""
    # When the response echoes the prompt verbatim, slice it off by length.
    # Splitting on the "### Answer" marker alone would cut at the wrong place
    # if the retrieved context or the question itself contains that marker.
    if resp.startswith(prompt):
        return resp[len(prompt):].strip()
    # Fallback (prompt not echoed byte-for-byte): original marker-based split.
    return resp.split("### Answer", 1)[-1].strip()


def answer(question: str, history=None):
    """Answer ``question`` from the cache, or generate and cache a new answer.

    First consults ``get_cached_answer``. On a hit the cached answer is
    returned as-is. Otherwise the function retrieves context with
    ``retrieve(question, k=1)``, builds the prompt, calls ``llm``, extracts
    the text that follows the prompt, stores the new question/answer pair
    with ``save_question_entry`` and returns the answer.

    Args:
        question: The user's question text.
        history: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The cached or newly generated answer text.
    """
    cached, reason = get_cached_answer(question)

    if cached:
        if reason == "exact_match":
            print("✅ Used exact cached question (no embedding needed).")
        elif reason == "similar_match":
            print("🤝 Used similar cached question (Cosine similarity).")
        return cached

    print("🧠 No cached match. Generating answer with LLM...")
    context = retrieve(question, k=1)
    # The prompt text is part of runtime behavior; only {context} and
    # {question} are interpolated.
    prompt = f"""You are a helpful assistant. 
    Only answer questions based on the context provided below. 
    If the information is not present in the context, respond with:
    "I don't know based on the available context."
    When possible, prefer quoting directly from the context rather than paraphrasing. 
    Ensure the answers don't have duplicate information.
    Do not make up or assume any facts beyond the context.
    When providing an answer:
    - Ensure clarity and conciseness.
    - If listing items (e.g., spells, weapons, races, features), return only **unique** items. Avoid duplicates or synonyms.
    - Format your answer as a **numbered list** or **clear bullet points** if appropriate.
    - Never invent facts outside the provided context.

    You are a Dungeon Master guiding players through a high-fantasy tabletop role-playing game. You have access to private source data including maps, NPC backstories, world lore, secret quest logic, and random outcome rules. You use this source data to maintain a consistent, immersive world and adapt to player decisions.

You must respond in **structured JSON format** with the following fields:


  "narration": "A vivid, immersive description of what the player experiences based on their action or question.",
  "player_options": "A list of clear, relevant actions the player might consider next.",
  "hidden_logic": "Any behind-the-scenes interpretation, dice outcomes, or consequences that should NOT be shown to the player.",
  "dm_notes": "Optional notes for the Dungeon Master (not shown to players) that track state, foreshadow, or suggest future branches."


Guidelines:
- Use rich sensory language in the `narration` to describe environments and NPCs.
- Present `player_options` as concise, relevant next moves based on the situation.
- Use `hidden_logic` to simulate dice rolls, resolve stealth, detect lies, determine outcomes, or trigger events. Keep this hidden from the player.
- Use `dm_notes` to internally track ongoing threads, NPC states, quest flags, or emerging tension.

### Example

Question: "How does two-weapon fighting work?"

Context: "When you take the Attack action and attack with a light melee weapon that you're holding in one hand, you can use a bonus action to attack with a different light melee weapon in the other hand. You don't add your ability modifier to the damage of the bonus attack, unless that modifier is negative."

Answer:
  "narration": "You slash with your sword in one hand, then quickly follow up with a dagger strike from your off-hand. The flurry of blows catches your opponent off guard.",
  "player_options": ["Attack with main-hand weapon", "Use bonus action for off-hand attack", "Disengage", "Retreat behind cover"],
  "hidden_logic": "Player is dual-wielding light melee weapons. Bonus action is consumed. Off-hand damage excludes ability modifier.",
  "dm_notes": "Track whether the player uses their bonus action this turn. If the player has the Two-Weapon Fighting style, modify damage accordingly."

### Example

Question: "Can I cast Shield in response to being hit?"

Context: "Shield: An invisible barrier of magical force appears and protects you. Until the start of your next turn, you have a +5 bonus to AC. This includes against the triggering attack. The spell is cast as a reaction when you are hit by an attack or targeted by Magic Missile."

Answer:
  "narration": "As the enemy's blade arcs toward you, a flash of arcane energy bursts forth, forming a shimmering shield that turns the strike aside at the last second.",
  "player_options": ["Cast Shield as a reaction", "Take the hit", "Use another reaction (if available)"],
  "hidden_logic": "Player was hit. Shield spell triggers as a reaction, increasing AC by +5 and possibly negating the hit.",
  "dm_notes": "Check if the attack still hits after Shield is applied. Deduct a 1st-level spell slot if cast."

### Example

Question: "Is there anything suspicious about the tavern owner?"

Context: "The tavern owner, Darnik, smiles warmly, but his left eye twitches when asked about the missing merchant. He insists no one unusual passed through in the last few days."

Answer:
  "narration": "Darnik's smile doesn't quite reach his eyes. As you press about the merchant, his fingers drum nervously on the bar — a subtle but telling sign.",
  "player_options": ["Press him harder for answers", "Try a Persuasion check", "Attempt to Insight his reaction", "Back off and observe"],
  "hidden_logic": "Insight check reveals he's hiding something. NPC is marked as suspicious. May trigger a quest flag.",
  "dm_notes": "If players confront Darnik or succeed in a check, consider revealing secret room in the cellar."

Never break character or refer to the format directly. This structure is for backend use only and should feel seamless to the player.


    ### Context
    {context}

    ### Question
    {question}

    ### Answer
    """
    resp = llm(prompt)[0]["generated_text"]
    final_answer = _extract_generated_answer(resp, prompt)

    # Store in disk-based cache
    # NOTE(review): this function does not touch the in-memory question_cache /
    # question_embs that get_cached_answer reads; confirm save_question_entry
    # keeps them in sync, otherwise new entries are only seen after a reload.
    embedding = embedder.encode(question, convert_to_tensor=True)
    save_question_entry(question, final_answer, embedding)

    return final_answer


Implementation Notes & Observations

1. Stronger context restriction. The instruction was updated to include: “Only answer questions based on the context provided below... If the information is not present, respond with: 'I don’t know based on the available context.'”
Result: fewer hallucinated answers. As expected, the assistant declines to answer when the context is insufficient.

2. "Prefer quoting" directive added. Result: the output shows higher fidelity to the source text, especially for spell rules and combat mechanics. The LLM now lifts exact spell wording when it is available.

3. Few-Shot Examples (JSON Output). Three examples were added directly before the context (combat, spell usage, exploration/NPC behavior). No major changes observed for structured JSON compliance. The model was already consistently filling JSON fields and maintaining narrative tone.

4. Added a new structured JSON file named character_races to enrich the chatbot’s knowledge about playable races. It is integrated via the existing load_docs pipeline, and the new content is available during context retrieval.
