In [None]:
!pip install openai langchain faiss-cpu sentence-transformers pymupdf langchain_community transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting pymupdf
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
import fitz
from langchain.chat_models import ChatOpenAI
from langchain.evaluation.qa import QAEvalChain

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import warnings
warnings.filterwarnings("ignore")
import textwrap

In [None]:
from google import genai
from google.genai import types

In [None]:
from google.api_core import retry


def is_retriable(e):
    """Return True when `e` is a Gemini API error with status code 429 or 503."""
    return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


# Monkey-patch generate_content with automatic retries, but only once.
# Without the guard, re-running this cell would wrap the already-wrapped method
# again and stack retry loops. The retry decorator is expected to expose the
# original function as `__wrapped__` (functools.wraps convention).
if not hasattr(genai.models.Models.generate_content, "__wrapped__"):
    genai.models.Models.generate_content = retry.Retry(
        predicate=is_retriable)(genai.models.Models.generate_content)

In [None]:
import os

# Prefer Colab's secret store; when the notebook runs outside Colab the
# `google.colab` package is missing, so fall back to an environment variable.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

if userdata is not None:
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
else:
    GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Fail early with a clear message instead of a confusing error at the first API call.
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set: add it to the Colab secrets or export it "
        "as an environment variable."
    )

In [None]:
PDF_PATH = "DnD_BasicRules_2018.pdf"

# Read the plain text of every page. The context manager closes the PDF handle
# as soon as the text is extracted (note: `document` is closed after this cell),
# and str.join avoids rebuilding the string on every page.
with fitz.open(PDF_PATH) as document:
    text = "".join(page.get_text("text") for page in document)

In [None]:
CHUNK_SIZE = 1000     # passed as chunk_size to the splitter
CHUNK_OVERLAP = 50    # passed as chunk_overlap to the splitter

# Wrap the whole rulebook text in a single Document and split it into chunks.
doc = Document(page_content=text)
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents([doc])

# Show a short summary instead of dumping every chunk into the cell output.
print(f"Split the rulebook into {len(chunks)} chunks")
if chunks:
    print("First chunk preview:", chunks[0].page_content[:200])



In [None]:
# Embed the chunks with the BAAI/bge-small-en model and build a FAISS
# vector store from them.
embedder = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en",
)
db = FAISS.from_documents(documents=chunks, embedding=embedder)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [None]:
def denseRetrieval(query, k=5):
  """Run a similarity search for `query` on the vector store `db`.

  Returns whatever `db.similarity_search` returns for the requested `k`.
  """
  nearest_chunks = db.similarity_search(query, k=k)
  return nearest_chunks

In [None]:
from rank_bm25 import BM25Okapi

import re


def _bm25_tokenize(text):
  """Lower-case `text` and return its word tokens with punctuation removed.

  Plain `str.split()` keeps case and punctuation, so a query token such as
  "others?" would never match the corpus token "others".
  """
  return re.findall(r"\w+", text.lower())


# Build the BM25 index over `chunks`; corpus and queries must be tokenized
# by the same function for term matching to work.
tokenized_text = [_bm25_tokenize(doc.page_content) for doc in chunks]
bm25 = BM25Okapi(tokenized_text)


def bm25_search(query, k=5):
  """Return the `k` chunks with the highest BM25 score for `query`.

  The result is a list of elements of `chunks`, best match first.
  """
  tokenized_query = _bm25_tokenize(query)
  scores = bm25.get_scores(tokenized_query)
  ranked_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k]
  return [chunks[i] for i in ranked_indices]

In [None]:
def mmr_search(query, k=5, fetch_k=20, lambda_param=0.5):
    """Run a max-marginal-relevance search for `query` on the vector store `db`.

    `k` and `fetch_k` are forwarded unchanged; `lambda_param` is forwarded
    as the store's `lambda_mult` argument.
    """
    diverse_chunks = db.max_marginal_relevance_search(
        query,
        k=k,
        fetch_k=fetch_k,
        lambda_mult=lambda_param,
    )
    return diverse_chunks

In [None]:
def rrf_search(query, k=5, alpha=60, fetch_k=10):
    """Combine dense and BM25 results with reciprocal rank fusion.

    Each retriever returns `fetch_k` candidates. A chunk at 1-based position
    `rank` in a list contributes `1 / (alpha + rank)` to its fused score;
    chunks are identified by their `page_content`, so a chunk returned by
    both retrievers accumulates both contributions.

    Args:
        query: the search string.
        k: number of fused results to return.
        alpha: smoothing constant added to the rank in the denominator.
        fetch_k: number of candidates requested from each retriever
            (default 10, the value that used to be hard-coded).

    Returns:
        A list of at most `k` `(page_content, score)` tuples, highest score first.
    """
    rankings = (
        denseRetrieval(query, k=fetch_k),
        bm25_search(query, k=fetch_k),
    )

    scores = {}
    for ranking in rankings:
        for rank, doc in enumerate(ranking, start=1):
            content = doc.page_content
            scores[content] = scores.get(content, 0) + 1 / (alpha + rank)

    ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    return ranked[:k]

In [None]:
query = "Can a Paladin heal others?"

# Run every retriever on the same query, keep the results for the generation
# step, and print the first 100 characters of each hit.
print("Dense Search:")
dense_array = list(denseRetrieval(query))
for doc in dense_array:
    print("-", doc.page_content[:100])

print("\nMMR Search:")
mmr_array = list(mmr_search(query))
for doc in mmr_array:
    print("-", doc.page_content[:100])

print("\nBM25 Search:")
bm25_array = list(bm25_search(query))
for doc in bm25_array:
    print("-", doc.page_content[:100])

# rrf_search yields (text, score) pairs; only the text is kept as context.
# With the default constant of 60 a fused score is at most 2/61 (about 0.033),
# so four decimals are needed to tell the results apart.
print("\nRRF Search:")
rrf_array = []
for content, score in rrf_search(query):
    rrf_array.append(content)
    print(f"- ({score:.4f}) {content[:100]}")

Dense Search:
- equal to five times your cleric level. Choose any creatures 
within 30 feet of you, and divide those
- Lathander, Pelor, and Re-Horakhty), gods of healing 
or endurance (such as Ilmater, Mishakal, Apollo
- panions can expect to receive free healing and care at 
a temple, shrine, or other established prese
- in subterranean lairs, and shining paladins stand like 
beacons against the darkness, it’s hard to b
- heal it. If healing is unavailable, the creature can at 
least be stabilized so that it isn’t killed

MMR Search:
- equal to five times your cleric level. Choose any creatures 
within 30 feet of you, and divide those
- operate in isolation from the 
others. This existence instills in 
the enclave’s members a fierce 
s
- panions can expect to receive free healing and care at 
a temple, shrine, or other established prese
- shield guardian as a bodyguard.
	 For adventurers, though, magic is key to their survival. 
Without 
- Its members understands that 
evil wears man

In [None]:

# Fetch the five best-matching chunks together with their scores,
# then order the (document, score) pairs by ascending score.
results = db.similarity_search_with_score(query, k=5)
results.sort(key=lambda pair: pair[1])

# Print the full text of every retrieved chunk under a numbered header.
for i, (doc, score) in enumerate(results):
    header = f"--- Chunk {i+1} ---"
    print(f"{header}\n{doc.page_content}\n")

--- Chunk 1 ---
equal to five times your cleric level. Choose any creatures 
within 30 feet of you, and divide those hit points among 
them. This feature can restore a creature to no more than 
half of its hit point maximum. You can’t use this feature 
on an undead or a construct.
Blessed Healer
Beginning at 6th level, the healing spells you cast on 
others heal you as well. When you cast a spell of 1st level 
or higher that restores hit points to a creature other than 
you, you regain hit points equal to 2 + the spell’s level.
Divine Strike
At 8th level, you gain the ability to infuse your weapon 
strikes with divine energy. Once on each of your turns 
when you hit a creature with a weapon attack, you can 
cause the attack to deal an extra 1d8 radiant damage to 
the target. When you reach 14th level, the extra damage 
increases to 2d8.
Supreme Healing
Starting at 17th level, when you would normally roll one 
or more dice to restore hit points with a spell, you instead

--- Chunk 2 ---

In [None]:
def gen_prompt(context_array, question=None):
  """Build the Dungeon Master prompt for the generator model.

  Args:
    context_array: retrieved context, either a single string or an iterable
      whose items are plain strings or objects with a `page_content` attribute.
    question: the user question; when omitted, the notebook-level `query`
      variable is used.

  Returns:
    The complete prompt string asking for a JSON answer.
  """
  if question is None:
    question = query  # fall back to the notebook-level query

  # Insert the chunk text itself rather than the repr() of the container,
  # which would add object noise and escaped newlines to the prompt.
  if isinstance(context_array, str):
    context = context_array
  else:
    context = "\n\n".join(
        str(getattr(item, "page_content", item)) for item in context_array
    )

  return f"""You are a wise, dramatic, and witty Dungeon Master in a Dungeons & Dragons campaign.
  You narrate responses like a storyteller guiding the party. Stay in character. Respond in JSON with the following attributes:

  -"summary": a short summary of what the user should do
  -"actions": list of D&D actions specific to the problem
  -"tone": tone as described at the start of the prompt ie wise and dramatic
  -"original_response": your full response based on the prompt

  Context:
  {context}

  Question:
  {question}

  **Instructions**:
  - Integrate the key points from all retrieved responses into a **cohesive, well-structured answer**.
  - Be descriptive and imaginative
  - Always respond in second-person (“you”) and add a touch of fantasy flavor. Be immersive.
  - Only use info in the rulebook
  -Reply ONLY in JSON

  """

In [None]:
model_name = "Qwen/Qwen2.5-3B-Instruct"

# Load the generator model and its tokenizer.
# `trust_remote_code` is intentionally not enabled: it allows Python code
# shipped in the model repository to run locally, and this checkpoint loads
# with the model classes built into transformers.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # so it can choose gpu/cpu
    torch_dtype="auto",
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [None]:
# Text-generation pipeline around the loaded model: sampling is disabled,
# only the newly generated text is returned, and a reply may use up to
# 5000 new tokens.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=False,
    max_new_tokens=5000,
    return_full_text=False,
)

Device set to use cuda:0


In [None]:
def _as_user_turn(prompt_text):
    """Wrap a prompt string as a one-message chat with the role "user"."""
    return [{"role": "user", "content": prompt_text}]


# One chat per retrieval strategy, each built from that strategy's context.
dense_msgs = _as_user_turn(gen_prompt(dense_array))
bm25_msgs = _as_user_turn(gen_prompt(bm25_array))
rrf_msgs = _as_user_turn(gen_prompt(rrf_array))
mmr_msgs = _as_user_turn(gen_prompt(mmr_array))

# Generate one answer per strategy with the local model.
dense_output = generator(dense_msgs)
bm25_output = generator(bm25_msgs)
rrf_output = generator(rrf_msgs)
mmr_output = generator(mmr_msgs)

In [None]:
# Gemini API client, authenticated with the key loaded earlier in the notebook.
client = genai.Client(
    api_key=GOOGLE_API_KEY,
)

def gen_faithfulness(context_array, answer, question=None):
  """Build the judge prompt that rates how well `answer` is supported by the context.

  Args:
    context_array: retrieved context, either a single string or an iterable
      whose items are plain strings or objects with a `page_content` attribute.
    answer: the generated answer to evaluate.
    question: the user question; when omitted, the notebook-level `query`
      variable is used.

  Returns:
    The prompt string asking for a 1-5 faithfulness rating with a brief explanation.
  """
  if question is None:
    question = query  # fall back to the notebook-level query

  # Show the judge the chunk text itself, not the repr() of the container.
  if isinstance(context_array, str):
    context = context_array
  else:
    context = "\n\n".join(
        str(getattr(item, "page_content", item)) for item in context_array
    )

  return f"""
  You are a D&D rules expert. Rate the FAITHFULNESS of the answer to the question, using the context.

  Context:
  {context}

  Question: {question}
  Answer: {answer}

  Is the answer fully supported by the context? Rate on a scale of 1 (not supported) to 5 (fully supported) and explain briefly.
  """
def gen_relevance(answer, question=None):
  """Build the judge prompt that rates how directly `answer` addresses the question.

  Args:
    answer: the generated answer to evaluate.
    question: the user question; when omitted, the notebook-level `query`
      variable is used.

  Returns:
    The prompt string asking for a 1-5 relevance rating with a brief explanation.
  """
  if question is None:
    question = query  # fall back to the notebook-level query

  return f"""
  You are evaluating a D&D answer. Rate the RELEVANCE of the answer to the question.

  Question: {question}
  Answer: {answer}

  Does the answer directly address the question? Rate on a scale of 1 (irrelevant) to 5 (fully relevant) and explain briefly.
  """


JUDGE_MODEL = "gemini-2.0-flash"


def _answer_text(output):
    """Return the generated text of a pipeline result.

    Expects the `[{"generated_text": ...}]` shape; anything else is passed
    through `str()` unchanged.
    """
    if (isinstance(output, list) and output
            and isinstance(output[0], dict) and "generated_text" in output[0]):
        return str(output[0]["generated_text"])
    return str(output)


def _ask_judge(prompt):
    """Send one evaluation prompt to the judge model and return its response."""
    return client.models.generate_content(model=JUDGE_MODEL, contents=prompt)


# Faithfulness prompts. Every prompt must contain the generated ANSWER
# (`*_output`); passing the request messages (`*_msgs`) instead leaves the
# judge with no answer to rate.
dense_faithfulness = gen_faithfulness(dense_array, _answer_text(dense_output))
bm25_faithfulness = gen_faithfulness(bm25_array, _answer_text(bm25_output))
rrf_faithfulness = gen_faithfulness(rrf_array, _answer_text(rrf_output))
mmr_faithfulness = gen_faithfulness(mmr_array, _answer_text(mmr_output))

# Relevance prompts.
dense_relevance = gen_relevance(_answer_text(dense_output))
bm25_relevance = gen_relevance(_answer_text(bm25_output))
rrf_relevance = gen_relevance(_answer_text(rrf_output))
mmr_relevance = gen_relevance(_answer_text(mmr_output))

# Ask the judge: faithfulness first, then relevance.
dense_response = _ask_judge(dense_faithfulness)
bm25_response = _ask_judge(bm25_faithfulness)
rrf_response = _ask_judge(rrf_faithfulness)
mmr_response = _ask_judge(mmr_faithfulness)

dense_relevance_response = _ask_judge(dense_relevance)
bm25_relevance_response = _ask_judge(bm25_relevance)
rrf_relevance_response = _ask_judge(rrf_relevance)
mmr_relevance_response = _ask_judge(mmr_relevance)

# NOTE(review): format_faithfulness_response appears to be defined only in a
# later cell of this notebook; that cell has to run before this one, otherwise
# a fresh "Restart & Run All" fails here with a NameError.
for label, judge_response in [
    ("Dense Faithfulness", dense_response),
    ("Dense Relevance", dense_relevance_response),
    ("BM25 Faithfulness", bm25_response),
    ("BM25 Relevance", bm25_relevance_response),
    ("RRF Faithfulness", rrf_response),
    ("RRF Relevance", rrf_relevance_response),
    ("MMR Faithfulness", mmr_response),
    ("MMR Relevance", mmr_relevance_response),
]:
    print(format_faithfulness_response(label, judge_response))





✅ Dense Faithfulness :
  Rating: 2
  Explanation: The answer is not fully supported by the context. While the context describes healing abilities, it focuses on the Cleric's Life Domain and doesn't mention Paladin abilities directly.

Therefore, the rating is 2.

✅ Dense Relevance :
  Rating: 5
  Explanation: The answer directly addresses the question of whether a Paladin can heal others. It not only confirms that they can but also elaborates on the ways they can do so, such as through Channel Divinity and spells like Cure Wounds.

✅ BM25 Faithfulness :
  Rating: N/A
  Explanation: I cannot provide a rating because the answer is not included. I need the answer to determine if it is fully supported by the context.

✅ BM25 Relevance :
  Rating: 5
  Explanation: The answer directly addresses the question of whether a Paladin can heal others. It confirms that they can, explains the mechanism (divine magic and spells), and provides additional details about specific features and the nature o

In [None]:
def format_faithfulness_response(name, response):
    """Format a judge response as a short "rating + explanation" report.

    Used for both faithfulness and relevance responses.

    Args:
        name: label printed in the report header.
        response: object exposing `candidates[0].content.parts[0].text`.

    Returns:
        A string starting with "✅" when a 1-5 rating was found, "⚠️" when no
        rating could be extracted (rating shown as "N/A"), or "❌" when the
        response text could not be read at all.
    """
    import re  # local import keeps this helper self-contained

    try:
        text = response.candidates[0].content.parts[0].text.strip()
    except Exception as e:
        # Best-effort formatting: report the problem instead of aborting the cell.
        return f"❌ {name} : [Error parsing response] {e}"

    # Rating-like expressions: "Rating: 4", "**Rating:** 4", "rating is 4",
    # "rated as 4", or a standalone digit later on a line starting a
    # "therefore" conclusion. The look-arounds reject digits that are part of
    # a longer number ("17th", "10", "4.5").
    rating_patterns = re.findall(
        r"(?:rating(?:\s+is)?[\s:*\-]*|rated\s+as\s*|therefore\b[^\n]*?)"
        r"(?<![\d.])([1-5])(?!\d|\.\d)",
        text, flags=re.IGNORECASE
    )

    # Use the last matched rating if available (the final verdict usually comes last)
    rating = rating_patterns[-1] if rating_patterns else "N/A"

    # Take everything after the first "Explanation" label, skipping the
    # punctuation / Markdown markers that follow it.
    explanation_match = re.search(
        r"explanation[\s:*]*(.*)", text, flags=re.IGNORECASE | re.DOTALL
    )
    explanation = explanation_match.group(1).strip() if explanation_match else ""
    if not explanation:
        # fallback: no (or empty) explanation section, show the full text
        explanation = text

    status = "✅" if rating != "N/A" else "⚠️"
    return f"{status} {name} :\n  Rating: {rating}\n  Explanation: {explanation}\n"
