In [1]:
!pip install openai langchain faiss-cpu sentence-transformers pymupdf langchain_community transformers

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting pymupdf
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community

In [2]:
!pip install scikit-learn



In [3]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
import fitz

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import warnings
warnings.filterwarnings("ignore")
import textwrap
import re

updated pdf reading method to include page number in retrieved chunks and tested chunk sizes

In [4]:
import fitz
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

documents = []

# Process each page individually
with fitz.open("DnD_BasicRules_2018.pdf") as document:
    for page_num, page in enumerate(document):
        # Extract text from this page only
        page_text = page.get_text("text")

        # Create Document with metadata containing page number
        documents.append(Document(
            page_content=page_text,
            metadata={"page": page_num + 1}  # Or page.number if available
        ))

# Split documents while preserving metadata
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000, #tested 500, 1000, 2000, 2000 seems to capture an paragraph in its entirety
    chunk_overlap=200, #tested 50, 100 and 200. 200 seems to capture an entire column/paragraph within a chunk (although extra info also gets retrieved)
    separators=["\n\n## ", "\n\n"]
)
chunks = text_splitter.split_documents(documents)

print(f"Number of chunks: {len(chunks)}")
print(f"Sample metadata: {chunks[55].metadata}")

Number of chunks: 180
Sample metadata: {'page': 56}


testing embedding models

In [5]:
snippets = [
    "Doppelganger Medium monstrosity (shapechanger), neutral",
    "Proficiency Bonus Characters have a proficiency bonus determined by level, as detailed in chapter 1.",
    "Choose one domain related to your deity: Knowledge, Life, Light, Nature, Tempest, Trickery, or War"
]

MODEL_NAMES = [
    "BAAI/bge-small-en-v1.5",
    "all-MiniLM-L6-v2",
    "all-mpnet-base-v2",
    "BAAI/bge-small-en"
]

for model_name in MODEL_NAMES:
    embedder = HuggingFaceEmbeddings(model_name=model_name)
    db = FAISS.from_documents(chunks, embedder)
    print(f"\n=== {model_name} ===")
    for snippet in snippets:
        docs = db.similarity_search(snippet, k=3)
        pages = [doc.metadata["page"] for doc in docs]
        print(f"“{snippet[:50]}…” → pages {pages}")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


=== BAAI/bge-small-en-v1.5 ===
“Doppelganger Medium monstrosity (shapechanger), ne…” → pages [124, 150, 162]
“Proficiency Bonus Characters have a proficiency bo…” → pages [12, 38, 9]
“Choose one domain related to your deity: Knowledge…” → pages [24, 25, 39]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


=== all-MiniLM-L6-v2 ===
“Doppelganger Medium monstrosity (shapechanger), ne…” → pages [124, 13, 140]
“Proficiency Bonus Characters have a proficiency bo…” → pages [61, 12, 38]
“Choose one domain related to your deity: Knowledge…” → pages [24, 173, 172]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


=== all-mpnet-base-v2 ===
“Doppelganger Medium monstrosity (shapechanger), ne…” → pages [124, 129, 110]
“Proficiency Bonus Characters have a proficiency bo…” → pages [12, 61, 112]
“Choose one domain related to your deity: Knowledge…” → pages [172, 173, 24]


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


=== BAAI/bge-small-en ===
“Doppelganger Medium monstrosity (shapechanger), ne…” → pages [124, 150, 35]
“Proficiency Bonus Characters have a proficiency bo…” → pages [12, 62, 38]
“Choose one domain related to your deity: Knowledge…” → pages [24, 25, 39]


In [6]:
# ─── Compute Recall@3 and MRR for each model ───
import pandas as pd
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# 1) Your snippets and their true pages (in the same order)
snippets = [
    "Doppelganger Medium monstrosity (shapechanger), neutral",
    "Proficiency Bonus Characters have a proficiency bonus determined by level, as detailed in chapter 1.",
    "Choose one domain related to your deity: Knowledge, Life, Light, Nature, Tempest, Trickery, or War"
]
gold_pages = [    124,    61,    24  ]

MODEL_NAMES = [
    "BAAI/bge-small-en-v1.5",
    "all-MiniLM-L6-v2",
    "all-mpnet-base-v2",
    "BAAI/bge-small-en"
]

K = 3
metrics = []

for model_name in MODEL_NAMES:
    embedder = HuggingFaceEmbeddings(model_name=model_name)
    db = FAISS.from_documents(chunks, embedder)

    hits = 0
    rr_sum = 0.0

    for snippet, gold in zip(snippets, gold_pages):
        docs = db.similarity_search(snippet, k=K)
        pages = [d.metadata["page"] for d in docs]

        if gold in pages:
            hits += 1
            rank = pages.index(gold)
            rr_sum += 1.0 / (rank + 1)
        # else rr_sum += 0 implicitly

    recall_at_k = hits / len(snippets)
    mrr = rr_sum / len(snippets)
    metrics.append({
        "Model": model_name,
        "Recall@3": recall_at_k,
        "MRR": mrr
    })

# 3) Show results
df = pd.DataFrame(metrics).set_index("Model")
print(df)


                        Recall@3       MRR
Model                                     
BAAI/bge-small-en-v1.5  0.666667  0.666667
all-MiniLM-L6-v2        1.000000  1.000000
all-mpnet-base-v2       1.000000  0.611111
BAAI/bge-small-en       0.666667  0.666667


Testing Retrieval functions

In [7]:
!pip install rank_bm25

Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25
Successfully installed rank_bm25-0.2.2


In [8]:
!pip install nltk
import nltk
nltk.download('punkt_tab')



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [9]:
!pip install sentence-transformers




In [10]:
# Retrieval & Evaluation Pipeline

from langchain_community.embeddings import HuggingFaceEmbeddings
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import numpy as np
import pandas as pd

# 1)  chunks must already exist:
texts = [c.page_content for c in chunks]
pages = [c.metadata["page"]    for c in chunks]

# 2) BM25 setup
tokenized = [word_tokenize(t.lower()) for t in texts]
bm25 = BM25Okapi(tokenized)

# 3) Dense embeddings via all-MiniLM-L6-v2
embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# embed all chunks once
doc_embs = np.array(embedder.embed_documents(texts))

#  Retrieval functions

def dense_retr(query, k=5):
    """Semantic search with precomputed embeddings."""
    q_emb = np.array(embedder.embed_query(query))
    sims  = doc_embs.dot(q_emb)
    idxs  = np.argsort(sims)[::-1][:k]
    return [pages[i] for i in idxs]

def bm25_retr(query, k=5):
    """Classic BM25 keyword search."""
    tokens = word_tokenize(query.lower())
    scores = bm25.get_scores(tokens)
    idxs   = np.argsort(scores)[::-1][:k]
    return [pages[i] for i in idxs]

def mmr_retr(query, k=5, lambda_mult=0.7):
    """Manual MMR for diversity."""
    q_emb      = np.array(embedder.embed_query(query))
    sims       = doc_embs.dot(q_emb)
    fetch_k    = min(len(sims), k*5)
    candidates = np.argsort(sims)[::-1][:fetch_k]
    selected   = []
    for _ in range(min(k, len(candidates))):
        if not selected:
            selected.append(candidates[0])
        else:
            mmr_scores = []
            for idx in candidates:
                if idx in selected: continue
                diversity = max(doc_embs[idx].dot(doc_embs[j]) for j in selected)
                score     = lambda_mult * sims[idx] - (1 - lambda_mult) * diversity
                mmr_scores.append((score, idx))
            selected.append(max(mmr_scores, key=lambda x: x[0])[1])
    return [pages[i] for i in selected]

def rrf_retr(query, k=5, alpha=60):
    """Reciprocal Rank Fusion of Dense & BM25."""
    d_list = dense_retr(query, k*3)
    b_list = bm25_retr(query, k*3)
    scores = {}
    for rank, p in enumerate(d_list):
        scores[p] = scores.get(p, 0) + 1.0/(alpha + rank + 1)
    for rank, p in enumerate(b_list):
        scores[p] = scores.get(p, 0) + 1.0/(alpha + rank + 1)
    fused = sorted(scores, key=lambda x: scores[x], reverse=True)
    return fused[:k]

# Evaluation

snippets = [
    "Doppelganger Medium monstrosity (shapechanger), neutral",
    "Proficiency Bonus Characters have a proficiency bonus determined by level, as detailed in chapter 1.",
    "Choose one domain related to your deity: Knowledge, Life, Light, Nature, Tempest, Trickery, or War"
]
gold = [124, 61, 24]

def evaluate(fn, k=3):
    recalls, rranks, ndcgs = [], [], []
    for snippet, g in zip(snippets, gold):
        preds = fn(snippet, k)
        hit   = int(g in preds)
        recalls.append(hit)
        if hit:
            rank = preds.index(g)
            rranks.append(1.0/(rank+1))
        else:
            rranks.append(0.0)
        gains     = [1 if p==g else 0 for p in preds]
        discounts = [1.0/np.log2(i+2) for i in range(len(gains))]
        ndcgs.append(sum(g*d for g, d in zip(gains, discounts)))
    return {
        f"Recall@{k}": np.mean(recalls),
        "MRR":         np.mean(rranks),
        f"nDCG@{k}":   np.mean(ndcgs)
    }

methods = {
    "Dense": dense_retr,
    "BM25":  bm25_retr,
    "MMR":   mmr_retr,
    "RRF":   rrf_retr
}

# 4) Compute & show results
results = {name: evaluate(fn) for name, fn in methods.items()}
print(pd.DataFrame(results).T)


       Recall@3       MRR    nDCG@3
Dense  1.000000  1.000000  1.000000
BM25   0.666667  0.500000  0.543643
MMR    1.000000  1.000000  1.000000
RRF    0.666667  0.666667  0.666667


Creating DB

In [11]:
embedder=HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
db=FAISS.from_documents(chunks,embedder)

# Check the number of stored documents
print(f"Number of documents in the vector store: {db.index.ntotal}")

Number of documents in the vector store: 180


In [12]:
# Get user query
query = input(" Enter your question: ")

# Retrieve relevant chunks
results = db.similarity_search_with_score(query, k=5)
results = sorted(results, key=lambda x: x[1])

for i, (doc,score) in enumerate(results):
    print(f"--- Chunk {i+1} ---\n{doc.page_content}\n")

 Enter your question: When creating a cleric, what is the most important question to  consider?
--- Chunk 1 ---
23
23
 Chapter 3: Classes
D&D Basic Rules (Version 1.0). Not for resale. Permission granted to print and photocopy this document for personal use only.
have a special task in mind for you? Or are you striving to 
prove yourself worthy of a great quest?
Quick Build
You can make a cleric quickly by following these sugges-
tions. First, Wisdom should be your highest ability score, 
followed by Strength or Constitution. Second, choose the 
acolyte background.
Class Features
As a cleric, you gain the following class features.
Hit Points
Hit Dice: 1d8 per cleric level
Hit Points at 1st Level: 8 + your Constitution modifier
Hit Points at Higher Levels: 1d8 (or 5) + your 
Constitution modifier per cleric level after 1st
Proficiencies
Armor: Light armor, medium armor, shields
Weapons: Simple weapons
Tools: None
Saving Throws: Wisdom, Charisma
Skills: Choose two from History, Insight, 

In [13]:
prompt=f"""You are a wise, dramatic, and witty Dungeon Master in a Dungeons & Dragons campaign.
You narrate responses like a storyteller guiding the party. Stay in character. Respond in JSON with the following attributes:

-"summary": a short summary of what the user should do
-"actions": list of D&D actions specific to the problem
-"tone": tone as described at the start of the prompt ie wise and dramatic
-"original_response": your full response based on the prompt

Context:
{results[0][0]}
{results[1][0]}
{results[2][0]}

Question:
{query}

**Instructions**:
- Integrate the key points from all retrieved responses into a **cohesive, well-structured answer**.
- Be descriptive and imaginative
-lways respond in second-person (“you”) and add a touch of fantasy flavor. Be immersive.
- Only use info in the rulebook
-Reply ONLY in JSON

"""

In [14]:
model_name = "Qwen/Qwen2.5-3B-Instruct"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto", #so it can choose gpu/cpu
    torch_dtype="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.6k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [15]:
generator = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
return_full_text=False,
max_new_tokens=5000,
do_sample=False
)

Device set to use cuda:0


In [16]:
messages = [{"role": "user", "content": prompt}]
output = generator(messages)
print(output)

[{'generated_text': '[\n{\n"summary": "The most important question to consider when creating a cleric is which deity to serve and what principles your character embodies.",\n"actions": [\n"Choose a deity",\n"Consider your relationship to that god",\n"Think about your ultimate goals"\n],\n"tone": "wise and dramatic",\n"original_response": "Ah, you seek to forge a path as a cleric, a servant of the divine. The first step is to choose your patron. Is it a deity of light and justice, or perhaps a dark lord of chaos? Each choice shapes your character\'s destiny and the battles you shall face. Next, ponder your relationship to this deity. Do you enter this service willingly, or does the god compel you? Lastly, consider your ultimate goals. Will you strive to uphold tradition, seek change, or pursue power? These choices will define your character and guide your adventures."\n}\n]'}]


Testing Faithfulness

In [17]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
import re, json, numpy as np
from collections import Counter

#  QG & QA Models
qg_tok   = T5Tokenizer.from_pretrained("iarfmoose/t5-base-question-generator")
qg_model = T5ForConditionalGeneration.from_pretrained("iarfmoose/t5-base-question-generator")
qa       = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

# QAGS HELPERS
def generate_questions(summary, max_q=5):
    inp     = qg_tok(summary, return_tensors="pt", truncation=True)
    outs    = qg_model.generate(
                 inp.input_ids,
                 num_beams=max_q,
                 num_return_sequences=max_q,
                 max_length=64,
                 early_stopping=True
              )
    return [qg_tok.decode(o, skip_special_tokens=True) for o in outs]

def get_answer(q, ctx):
    return qa(question=q, context=ctx)["answer"].strip().lower()

def f1(a, b):
    t1, t2 = re.findall(r"\w+", a), re.findall(r"\w+", b)
    common = Counter(t1) & Counter(t2)
    n      = sum(common.values())
    return 2*n/(len(t1)+len(t2)) if n else 0

def qags_score(source, summary, max_q=5):
    qs     = generate_questions(summary, max_q)
    scores = [f1(get_answer(q, summary), get_answer(q, source)) for q in qs]
    return float(np.mean(scores)) if scores else 0.0

# BUILD SOURCE & PARSE SUMMARY
source_text = "\n\n".join(doc.page_content for doc, _ in results[:5])
raw         = output[0].get("generated_text", output[0]) if isinstance(output[0], dict) else output[0]
parsed      = json.loads(raw) if isinstance(raw, str) else raw
if isinstance(parsed, list): parsed = parsed[0]
summary     = parsed.get("summary", raw)

#RUN QAGS & SHOW
score = qags_score(source_text, summary, max_q=5)
print("Summary:\n", summary, "\n")
print(f"QAGS Faithfulness: {score:.3f}")


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cuda:0


Summary:
 The most important question to consider when creating a cleric is which deity to serve and what principles your character embodies. 

QAGS Faithfulness: 0.094


In [18]:
qs = generate_questions(summary, max_q=5)
for q in qs:
    a_sum = get_answer(q, summary)
    a_src = get_answer(q, source_text)
    score = f1(a_sum, a_src)
    print(f"Q: {q!r}")
    print(f" A_from_summary: {a_sum!r}")
    print(f" A_from_source : {a_src!r}")
    print(f" F1 overlap     : {score:.3f}\n")


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Q: 'What is the most important question to consider when creating a cleric?'
 A_from_summary: 'which deity to serve'
 A_from_source : 'performing sacred rites'
 F1 overlap     : 0.000

Q: 'What is the most important question when creating a cleric?'
 A_from_summary: 'which deity to serve'
 A_from_source : 'insight'
 F1 overlap     : 0.000

Q: 'What are the most important questions to consider when creating a cleric?'
 A_from_summary: 'which deity to serve'
 A_from_source : 'which deity to serve and what principles you \nwant your character to embody'
 F1 overlap     : 0.471

Q: 'What is the most important question to consider when choosing a cleric?'
 A_from_summary: 'which deity to serve'
 A_from_source : 'performing sacred rites'
 F1 overlap     : 0.000

Q: 'What is the most important question when creating a cleric? ?'
 A_from_summary: 'which deity to serve'
 A_from_source : 'performing sacred rites'
 F1 overlap     : 0.000

