## Load and split document

In [None]:
from langchain_community.document_loaders import PyPDFLoader

def load_documents(path, page_limit=None):
    """Load a PDF through ``PyPDFLoader`` and return its leading entries.

    Args:
        path: Location of the PDF file, passed straight to ``PyPDFLoader``.
        page_limit: Upper bound used to slice the loader output from the
            front. ``None`` (the default) keeps every entry.

    Returns:
        The sequence produced by ``load_and_split()``, sliced with
        ``[:page_limit]``.

    Note:
        The limit is applied to whatever ``load_and_split()`` returns, so it
        counts returned entries. Whether each entry corresponds to exactly
        one PDF page depends on the loader's own splitting -- TODO confirm.
    """
    pieces = PyPDFLoader(path).load_and_split()
    return pieces[:page_limit]

# Load the handbook and keep only the first 160 entries returned by load_documents.
document = load_documents("data/DnD_5e_Players_Handbook.pdf", 160)
# Preview the first five loaded entries to sanity-check the text extraction.
print(document[:5])



[Document(metadata={'source': 'data/DnD_5e_Players_Handbook.pdf', 'page': 1}, page_content='C o n t e n t s\nP r e f a c e  4\nI n t r o d u c t i o n  5\nWorlds of Adventure ...................................................................5\nUsing This Book.........................................................................6\nHow to Play...................................................................................6\nAdventures....................................................................................7\nP a r t  1 9\nChapter  1: S tep -by -S tep  C haracters .....11\nBeyond 1st Level ...............................................................15\nCh apter  2: R aces ........................................................17\nChoosing a Race................................................................17\nDwarf....................................................................................18\nElf..............................................................

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
from dotenv import load_dotenv
# load_dotenv()
def split_documents(document, chunk_size=600, chunk_overlap=80):
    """Cut loaded documents into overlapping character-based chunks.

    Args:
        document: The documents to split, handed unchanged to the splitter's
            ``split_documents`` method.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap carried between neighbouring chunks.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter``, or ``None``
        when building the splitter or splitting raises. In that case the
        error is printed rather than propagated, so callers must check for
        ``None``.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        "is_separator_regex": False,
    }
    try:
        # Construction stays inside the try so that invalid settings are
        # reported the same way as failures during the split itself.
        splitter = RecursiveCharacterTextSplitter(**splitter_options)
        pieces = splitter.split_documents(document)
    except Exception as e:
        print(f"Error during document splitting: {e}")
        return None
    return pieces
    
# split_documents prints the error and returns None if splitting fails,
# so `chunks` is either the list of chunks or None.
chunks = split_documents(document)


In [14]:
# Count the tokens in the first loaded document using the encoding tiktoken
# associates with "gpt-4o-mini".
# NOTE(review): generation later in this notebook uses an Ollama llama3.2
# model, whose tokenizer may differ -- treat this count as an estimate.
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-4o-mini")
len(encoding.encode(document[0].page_content))

408

## Document embedding

In [17]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_ollama import OllamaEmbeddings

def get_embedding_function():
    """Create the embedding model used to vectorise text.

    Returns:
        An ``OllamaEmbeddings`` instance configured with the
        ``nomic-embed-text`` model.
    """
    return OllamaEmbeddings(model="nomic-embed-text")



# Build the embedding model and embed one throwaway sentence; the length of
# the resulting vector is what sizes the FAISS index.
embedding_function = get_embedding_function()
single_vector = embedding_function.embed_query("this is a cat")
# Disabled experiment (never executed):
# embeddings = embedding_function.encode(chunks, batch_size=16, convert_to_tensor=False)

In [22]:
# Flat L2-distance FAISS index whose dimensionality equals the length of the
# probe embedding.
index = faiss.IndexFlatL2(len(single_vector))
# Display (number of stored vectors, vector dimension).
index.ntotal, index.d

(0, 768)

In [23]:
# Wrap the FAISS index in a LangChain vector store that keeps documents in an
# in-memory docstore and embeds text with `embedding_function`.
vector_store = FAISS(
    embedding_function=embedding_function,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={} # starts empty; chunk ids are tagged later
)
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x17df08d40>

In [None]:
# Exploratory only: prints the help text for the vector store object.
# NOTE(review): leftover from development -- consider removing before sharing.
help(vector_store)

In [27]:
# Number of chunks produced by split_documents.
len(chunks)

924

In [None]:
# Add every chunk to the vector store and keep the value returned by
# add_documents (the ids, as the name suggests).
ids = vector_store.add_documents(documents=chunks)
ids

In [29]:
# Sanity check: this count should equal len(chunks).
len(ids)

924

In [30]:
# Persist the populated vector store locally under this name so it can be
# reloaded without re-embedding.
db = "dnd_rulebook_sample_db"
vector_store.save_local(db)

# load local db
# NOTE(review): allow_dangerous_deserialization=True presumably opts in to
# unpickling the saved store -- only use it on files you created yourself.
# new_vector_store = FAISS.load_local(db, embeddings=embedding_function, allow_dangerous_deserialization=True)

## Retrieval

In [44]:
# Run a plain similarity search directly on the vector store and print the
# text of every hit, separated by a dashed rule.
query = "What are the rules for character creation?"
results = vector_store.search(query=query, search_type="similarity")
for r in results:
    print(r.page_content)
    print("\n---------\n")

sheet to mean whatever you use to track your character, 
whether it’s a formal character sheet (like the one at the 
end of this book), some form of digital record, or a piece 
of notebook paper. An official D&D character sheet is a 
fine place to start until you know what information you 
need and how you use it during the game.
Building  B ruenor
Each step of character creation includes an example of 
that step, with a player named Bob building his dwarf 
character, Bruenor.
1. C hoose  a R ace
Every character belongs to a race, one of the many

---------

setting, even if the setting is a published world.
U sin g  T his  B o o k
The Player’s Handbook  is divided into three parts.
Part 1 is about creating a character, providing the 
rules and guidance you need to make the character 
you’ll play in the game. It includes information on the 
various races, classes, backgrounds, equipment, and 
other customization options that you can choose from. 
Many of the rules in part 1 rely on mat

In [45]:
# MMR retriever configured with k=5 and fetch_k=100.
# NOTE(review): lambda_mult=1 appears to select minimum diversity, which would
# make MMR behave like a plain similarity search (the hits printed for this
# retriever match the similarity results) -- confirm against the LangChain docs
# and lower it if diverse context is wanted.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={'k':5, 'fetch_k':100, 'lambda_mult': 1})

In [46]:
# retriever = vector_store.as_retriever(search_type="similarity_score_threshold", 
#                                       search_kwargs={'score_threshold': 0.5},
#                                       )

In [47]:
# Run the same query through the retriever and print each returned
# document's text, separated by a dashed rule.
docs = retriever.invoke(query)
for doc in docs:
    print(doc.page_content)
    print("\n----------\n")

sheet to mean whatever you use to track your character, 
whether it’s a formal character sheet (like the one at the 
end of this book), some form of digital record, or a piece 
of notebook paper. An official D&D character sheet is a 
fine place to start until you know what information you 
need and how you use it during the game.
Building  B ruenor
Each step of character creation includes an example of 
that step, with a player named Bob building his dwarf 
character, Bruenor.
1. C hoose  a R ace
Every character belongs to a race, one of the many

----------

setting, even if the setting is a published world.
U sin g  T his  B o o k
The Player’s Handbook  is divided into three parts.
Part 1 is about creating a character, providing the 
rules and guidance you need to make the character 
you’ll play in the game. It includes information on the 
various races, classes, backgrounds, equipment, and 
other customization options that you can choose from. 
Many of the rules in part 1 rely on ma

## Generation

In [89]:
from langchain import hub
from langchain_ollama import OllamaLLM
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
import argparse


In [90]:
# Fields the model is asked to return; each ResponseSchema pairs an output
# key with a description of what belongs in it.
RESPONSE_SCHEMA = [
    ResponseSchema(name="character_name", description="Name of the character"),
    ResponseSchema(name="class", description="The character's class"),
    ResponseSchema(name="attributes", description="The character's attributes like strength, dexterity, etc."),
    ResponseSchema(name="equipment", description="The character's starting equipment"),
    ResponseSchema(name="background", description="The character's backstory"),
]
# Parser built from the schema; it also supplies the format instructions
# that are injected into the prompt.
parser = StructuredOutputParser.from_response_schemas(RESPONSE_SCHEMA)
# Ollama-served model used for generation.
model = OllamaLLM(model="llama3.2:1b")

In [91]:
TEMPLATE = """
You are tasked with creating a Dungeons & Dragons 5e character sheet. 
Please provide the details in JSON format adhering to the following schema:

{format_instructions}

Provide:
1. A unique character name based on the race.
2. A class from the DnD 5e rules (e.g., Wizard, Fighter, Rogue).
3. Attributes including strength, dexterity, constitution, intelligence, wisdom, and charisma. Use numbers from 1 to 20.
4. Starting equipment based on the class.
5. A short backstory for the character.

Be creative and provide a detailed response.
"""

# Ask the parser for its formatting instructions and bind them to the
# template as a partial variable, so the prompt declares no runtime inputs.
format_instructions = parser.get_format_instructions()
prompt = PromptTemplate(input_variables=[], template=TEMPLATE, partial_variables={"format_instructions": format_instructions})

In [100]:
# Render the prompt (it takes no runtime inputs) and send the text to the model.
response = model.invoke(prompt.format())

In [102]:
# Inspect the raw model output.
# NOTE(review): in the visible cells this text is never passed through
# `parser`, so the structured fields are not actually extracted -- confirm
# whether parser.parse(response) was intended here.
response

'```markdown\n```\n\n**Character Sheet: Eilif Stonefist, Dwarf Cleric**\n\n```json\n{\n\t"character_name": "Eilif Stonefist",\n\t"class": " cleric",\n\t"attributes": "14+2 (Wisdom, 10 Constitution)",\n\t"equipment": "chain armor, warhammer of the gods, shield emblazoned with his family crest, holy symbol of Moradin",\n\t"background": "Acolyte of Moradin"\n}\n```\n\n### Attributes:\n\n| Attribute | Score |\n| --- | --- |\n| Strength | 16 (+3) |\n| Dexterity | 12 (+1) |\n| Constitution | 14+2 (Wisdom, 10 Constitution) |\n| Intelligence | 6 (-2) |\n| Wisdom | 18 (+4) |\n| Charisma | 8 (-1) |\n\n### Starting Equipment:\n\n* Chain armor\n* Warhammer of the gods\n* Shield emblazoned with his family crest\n* Holy symbol of Moradin\n\n### Backstory:\n\nEilif was born and raised in a small dwarven clan deep beneath the earth. From a young age, he showed a natural affinity for magic and healing, which his clan\'s priestess encouraged by training him in the ways of Moradin. As Eilif grew older, h

## Boilerplate code: retrieval-augmented character generation

In [71]:
# Retrieve passages for a paladin build and join their text into a single
# context string (blank line between passages).
query = "build a character of paladin class"
# NOTE(review): this rebuilds a retriever with the same settings as the one
# created in the retrieval section above.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={'k':5, 'fetch_k':100, 'lambda_mult': 1})
results = retriever.invoke(query)
context_text = "\n\n".join([r.page_content for r in results])
context_text

"C lass  F eatu res F igh ting  S tyle\nAs a paladin, you gain the following class features.\nH it  Points\nHit Dice: 1 d10 per paladin level\nHit Points at 1st Level: 10 + your Constitution modifier \nHit Points at Higher Levels: 1 d10 (or 6) + your \nConstitution modifier per paladin level after 1st\nProficiencies\nArmor: All armor, shields\nWeapons: Simple weapons, martial weapons\nTools: None\nSaving Throws: Wisdom, Charisma \nSkills: Choose two from Athletics, Insight, Intimidation, \nMedicine, Persuasion, and Religion\nEquipm ent\nYou start with the following equipment, in addition to\n\nQ uick  Build\nYou can make a paladin quickly by following these \nsuggestions. First, Strength should be your highest \nability score, followed by Charisma. Second, choose the \nnoble background.\n\nend one spell on yourself or on one willing creature \nthat you touch.\nYou can use this feature a number of times equal \nto your Charisma modifier (a minimum of once). You \nregain expended uses wh

In [81]:

# Build a chat prompt from PROMPT_TEMPLATE and fill in the retrieved context,
# the question and the parser's format instructions.
# NOTE(review): PROMPT_TEMPLATE is not defined in any cell visible in this
# notebook, so this cell fails on a fresh kernel -- restore its definition.
# NOTE(review): `prompt` was previously bound to a PromptTemplate; here it is
# rebound to the formatted string.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format(context=context_text, question=query, format_instructions=format_instructions)
prompt

"Human: \nUse the following context to build a character:\n\nC lass  F eatu res F igh ting  S tyle\nAs a paladin, you gain the following class features.\nH it  Points\nHit Dice: 1 d10 per paladin level\nHit Points at 1st Level: 10 + your Constitution modifier \nHit Points at Higher Levels: 1 d10 (or 6) + your \nConstitution modifier per paladin level after 1st\nProficiencies\nArmor: All armor, shields\nWeapons: Simple weapons, martial weapons\nTools: None\nSaving Throws: Wisdom, Charisma \nSkills: Choose two from Athletics, Insight, Intimidation, \nMedicine, Persuasion, and Religion\nEquipm ent\nYou start with the following equipment, in addition to\n\nQ uick  Build\nYou can make a paladin quickly by following these \nsuggestions. First, Strength should be your highest \nability score, followed by Charisma. Second, choose the \nnoble background.\n\nend one spell on yourself or on one willing creature \nthat you touch.\nYou can use this feature a number of times equal \nto your Charisma

In [82]:
# Send the formatted, context-augmented prompt to the model.
response_text = model.invoke(prompt)

In [87]:
# Prefix the raw model text with a "Response: " label for display.
formatted_response = f"Response: {response_text}\n"
formatted_response

"Response: Based on the provided context, I'll create a complete character sheet for a paladin.\n\n**Name:** Eryndor Thorne\n\n**Class:** Paladin\n\n**Level:** 1st (using suggestions from the quick build)\n\n**Ability Scores:**\n\n* Strength: 16 (+3)\n* Dexterity: 12 (+1)\n* Constitution: 14 (+2)\n* Intelligence: 10 (+0)\n* Wisdom: 13 (+1)\n* Charisma: 18 (+4)\n\n**Background:** Noble\n\n**Equipment:**\n\n* Chain mail armor\n* Shield\n* Holy symbol of Moradin (Dwarven god of creation and smithing)\n* Dagger for close combat\n* Backpack with bedroll, waterskin, rations, and other adventuring gear\n* Quick Build equipment to facilitate character creation\n\n**Features & Traits:**\n\n* **Hit Dice:** 1 d10 per level = 1d10 (or 6) + Constitution modifier per level after 1st = +2\n* **Armor Class:** 19 (with chain mail armor and shield)\n* **Hit Points:** 11 (starting at 10 + Constitution modifier, with +2 for each level after 1st)\n* **Proficiencies:**\n\t+ Armor: All armor\n\t+ Weapons: Si

In [73]:
def format_docs(docs):
    """Concatenate the text of retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        Every ``page_content`` value, in input order, joined by a blank line.
        An empty input yields an empty string.
    """
    texts = (item.page_content for item in docs)
    return "\n\n".join(texts)

In [74]:
# Retrieval-augmented chain: the input question is sent to the retriever
# (whose hits are flattened by format_docs) and passed through unchanged as
# "question"; the prompt is rendered, sent to the model, and the model text
# is parsed into the structured fields.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    # The template is formatted elsewhere with a `format_instructions` value,
    # but the mapping above only supplies context and question, so bind the
    # instructions here instead of expecting them from the chain input.
    | prompt_template.partial(format_instructions=format_instructions)
    | model
    # Pipe into the configured parser *instance*. Piping the
    # StructuredOutputParser class itself would try to construct a new parser
    # from the model output instead of parsing it.
    | parser
)

In [None]:
# result = rag_chain.invoke(query)
# print(result)

ResponseError: invalid format: expected "json" or a JSON schema