## Load and split document

In [None]:
from langchain_community.document_loaders import PyPDFLoader

def load_documents(path, page_limit=None):
    """Load a PDF and return the leading documents produced by the loader.

    Args:
        path: Location of the PDF file handed to ``PyPDFLoader``.
        page_limit: Upper bound on how many items are kept from the start of
            the loader's output. ``None`` keeps everything.

    Returns:
        The first ``page_limit`` items of ``PyPDFLoader(path).load_and_split()``.

    NOTE(review): the limit is applied to whatever ``load_and_split()``
    returns, which may not map one-to-one onto PDF pages -- confirm.
    """
    pages = PyPDFLoader(path).load_and_split()
    # slice(None) selects everything, so the default keeps the full list.
    return pages[slice(page_limit)]

# Load the handbook, capped at the first 160 items returned by load_documents.
document = load_documents(path="data/DnD_5e_Players_Handbook.pdf", page_limit=160)
# Preview the first five loaded documents.
print(document[0:5])



[Document(metadata={'source': 'data/DnD_5e_Players_Handbook.pdf', 'page': 1}, page_content='C o n t e n t s\nP r e f a c e  4\nI n t r o d u c t i o n  5\nWorlds of Adventure ...................................................................5\nUsing This Book.........................................................................6\nHow to Play...................................................................................6\nAdventures....................................................................................7\nP a r t  1 9\nChapter  1: S tep -by -S tep  C haracters .....11\nBeyond 1st Level ...............................................................15\nCh apter  2: R aces ........................................................17\nChoosing a Race................................................................17\nDwarf....................................................................................18\nElf..............................................................

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
from dotenv import load_dotenv
# load_dotenv()
def split_documents(document, chunk_size=600, chunk_overlap=80):
    """Split loaded documents into overlapping, character-measured chunks.

    Args:
        document: The documents to split; passed straight to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Target maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.

    Returns:
        The chunks produced by the splitter.

    Raises:
        Exception: Any error raised while constructing the splitter or while
            splitting is propagated to the caller. Errors are deliberately not
            swallowed: returning ``None`` here would only defer the failure to
            a later cell (e.g. ``len(chunks)``) with a less helpful message.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents(document)
    
# Split the loaded documents using the function's default chunk size and overlap.
chunks = split_documents(document)


In [14]:
import tiktoken
# Count the tokens in the first loaded document, using the tokenizer that
# tiktoken associates with "gpt-4o-mini".
first_doc_text = document[0].page_content
encoding = tiktoken.encoding_for_model("gpt-4o-mini")
len(encoding.encode(first_doc_text))

408

## Document embedding

In [17]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_ollama import OllamaEmbeddings

def get_embedding_function():
    """Create the embedding model used for both indexing and querying.

    Returns:
        An ``OllamaEmbeddings`` instance configured with the
        ``nomic-embed-text`` model.
    """
    return OllamaEmbeddings(model='nomic-embed-text')



# Build the embedding model and embed a short probe sentence; the length of
# the resulting vector is what later sizes the FAISS index.
embedding_function = get_embedding_function()
single_vector = embedding_function.embed_query("this is a cat")
# NOTE(review): leftover experiment that is never executed -- consider deleting.
# embeddings = embedding_function.encode(chunks, batch_size=16, convert_to_tensor=False)

In [22]:
# Size the flat L2 index to the dimensionality of the probe embedding.
embedding_dim = len(single_vector)
index = faiss.IndexFlatL2(embedding_dim)
# Show how many vectors the index holds and its dimensionality.
(index.ntotal, index.d)

(0, 768)

In [23]:
# Wrap the FAISS index in a LangChain vector store backed by an in-memory
# docstore; the index-to-docstore-id mapping starts out empty.
vector_store = FAISS(
    embedding_function=embedding_function,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id=dict(),  # chunk ids get tagged later
)
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x17df08d40>

In [None]:
# NOTE(review): exploratory call that prints the help text for the store;
# consider removing it from the finished notebook.
help(vector_store)

In [27]:
# Number of chunks produced by split_documents.
len(chunks)

924

In [None]:
# Embed every chunk and add it to the vector store, keeping the returned ids.
ids = vector_store.add_documents(documents=chunks)
# Preview only a handful of ids instead of dumping the whole list into the
# notebook output; the full list stays available in `ids`.
ids[:5]

In [29]:
# Sanity check: number of ids returned by add_documents (compare with len(chunks)).
len(ids)

924

In [30]:
# Persist the vector store locally. Note that `db` is only the target folder
# name (a plain string), not a vector store object.
db = "dnd_rulebook_sample_db"
vector_store.save_local(db)

## Retrieval and Generation

In [12]:
# Retrieve the chunks most similar to the query. The search goes through
# `vector_store`: `db` is only the on-disk folder name (a string) and has no
# `similarity_search` method, so using it fails on a fresh kernel.
query = "What are the rules for character creation?"
result = vector_store.similarity_search(query)
# Print the text of each hit (indexing result[0] here would repeat the first hit).
for r in result:
    print(r.page_content)

Exceptions to the rules are often minor. For instance, 
many adventurers don’t have proficiency with longbows, 
but every wood elf does because of a racial trait. That 
trait creates a minor exception in the game. Other 
examples of rule-breaking are more conspicuous. For 
instance, an adventurer can’t normally pass through 
walls, but some spells make that possible. Magic 
accounts for most of the major exceptions to the rules.
R o u n d D o w n
There’s one more general rule you need to know at the 
outset. Whenever you divide a number in the game, 
round down if you end up with a fraction, even if the 
fraction is one-half or greater.
A d v e n t u r e s
The Dungeons  & D ragons  game consists of a group 
of characters embarking on an adventure that the Dun­
geon Master presents to them. Each character brings 
particular capabilities to the adventure in the form of 
ability scores and skills, class features, racial traits, 
equipment, and magic items. Every character is dif­
ferent, 