# Rules of Ultimate RAG
We want to create a way to query the rules of Ultimate using **retrieval augmented generation** (RAG).

In [3]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import bs4

In [2]:
from dotenv import load_dotenv
load_dotenv()


# Convert PDF files to text
import fitz # install using: pip install PyMuPDF

def extract_text_from_pdf(pdf_file_path):
    with fitz.open(pdf_file_path) as doc:
        text = ""
        for page in doc:
            text += page.get_text()

    return text


## Load rules
Can use local files (pdf) or from a website

In [35]:
# OPTION 1: load from PDF

# pdf_file_path = "c2-dominion-rulebook.pdf"
pdf_file_path = "Official-Rules-of-Ultimate-2022-2023.pdf"
text_content = extract_text_from_pdf(pdf_file_path)
print(f"Start of text: \n{text_content[0:200]}")

# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
splits = text_splitter.create_documents((text_content,))

Start of text: 
 
2022-23 Official Rules of Ultimate 
 
Preface 
Ultimate is a sport that inspires players and fans alike because of its ability to develop and showcase the 
athleticism, skill, teamwork, and characte


In [40]:
## OPTION 2: Load Documents from a website

# Load text
url = "https://usaultimate.org/rules/"
loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("site-content")
        )
    ),
)
docs = loader.load()

# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
docs = text_splitter.create_documents((text_content,))
splits = text_splitter.split_documents(docs)

print(f"Start of text: \n{str(splits[0])[0:200]}")

Start of text: 
page_content='2022-23 Official Rules of Ultimate \n \nPreface \nUltimate is a sport that inspires players and fans alike because of its ability to develop and showcase the \nathleticism, skill, teamwo


## Embed rules and store vectors

In [47]:
len(splits[0])

TypeError: object of type 'Document' has no len()

In [42]:
# Store embeddings
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

retriever = vectorstore.as_retriever()


In [43]:
vectorstore

<langchain_community.vectorstores.chroma.Chroma at 0x26a5c5ccb50>

In [50]:
# Load prompt template from langchain hub
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


In [54]:
# Question
question = "how do you start the game"
answer = rag_chain.invoke(question)
print(answer)

To start the game, players shuffle their starting 10 cards (7 Coppers & 3 Estates) and place them face-down as their Deck. They then draw the top 5 cards as their starting hand. Players skip the action phase if they have no action cards and go directly to the buy phase, where they can buy cards from the supply. Each turn consists of three phases: Action phase, Buy phase, and Clean-up phase.
