# Rules of Ultimate RAG
We want to create a way to query the rules of Ultimate using **retrieval augmented generation** (RAG).

In [1]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import bs4

In [2]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
# (presumably the OpenAI / LangChain API keys used below — TODO confirm).
load_dotenv()


# Convert PDF files to text
import fitz # install using: pip install PyMuPDF

def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file_path: Location of the PDF; handed unchanged to ``fitz.open``.

    Returns:
        One string holding each page's ``get_text()`` output, joined without
        any separator.
    """
    # The context manager closes the document once all pages have been read.
    with fitz.open(pdf_file_path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return "".join(page_texts)


In [3]:
import os
# LangChain tracing configuration, applied to the process environment in one call.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': "https://api.smith.langchain.com",
    'LANGCHAIN_PROJECT': 'Trevor learning',
})


## Load rules
The rules can be loaded either from a local PDF file (Option 1) or from a website (Option 2).

In [3]:
# OPTION 1: load the rules from a local PDF

# Alternative input file:
# pdf_file_path = "c2-dominion-rulebook.pdf"
pdf_file_path = "Official-Rules-of-Ultimate-2022-2023.pdf"
text_content = extract_text_from_pdf(pdf_file_path)

# Preview the first 200 characters as a sanity check on the extraction.
print(f"Start of text: \n{text_content[:200]}")

# Cut the single extracted string into overlapping chunks (1000 characters,
# 200 shared between neighbours) wrapped as documents.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.create_documents([text_content])

Start of text: 
 
2022-23 Official Rules of Ultimate 
 
Preface 
Ultimate is a sport that inspires players and fans alike because of its ability to develop and showcase the 
athleticism, skill, teamwork, and characte


In [125]:
## OPTION 2: load the rules from a website
# Running this cell rebinds `text_splitter` and `splits`, replacing whatever
# the PDF option assigned to them.

# Fetch the page, keeping only elements whose class is "site-content".
url = "https://usaultimate.org/rules/"
loader = WebBaseLoader(
    web_paths=(url,),
    bs_kwargs={"parse_only": bs4.SoupStrainer(class_="site-content")},
)
docs = loader.load()

# Cut the loaded documents into overlapping chunks.
chunk_size = 2000
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Preview the first 200 characters of the first chunk's string form.
print(f"Start of text: \n{str(splits[0])[:200]}")

Start of text: 
page_content='2024-2025\n\n\nOfficial Rules of Ultimate\n\n\n\n\n\n\n\n\n\nRules Menu\n\n\n\n\n\n\n\nRules of Ultimate\n\n\n\n\n\nRules Resources\n\n\n\n\n\nUltimate in 10 Simple Rules (English)\n\n\n


## Embed rules and store vectors

In [126]:
# import chromadb

# # create vector store
# name = "usau_rules"

# client = chromadb.Client()
# # client.delete_collection(name)
# collection = client.create_collection(name=name)
# collection

# # add content
# collection.add(
#     documents=[doc.page_content.replace("\n", "") for doc in docs],
#     metadatas=[{"source": "usau rules"} for i in range(len(docs))],
#     ids=[f"id_{i}" for i in range(len(docs))],
# )

# query_texts = [
#     "how many stalls are counted during regular play?"
# ]
# results = collection.query(
#     query_texts=query_texts,
#     n_results=1)
# results["documents"]

**Note**: The commented-out cell above is useful for seeing how Chroma works on its own, but LangChain can do the same thing faster and more easily.

In [127]:
# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
# NOTE(review): later cells report only 2 unique documents from 5 queries at
# k=5, which may mean the store holds duplicate chunks (for example from
# running this cell more than once in the same kernel) — confirm.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# Retriever that returns 5 results per query. Alternatives kept for reference:
#   search_type="mmr"
#   search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6}
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})


## Build rest of chain

In [150]:
# Load prompt template from langchain hub
# (it is later fed a dict with "context" and "question" keys).
prompt = hub.pull("rlm/rag-prompt")

# LLM that writes the final answer; temperature is fixed at 0.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Post-processing: turn retrieved documents into one context string
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Chain: the input question goes to the retriever (its documents are flattened
# by format_docs) and is also passed through unchanged; both fill the prompt,
# which is sent to the LLM and parsed to a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)


In [152]:
# Question
question = "what is the minimum and maximum distance that a defender can stand to a thrower while marking them?"
# Run the basic RAG chain on the raw question string and show the answer.
# `question` is reused by the query-translation cells further down.
answer = rag_chain.invoke(question)
print(answer)

The minimum distance a defender can stand to a thrower while marking them is within ten feet of the thrower's pivot. The maximum distance would be beyond ten feet from the thrower's pivot.


# More advanced Query translation methods:

## 1. Multi-query
A way to ensure you get more useful documents returned.


This method rephrases the original question in several ways, retrieves documents for each rephrased question, then returns a list of the unique documents returned across all the queries.

In [153]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
# Prompt asking the model for five rephrasings of {question}, one per line;
# the chain built from this prompt splits the reply on newlines.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Chain: prompt -> chat model -> plain string -> list of query strings.
# The reply is split on newlines. Lines that are empty or whitespace-only
# (for example blank separator lines in the reply) are dropped and the rest
# are stripped, so no empty query is passed on to the retriever.
generate_queries = (
    prompt_perspectives
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)
# Try it out
generate_queries.invoke({"question": question})

["1. What are the allowable ranges for a defender's distance from a thrower during marking?",
 '2. How close or far can a defender legally position themselves in relation to a thrower when marking?',
 '3. What is the range of distances within which a defender can stand in proximity to a thrower while marking?',
 '4. What are the limits on how near or far a defender can be from a thrower while engaged in marking?',
 '5. What is the specified minimum and maximum distance a defender must maintain when marking a thrower?']

In [159]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Args:
        documents: One list of retrieved documents per query. Every document
            must be serializable with ``dumps``.

    Returns:
        A flat list with one copy of each distinct document, in the order in
        which each document was first encountered (first list first).
    """
    # Serialize each document so it can be compared and used as a dict key.
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys removes duplicates while keeping first-seen order. A set
    # would give an arbitrary order that can differ between runs, which would
    # change the order of the context handed to the LLM.
    unique_serialized = dict.fromkeys(serialized)
    return [loads(doc) for doc in unique_serialized]


# Retrieve
# Pipeline stages:
#   generate_queries  -> list of rephrased queries
#   retriever.map()   -> runs the retriever on every query (a list of result lists)
#   get_unique_union  -> flat list of distinct documents
retrieval_chain = generate_queries | retriever.map() | get_unique_union

docs = retrieval_chain.invoke({"question": question})
len(docs)

2

In [160]:
from operator import itemgetter

# RAG over the multi-query retrieval results
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
llm = ChatOpenAI(temperature=0)

# "context" comes from running the whole multi-query retrieval chain on the
# input dict; "question" is read from that same dict.
final_rag_chain = (
    {
        "context": retrieval_chain,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

"The minimum distance a defender can stand to a thrower while marking them is within ten feet of the thrower's pivot."

## 2. RAG Fusion
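This is similar to multi-query, but instead of returning all the documents as an unordered set, it returns them ranked by a fused score: every time a document appears in a query's results it earns `1 / (rank + k)`, so documents that appear often and near the top of the result lists rank highest. You can then choose to use the top *n* of them as context.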

In [161]:
def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists with Reciprocal Rank Fusion (RRF).

    A document earns ``1 / (rank + k)`` for every list it appears in, where
    ``rank`` is its zero-based position in that list. The contributions are
    summed into one fused score per distinct document.

    Args:
        results: One ranked list of documents per query. Every document must
            be serializable with ``dumps``.
        k: Constant added to the rank in the RRF formula.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by score, highest
        first. Documents with equal scores keep first-seen order.
    """
    # Fused score per distinct document, keyed by its serialized form.
    fused_scores = {}
    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # sorted() is stable, so ties stay in insertion (first-seen) order.
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

# Same query generation and per-query retrieval as the multi-query chain, but
# the result lists are fused into (document, score) tuples instead of being
# merged into a plain list of documents.
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)

2

In [162]:
# RAG over the fused, re-ranked retrieval results
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# "context" is whatever retrieval_chain_rag_fusion returns for the input dict
# (a list of (document, score) tuples); "question" is read from the same dict.
final_rag_chain = (
    {
        "context": retrieval_chain_rag_fusion,
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)
final_rag_chain.invoke({"question": question})

"The minimum distance a defender can stand to a thrower while marking them is within ten feet of the thrower's pivot. There is no specific mention of a maximum distance in the provided context."

## 3. Query Decomposition
The idea here is that you break a query down into subqueries, get an answer for each subquery, then send the subqueries and their answers as the context for the original query.

In [164]:
# Decomposition: break the question into sub-questions
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(temperature=0)

# Chain: prompt -> chat model -> plain string -> list of sub-questions.
# The reply is split on newlines. Lines that are empty or whitespace-only are
# dropped and the rest are stripped, so no empty sub-question is retrieved on
# or answered downstream.
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

questions = generate_queries_decomposition.invoke({"question": question})
questions

['1. What are the standard defensive strategies for marking a thrower in ultimate frisbee?',
 '2. How does the distance between a defender and a thrower affect the effectiveness of marking in ultimate frisbee?',
 '3. Are there any specific rules or guidelines regarding the minimum and maximum distance a defender can stand from a thrower in ultimate frisbee?']

In [165]:
# Prompt used to answer one sub-question at a time. It has three slots:
#   {question}  - the sub-question being answered (appears twice)
#   {q_a_pairs} - question/answer pairs accumulated so far
#   {context}   - material retrieved for this sub-question
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)




In [166]:
def format_qa_pair(question, answer):
    """Render one question and its answer as a two-line text block.

    The result is ``"Question: <question>\\nAnswer: <answer>"`` with leading
    and trailing whitespace stripped.
    """
    return f"Question: {question}\nAnswer: {answer}\n\n".strip()

# llm
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Chain that answers one sub-question: retrieve on the sub-question, fill
# decomposition_prompt with the retrieved context plus the Q+A pairs gathered
# so far, then call the LLM. It is the same for every sub-question, so it is
# built once rather than on each loop iteration.
# NOTE(review): this rebinds `rag_chain`, which an earlier cell invokes with a
# plain string; re-running that cell after this one will use this dict-input
# chain instead — confirm whether a separate name is wanted.
rag_chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | llm
    | StrOutputParser()
)

# Answer the sub-questions in order. Each call receives every earlier Q+A pair
# as background, and its own pair is appended after a "---" separator.
q_a_pairs = ""
for q in questions:
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n" + q_a_pair

In [167]:
# Inspect the accumulated question/answer pairs.
q_a_pairs

'\n---\nQuestion: 1. What are the standard defensive strategies for marking a thrower in ultimate frisbee?\nAnswer: Some standard defensive strategies for marking a thrower in ultimate frisbee include acknowledging readiness, establishing a pivot at the appropriate spot on the field, loudly announcing "3-2-1," touching the disc to the ground, and loudly announcing "disc in." Additionally, if play is to restart with a check but no offensive player is in possession of the disc at the appropriate spot, play restarts with a defensive self-check where the defender closest to the disc loudly announces "3-2-1 disc in."\n---\nQuestion: 2. How does the distance between a defender and a thrower affect the effectiveness of marking in ultimate frisbee?\nAnswer: The distance between a defender and a thrower in ultimate frisbee is crucial for the effectiveness of marking. According to the rules, the marker, which is the defensive player marking the thrower, must be within ten feet of the thrower\'s 

We now have the individual answers, so let's bring it together

In [171]:
# RAG prompt (the same hub prompt the basic chain loads); it is filled with
# "context" and "question" for each sub-question.
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question,prompt_rag,sub_question_generator_chain):
    """Answer each generated sub-question independently with retrieval + LLM.

    Relies on the module-level ``retriever`` and ``llm``.

    Args:
        question: Original question, passed to the generator as
            ``{"question": question}``.
        prompt_rag: Prompt piped into ``llm``; it is invoked with the keys
            ``"context"`` and ``"question"``.
        sub_question_generator_chain: Runnable whose ``invoke`` returns the
            sub-questions to iterate over.

    Returns:
        A ``(rag_results, sub_questions)`` tuple. ``rag_results[i]`` is the
        answer generated for ``sub_questions[i]``.
    """
    # Break the original question into sub-questions.
    sub_questions = sub_question_generator_chain.invoke({"question": question})

    # The answering chain is the same for every sub-question, so build it once.
    answer_chain = prompt_rag | llm | StrOutputParser()

    rag_results = []
    for sub_question in sub_questions:
        # `invoke` is the Runnable entry point used everywhere else in this
        # notebook; it replaces the deprecated `get_relevant_documents`.
        retrieved_docs = retriever.invoke(sub_question)

        # Answer the sub-question from its own retrieved documents only.
        answer = answer_chain.invoke({"context": retrieved_docs,
                                      "question": sub_question})
        rag_results.append(answer)

    return rag_results,sub_questions

# Run retrieval + RAG on every sub-question of the original question.
# The function is called directly here; it is not wrapped in a RunnableLambda.
# This rebinds `questions` to the sub-questions generated by this call.
answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)


In [172]:
# Sub-questions returned by retrieve_and_rag.
questions

['1. What are the standard rules and regulations for defender-to-thrower distance in ultimate frisbee?',
 '2. How does the distance between a defender and a thrower impact the effectiveness of marking in ultimate frisbee?',
 '3. Are there any specific strategies or techniques for maintaining an optimal distance while marking a thrower in ultimate frisbee?']

In [173]:
# Answers returned by retrieve_and_rag, one per sub-question.
answers

['The standard rules and regulations for defender-to-thrower distance in ultimate frisbee are not explicitly mentioned in the provided context.',
 "The distance between a defender and a thrower impacts the effectiveness of marking in ultimate frisbee by defining the marker as the defensive player within ten feet of the thrower's pivot or the thrower if no pivot has been established. Being within this distance allows the defender to apply pressure and disrupt the thrower's options. Maintaining this proximity can make it harder for the thrower to execute their intended pass.",
 "Defenders should maintain an optimal distance from the thrower to allow play to continue smoothly. It is important to avoid infringing on the thrower's space while marking to ensure fair play. Defenders should be aware of the rules regarding traveling and maintain a proper pivot position during marking."]

In [174]:
def format_qa_pairs(questions, answers):
    """Render paired questions and answers as numbered text blocks.

    Pairs are formed positionally with ``zip`` (extra items in the longer
    input are ignored) and numbered from 1. Blocks are separated by a blank
    line, and the whole result is stripped of surrounding whitespace.
    """
    blocks = [
        f"Question {idx}: {q}\nAnswer {idx}: {a}\n\n"
        for idx, (q, a) in enumerate(zip(questions, answers), start=1)
    ]
    return "".join(blocks).strip()

# The sub-question answers, formatted as numbered Q+A pairs, are the context.
context = format_qa_pairs(questions, answers)

# Prompt asking the model to synthesize the final answer from those pairs
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# No retrieval step here: the context is supplied directly in the input dict.
final_rag_chain = prompt | llm | StrOutputParser()

final_rag_chain.invoke({"context": context, "question": question})

"The minimum distance that a defender can stand to a thrower while marking them in ultimate frisbee is within ten feet of the thrower's pivot or the thrower if no pivot has been established. This distance allows the defender to apply pressure and disrupt the thrower's options effectively. On the other hand, there is no specific maximum distance mentioned in the rules and regulations, but defenders should aim to maintain a close proximity to the thrower to effectively mark them and prevent easy passes. It is important for defenders to find a balance between being close enough to apply pressure and disrupt plays, while also avoiding infringing on the thrower's space and committing fouls."

## 4. HyDE
Since the question and document might not be semantically similar enough to return good matches, the idea here is to generate a 'fake' text based on the query then use the fake text to search for similar texts.

In [182]:
# HyDE document generation
# Prompt asking the model to write a passage that answers {question}; the
# generated passage, not the question, is later used as the retrieval query.
template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Chain: HyDE prompt -> chat model -> plain string (the hypothetical passage).
generate_docs_for_retrieval = (
    prompt_hyde
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
)

# Run it on a new question (this rebinds `question` for the cells that follow).
question = "If a player is on the line when they catch the disc are they out of bounds ?"
generate_docs_for_retrieval.invoke({"question": question})

'In the sport of ultimate frisbee, a player is considered out of bounds if any part of their body, including their feet, is touching the ground outside of the playing field boundaries when they catch the disc. This rule is in place to ensure fair play and to prevent players from gaining an unfair advantage by being out of bounds. Therefore, if a player is on the line when they catch the disc, they are considered out of bounds and the turnover will be awarded to the opposing team. It is important for players to be aware of their positioning on the field to avoid costly turnovers and to adhere to the rules of the game.'

In [183]:
# Retrieve: generate the hypothetical passage, then use it as the retriever query.
# NOTE(review): this rebinds `retrieval_chain`, which earlier held the
# multi-query retrieval chain.
retrieval_chain = generate_docs_for_retrieval | retriever 
# NOTE(review): the name is misspelled ("retireved"), but the final RAG cell
# reads it under this exact spelling, so any rename must cover both cells.
retireved_docs = retrieval_chain.invoke({"question":question})
retireved_docs

[Document(page_content='Contact between players does not confer the state of being in- or out-of-bounds from one to another.\nIf a player catches an in-bounds disc and would reasonably have been able to land in-bounds, but lands on an opposing player in a way that causes their first ground contact to be out-of-bounds, this is to be treated as a force-out foul and 17.I.4.b.4 applies. For this exception to apply, the play resulting in the landing contact cannot be construed as a dangerous play. [[In this case, calls will be resolved in chronological order with the dangerous play superseding the force-out foul.]]\n\n\nA disc becomes in-bounds when it is put into play, or when play starts or restarts.\nA disc becomes out-of-bounds when it first contacts the out-of-bounds area, contacts an out-of-bounds offensive player, or is caught by an out-of-bounds defensive player.\nThe disc may fly outside a perimeter line and return to the playing field, and players may go out-of-bounds to make a pl

In [184]:
# RAG using the documents retrieved via the hypothetical passage
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# No retrieval step in this chain: the already-retrieved documents are passed
# in as "context" next to the original question.
final_rag_chain = prompt | llm | StrOutputParser()

final_rag_chain.invoke({"context": retireved_docs, "question": question})

'No, if a player catches the disc while on the line, they are considered in-bounds as long as the disc is caught in-bounds.'