In [1]:
%pip install --quiet --upgrade langchain langchain-community langchain-chroma

Note: you may need to restart the kernel to use updated packages.


In [2]:
# LangsSmith API key: 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
%pip install --quiet -U langchain langchain-openai

Note: you may need to restart the kernel to use updated packages.


In [None]:
import os

# Enable LangSmith tracing and point the client at the hosted endpoint.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})
# Set your LangChain API key (LANGCHAIN_API_KEY) outside the notebook,
# e.g. in your shell environment, rather than writing it into a cell.

In [4]:
# Load Documents
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader('/Users/gustavo/Developer/RAG-youtube-tutorial/rag-tutorial-v2/data/fig-code-of-points-2025-2028.pdf')
pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [5]:
# Show the first page's metadata, a blank line, then its extracted text.
print(pages[0].metadata, pages[0].page_content, sep="\n\n")

{'source': '/Users/gustavo/Developer/RAG-youtube-tutorial/rag-tutorial-v2/data/fig-code-of-points-2025-2028.pdf', 'page': 0}

 
Men ’s Artistic Gymnastics – 2025 -2028 Code of Points   Page 1 
   
MEN’S ARTISTIC GYMNASTCS  
Approved by the FIG Executive Committee, on February  2024  
Date of publication:  01st July 2024  CODE OF POINTS  
2025 -2028  


In [6]:
# Split
try:
    # Current home of the splitters (the `langchain-text-splitters` package).
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except ImportError:
    # Older installs only expose the legacy re-export under `langchain`.
    from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks of at most 1000 characters, with 200 characters of overlap so that
# text cut at a chunk boundary still appears intact in a neighbouring chunk.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(pages)

In [7]:
# Embed and retriever setup
# Use the dedicated `langchain-chroma` package (installed in the first cell);
# the `langchain_community.vectorstores.Chroma` class is deprecated.
from langchain_chroma import Chroma  # ChromaDB will be used for database storage
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
# Expose the store through the retriever interface consumed by the RAG chain.
retriever = vectorstore.as_retriever()


In [8]:
#### Retrieval and generation

# Prompt
try:
    from langchain import hub  # this is a community hub of prompts for llm
except ImportError:
    # Newer `langchain` releases (1.x) no longer ship the hub helper; it lives
    # in the `langchain-classic` package instead.
    from langchain_classic import hub

# Fetch the shared RAG prompt template by its hub identifier.
prompt = hub.pull("rlm/rag-prompt")


In [9]:
# LLM
# Chat model used for answer generation; temperature 0 asks for the least
# random sampling.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)


In [10]:
# Post processing
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Reads the ``page_content`` attribute of each item in ``docs``, in order,
    and returns the texts joined by a blank line. An empty ``docs`` yields an
    empty string.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)


In [11]:
# Chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Pipeline: build the prompt inputs, fill the prompt, call the model, and
# reduce the model output to a plain string.
rag_chain = (
    {
        # Retrieved documents, flattened into one text block by format_docs.
        "context": retriever | format_docs,
        # The incoming question is forwarded unchanged.
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [12]:
# Query about the dress code for judges; the returned string is shown as the cell result.
rag_chain.invoke("What should judges wear to competitions?")

'Judges should wear the FIG prescribed competition uniform, which consists of a dark blue suit and a white shirt with a tie. In competitions where the uniform is supplied by the Organizing Committee, judges should wear that instead. Additionally, they must maintain a professional appearance throughout the event.'

In [13]:
# Query about the number and names of the pommel horse element groups.
rag_chain.invoke("How many element groups are there on Pommel horse, and what are they?")

'There are three element groups on the pommel horse, designated as I, II, and III, along with a Dismount Group designated as IV. Gymnasts must include at least one element from each of the three Element Groups in their exercise. The Dismount Group is required to conclude the routine.'

In [14]:
# Query asking which element group a named element ("Roth") belongs to.
rag_chain.invoke("What element group does a Roth fall into")

'A Roth falls into Element Group III, which includes travel type elements. This group also encompasses other elements like Tong Fei and Wu Guonian.'

In [15]:
# Query about the limit on cross support travels within one exercise.
rag_chain.invoke("How many cross support travels are allowed during an exercise?")

"A maximum of two cross support travels (forwards and/or backwards) are permitted during an exercise. This rule applies specifically to certain designated travels in men's artistic gymnastics."

In [16]:
# Query about how a routine's start value is computed.
# NOTE(review): the recorded answer talks about timing and a hang start position,
# which looks like off-topic retrieved context — verify retrieval quality for this query.
rag_chain.invoke("How do you calculate the start value of a routine?")

"To calculate the start value of a routine, begin timing when the gymnast's feet leave the floor. The routine must start in a hang position with straight arms and a vertical body, and any arm bending is only allowed if necessary for the completion of an element. The start value is determined based on the difficulty of the elements performed, as outlined in the Difficulty Tables."

In [17]:
# Query for the value of a specific floor skill.
# NOTE(review): the recorded answer is "I don't know" — the retrieved context
# apparently did not contain the value; confirm whether the source covers it.
rag_chain.invoke("What is a double front on floor worth?")

"The retrieved context does not provide specific information about the value of a double front on floor. Therefore, I don't know the answer."

In [18]:
# Query for the value of a named skill ("Honma straight").
# NOTE(review): the recorded answer is "I don't know" — the retrieved context
# apparently did not contain the value; confirm whether the source covers it.
rag_chain.invoke("What is a honma straight worth?")

"The context does not provide specific information about the value of a Honma straight. Therefore, I don't know the answer."