In [1]:
# Set up a User Agent for this session
import os
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_community.document_loaders import WikipediaLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Identify this session via the USER_AGENT environment variable.
os.environ["USER_AGENT"] = "sports-buddy-advanced"

# Chat model shared by the QA chains built later in the notebook.
llm = ChatOpenAI(model="gpt-4o-mini")

# Load the Wikipedia material for the 2024 Summer Olympics query.
loader = WikipediaLoader(query="2024_Summer_Olympics")
docs = loader.load()

# Split the loaded documents into chunks of up to 1000 characters with no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
splits = text_splitter.split_documents(docs)

# Embed the chunks into a Chroma vector store and expose it as the dense retriever.
database = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
retriever = database.as_retriever()

In [2]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

# Keyword (BM25) retriever over the same chunks used for the vector store,
# limited to its top 3 matches.
keyword_retriever = BM25Retriever.from_documents(splits)
keyword_retriever.k = 3

# Hybrid retriever: combines the dense `retriever` (weight 0.3) with the
# BM25 `keyword_retriever` (weight 0.7); weights follow the order of `retrievers`.
ensemble_retriever = EnsembleRetriever(
    retrievers=[retriever, keyword_retriever],
    weights=[0.3, 0.7],
)

In [3]:
from langchain.chains import RetrievalQA

# QA chain that retrieves context with the dense (vector-store) retriever only.
dense_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# QA chain that retrieves context with `ensemble_retriever`.
# NOTE: despite the name, this chain is backed by the ensemble retriever,
# not by the BM25 retriever alone.
sparse_chain = RetrievalQA.from_chain_type(llm=llm, retriever=ensemble_retriever)

In [4]:
# Ask the dense-retriever chain about the opening ceremony and print its answer.
normal_response = dense_chain.invoke(
    "What happened at the opening ceremony of the 2024 Summer Olympics"
)
print(normal_response["result"])

The opening ceremony of the 2024 Summer Olympics was held outside of a stadium for the first time in modern Olympic history. Athletes were paraded by boat along the Seine River in Paris.


In [5]:
# Ask the ensemble-retriever chain the same opening-ceremony question.
# The result is bound to `hybrid_response`, the name the print below reads.
# It was previously stored only as `sparse_response`, which left
# `hybrid_response` undefined (NameError) on a fresh kernel.
hybrid_response = sparse_chain.invoke(
    "What happened at the opening ceremony of the 2024 Summer Olympics"
)
# Keep the old name as an alias in case other cells refer to it.
sparse_response = hybrid_response
print(hybrid_response['result'])

The opening ceremony of the 2024 Summer Olympics took place outside of a stadium for the first time in modern Olympic history, with athletes being paraded by boat along the Seine River in Paris. This unique setting was part of the ceremony, making it a significant and memorable event in Olympic history.
