In [1]:
# Set up a User Agent for this session
import os

os.environ['USER_AGENT'] = 'sports-buddy-advanced'

In [2]:
# Initialize an OpenAI model
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [3]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WikipediaLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = WikipediaLoader("2024_Summer_Olympics",)
docs = loader.load()

In [4]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
splits = text_splitter.split_documents(docs)

database = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# TODO: Increase the value of 'k' to retrieve more documents
retriever = database.as_retriever(search_kwargs={"k": 1})

retriever.invoke("How was security during the 2024 Olympics")

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Concerns_and_controversies_at_the_2024_Summer_Olympics', 'summary': 'Various concerns and controversies arose in relation to the 2024 Summer Olympics, including security concerns, human rights issues, and controversy over allowing Israel to participate amidst the Israel–Hamas war, and allowing Russian and Belarusian athletes to compete as neutrals amidst the Russian invasion of Ukraine. Despite the nominal Olympic Truce in place, the wars in Ukraine and Palestine already set a more conflicted political background to the 2024 Summer Olympics, before considering domestic and sporting issues.\n\n', 'title': 'Concerns and controversies at the 2024 Summer Olympics'}, page_content='Various concerns and controversies arose in relation to the 2024 Summer Olympics, including security concerns, human rights issues, and controversy over allowing Israel to participate amidst the Israel–Hamas war, and allowing Russian and Belarusian athle

In [5]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(
    "How was security during the 2024 Olympics"
)

compressed_docs

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Concerns_and_controversies_at_the_2024_Summer_Olympics', 'summary': 'Various concerns and controversies arose in relation to the 2024 Summer Olympics, including security concerns, human rights issues, and controversy over allowing Israel to participate amidst the Israel–Hamas war, and allowing Russian and Belarusian athletes to compete as neutrals amidst the Russian invasion of Ukraine. Despite the nominal Olympic Truce in place, the wars in Ukraine and Palestine already set a more conflicted political background to the 2024 Summer Olympics, before considering domestic and sporting issues.\n\n', 'title': 'Concerns and controversies at the 2024 Summer Olympics'}, page_content='Various concerns and controversies arose in relation to the 2024 Summer Olympics, including security concerns, human rights issues, and controversy over allowing Israel to participate amidst the Israel–Hamas war, and allowing Russian and Belarusian athle

In [6]:
pip install langchain-cohere

Note: you may need to restart the kernel to use updated packages.


In [7]:
import os
import getpass

if not os.getenv("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass("Enter your Cohere API Key:")

Enter your Cohere API Key: ········


In [8]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank
from langchain_community.llms import Cohere

llm = Cohere(temperature=0)
compressor = CohereRerank(model="rerank-english-v3.0")
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.invoke(
    "How was security during the 2024 Olympics"
)

compressed_docs

  llm = Cohere(temperature=0)


[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Concerns_and_controversies_at_the_2024_Summer_Olympics', 'summary': 'Various concerns and controversies arose in relation to the 2024 Summer Olympics, including security concerns, human rights issues, and controversy over allowing Israel to participate amidst the Israel–Hamas war, and allowing Russian and Belarusian athletes to compete as neutrals amidst the Russian invasion of Ukraine. Despite the nominal Olympic Truce in place, the wars in Ukraine and Palestine already set a more conflicted political background to the 2024 Summer Olympics, before considering domestic and sporting issues.\n\n', 'title': 'Concerns and controversies at the 2024 Summer Olympics', 'relevance_score': 0.9420819}, page_content='Various concerns and controversies arose in relation to the 2024 Summer Olympics, including security concerns, human rights issues, and controversy over allowing Israel to participate amidst the Israel–Hamas war, and allowin