In [1]:
# Helper function for printing docs


def pretty_print_docs(docs):
    """Print every document's text under a numbered heading.

    Args:
        docs: Iterable of objects exposing a string ``page_content``
            attribute.

    Each entry is rendered as ``Document <n>:`` (numbering starts at 1),
    a blank line, then the document text. Consecutive entries are
    separated by a line of 100 dashes. Nothing is returned.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [2]:
from langchain_community.document_loaders import TextLoader
# from langchain_community.vectorstores import FAISS
# from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter


from langchain.vectorstores import Chroma

from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Embedding model used for the vector store (and reused by later cells).
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")  # type: ignore

# Load the speech and split it into chunks of up to 1000 characters, no overlap.
documents = TextLoader("./2022_state_of_the_union_address.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Give each chunk a stable id derived from its position. The store is persisted
# to disk, and without explicit ids a fresh random id is generated per chunk on
# every call, so re-running this cell would add the whole speech again and
# similarity_search would start returning duplicate hits.
# NOTE(review): relies on the LangChain Chroma wrapper writing by id (upsert);
# confirm against the installed version.
chunk_ids = [f"soua-2022-{index}" for index in range(len(texts))]
db = Chroma.from_documents(
    texts,
    gemini_embeddings,
    ids=chunk_ids,
    persist_directory='./2022soua',
)

# Baseline: plain similarity search with no compression or filtering applied.
docs = db.similarity_search("What did the president say about Ketanji Brown Jackson")
pretty_print_docs(docs)

Document 1:

As I did four days ago, I’ve nominated a Circuit Court of Appeals — Ketanji Brown Jackson. One of our nation’s top legal minds who will continue in just Brey- — Justice Breyer’s legacy of excellence. A former top litigator in private practice, a former federal public defender from a family of public-school educators and police officers — she’s a consensus builder.

Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

JUDGE KETANJI BROWN JACKSON
President Biden's Unity Agenda
LEARN MORE
Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

Folks, if we are to advance liberty and justice, we need to secure our border and fix the immigration system.
----------------------------------------------------------------------------------------------------
Do

In [3]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
# from langchain_openai import OpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

# llm = OpenAI(temperature=0)
# Chat model handed to the LLM-based document compressors.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Wrap the Chroma retriever so every retrieved document is passed through the
# LLM extractor before being returned.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=db.as_retriever()
)

# Use the same wording as the baseline similarity search ("Ketanji Brown
# Jackson") so the compressed output can be compared against it directly.
compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Brown Jackson"
)
pretty_print_docs(compressed_docs)

Document 1:

As I did four days ago, I’ve nominated a Circuit Court of Appeals — Ketanji Brown Jackson. One of our nation’s top legal minds who will continue in just Brey- — Justice Breyer’s legacy of excellence. A former top litigator in private practice, a former federal public defender from a family of public-school educators and police officers — she’s a consensus builder.


In [4]:
# Number of documents in the current compressed result. Left as a bare last
# expression so the notebook displays the value itself.
len(compressed_docs)

1


In [6]:
from langchain.retrievers.document_compressors import LLMChainFilter

# LLM-driven filter used as the compressor for this retriever.
_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    # Build the base retriever from the Chroma store directly: no visible cell
    # assigns a `retriever` variable (its only assignments are commented out),
    # so referencing that name raises NameError on a fresh kernel.
    base_compressor=_filter, base_retriever=db.as_retriever()
)

# Same wording as the baseline similarity search ("Ketanji Brown Jackson").
compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Brown Jackson"
)
pretty_print_docs(compressed_docs)

Document 1:

As I did four days ago, I’ve nominated a Circuit Court of Appeals — Ketanji Brown Jackson. One of our nation’s top legal minds who will continue in just Brey- — Justice Breyer’s legacy of excellence. A former top litigator in private practice, a former federal public defender from a family of public-school educators and police officers — she’s a consensus builder.

Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

JUDGE KETANJI BROWN JACKSON
President Biden's Unity Agenda
LEARN MORE
Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

Folks, if we are to advance liberty and justice, we need to secure our border and fix the immigration system.


In [9]:
from langchain.retrievers.document_compressors import EmbeddingsFilter
# from langchain_openai import OpenAIEmbeddings

# embeddings = OpenAIEmbeddings()
# Reuse the Gemini embedding model for the similarity-based filter.
embeddings = gemini_embeddings

# Threshold chosen by hand; results observed in earlier runs:
#   similarity_threshold=0.76 -> no doc returned
#   similarity_threshold=0.25 -> 4 docs returned
embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.25)

compression_retriever = ContextualCompressionRetriever(
    # Build the base retriever from the Chroma store directly: no visible cell
    # assigns a `retriever` variable (its only assignments are commented out),
    # so referencing that name raises NameError on a fresh kernel.
    base_compressor=embeddings_filter, base_retriever=db.as_retriever()
)

# Same wording as the baseline similarity search ("Ketanji Brown Jackson").
compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Brown Jackson"
)
pretty_print_docs(compressed_docs)

Document 1:

As I did four days ago, I’ve nominated a Circuit Court of Appeals — Ketanji Brown Jackson. One of our nation’s top legal minds who will continue in just Brey- — Justice Breyer’s legacy of excellence. A former top litigator in private practice, a former federal public defender from a family of public-school educators and police officers — she’s a consensus builder.

Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

JUDGE KETANJI BROWN JACKSON
President Biden's Unity Agenda
LEARN MORE
Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

Folks, if we are to advance liberty and justice, we need to secure our border and fix the immigration system.
----------------------------------------------------------------------------------------------------
Do

In [10]:
# Display how many documents are in the current compressed_docs result.
len(compressed_docs)

4

In [13]:
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_text_splitters import CharacterTextSplitter

# Stage 1: re-split documents on ". " with a target chunk size of 300
# characters and no overlap.
splitter = CharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=0,
    separator=". ",
)

# Stage 2: embeddings-based redundancy filter.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

# Stage 3: embeddings-based relevance filter. The threshold is 0.25; 0.76 was
# the previously tried value.
relevant_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.25,
)

# Combine the three stages, in this order, into one compressor.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        splitter,
        redundant_filter,
        relevant_filter,
    ]
)

In [14]:
# Run retrieval through the pipeline compressor.
compression_retriever = ContextualCompressionRetriever(
    # Build the base retriever from the Chroma store directly: no visible cell
    # assigns a `retriever` variable (its only assignments are commented out),
    # so referencing that name raises NameError on a fresh kernel.
    base_compressor=pipeline_compressor, base_retriever=db.as_retriever()
)

# Same wording as the baseline similarity search ("Ketanji Brown Jackson").
compressed_docs = compression_retriever.invoke(
    "What did the president say about Ketanji Brown Jackson"
)
pretty_print_docs(compressed_docs)

Created a chunk of size 362, which is longer than the specified 300
Created a chunk of size 543, which is longer than the specified 300
Created a chunk of size 353, which is longer than the specified 300
Created a chunk of size 310, which is longer than the specified 300


Document 1:

As I did four days ago, I’ve nominated a Circuit Court of Appeals — Ketanji Brown Jackson. One of our nation’s top legal minds who will continue in just Brey- — Justice Breyer’s legacy of excellence
----------------------------------------------------------------------------------------------------
Document 2:

A former top litigator in private practice, a former federal public defender from a family of public-school educators and police officers — she’s a consensus builder.

Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

JUDGE KETANJI BROWN JACKSON
President Biden's Unity Agenda
LEARN MORE
Since she’s been nominated, she’s received a broad range of support, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

Folks, if we are to advance liberty and justice, we need to secure our border and fix the immigratio