In [2]:
# Build a sample vectorDB
from langchain.vectorstores import Chroma
# from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor 

from langchain_community.document_loaders import TextLoader

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain.retrievers.document_compressors import LLMChainExtractora


In [3]:
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001") # type: ignore
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0) # type: ignore #,convert_system_message_to_human=True,temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

In [5]:
documents = TextLoader("../langchain-course-main/01-Data-Connections/some_data/US_Constitution.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
db = Chroma.from_documents(texts, gemini_embeddings, persist_directory='./USConst')
results = db.similarity_search("What is the 13th Amendment?")

Created a chunk of size 1035, which is longer than the specified 1000
Created a chunk of size 1144, which is longer than the specified 1000
Created a chunk of size 1576, which is longer than the specified 1000
Created a chunk of size 2353, which is longer than the specified 1000
Created a chunk of size 1670, which is longer than the specified 1000


In [8]:
print(len(results))
print(results[0].page_content)

4
13th Amendment
Section 1
Neither slavery nor involuntary servitude, except as a punishment for crime whereof the party shall have been duly convicted, shall exist within the United States, or any place subject to their jurisdiction.

Section 2
Congress shall have power to enforce this article by appropriate legislation.

14th Amendment
Section 1
All persons born or naturalized in the United States, and subject to the jurisdiction thereof, are citizens of the United States and of the State wherein they reside. No State shall make or enforce any law which shall abridge the privileges or immunities of citizens of the United States; nor shall any State deprive any person of life, liberty, or property, without due process of law; nor deny to any person within its jurisdiction the equal protection of the laws.


In [9]:
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=db.as_retriever())

In [11]:
compressed_docs = compression_retriever.invoke("What is the 13th Amendment?")

In [13]:
print(compressed_docs)
print(len(compressed_docs))

[Document(metadata={'source': '../langchain-course-main/01-Data-Connections/some_data/US_Constitution.txt'}, page_content='>>>\n13th Amendment\nSection 1\nNeither slavery nor involuntary servitude, except as a punishment for crime whereof the party shall have been duly convicted, shall exist within the United States, or any place subject to their jurisdiction.\n\nSection 2\nCongress shall have power to enforce this article by appropriate legislation.\n>>>'), Document(metadata={'source': '../langchain-course-main/01-Data-Connections/some_data/US_Constitution.txt'}, page_content='>>>\n13th Amendment\nSection 1\nNeither slavery nor involuntary servitude, except as a punishment for crime whereof the party shall have been duly convicted, shall exist within the United States, or any place subject to their jurisdiction.\n\nSection 2\nCongress shall have power to enforce this article by appropriate legislation.\n>>>')]
2


In [14]:
print(compressed_docs[0].page_content)

>>>
13th Amendment
Section 1
Neither slavery nor involuntary servitude, except as a punishment for crime whereof the party shall have been duly convicted, shall exist within the United States, or any place subject to their jurisdiction.

Section 2
Congress shall have power to enforce this article by appropriate legislation.
>>>


In [15]:
print(compressed_docs[1].page_content)

>>>
13th Amendment
Section 1
Neither slavery nor involuntary servitude, except as a punishment for crime whereof the party shall have been duly convicted, shall exist within the United States, or any place subject to their jurisdiction.

Section 2
Congress shall have power to enforce this article by appropriate legislation.
>>>


In [None]:
def us_constitution_helper(question):
    '''
    Takes in a question about the US Constitution and returns the most relevant
    part of the constitution. Notice it may not directly answer the actual question!
    
    Follow the steps below to fill out this function:
    '''
    # PART ONE:
    # LOAD "some_data/US_Constitution in a Document object
     
    
    # PART TWO
    # Split the document into chunks (you choose how and what size)
    
    # PART THREE
    # EMBED THE Documents (now in chunks) to a persisted ChromaDB
     

    # PART FOUR
    # Use ChatOpenAI and ContextualCompressionRetriever to return the most
    # relevant part of the documents.

     

    pass

## Example Usage:

Notice how it doesn't return an entire Document of a large chunk size, but instead the "compressed" version!

In [None]:
print(us_constitution_helper("What is the 13th Amendment?"))

13th Amendment
Section 1
Neither slavery nor involuntary servitude, except as a punishment for crime whereof the party shall have been duly convicted, shall exist within the United States, or any place subject to their jurisdiction.
