In [1]:
import os
# Build the data directory path with os.path.join instead of hard-coded "\\"
# separators, so the notebook resolves the folder on any operating system
# (on Windows the resulting string is identical to the original literal).
DATA_PATH = os.path.join("langchain-course-main", "01-Data-Connections", "some_data")
# Show the available sample files as the cell's output.
os.listdir(DATA_PATH)

['.ipynb_checkpoints',
 'FDR_State_of_Union_1944.txt',
 'Lincoln_State_of_Union_1862.txt',
 'penguins.csv',
 'SomeReport.pdf',
 'some_markdown.md',
 'some_website.html',
 'text_file_one.txt',
 'US_Constitution.txt']

In [2]:
import logging
# Raise the text-splitter logger's threshold to ERROR; presumably this is meant
# to hide the "Created a chunk of size ..." warnings emitted while splitting.
splitter_logger = logging.getLogger("langchain_text_splitters.base")
splitter_logger.setLevel(logging.ERROR)

In [3]:
# Build a sample vectorDB

#vector db
from langchain_chroma import Chroma
#llm
from langchain_openai import ChatOpenAI
#embedding
from langchain_openai import OpenAIEmbeddings
#document loading
from langchain.document_loaders import TextLoader
#text splitting (chunks)
from langchain_text_splitters import CharacterTextSplitter
#retrievers
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor 


In [None]:
# NOTE: this call needs the cell that defines us_constitution_helper to have
# been executed first.
thirteenth_amendment = us_constitution_helper("What is the 13th Amendment?")
print(thirteenth_amendment)

13th Amendment
Section 1
Neither slavery nor involuntary servitude, except as a punishment for crime whereof the party shall have been duly convicted, shall exist within the United States, or any place subject to their jurisdiction.


In [4]:
def us_constitution_helper(question):
    '''
    Takes in a question about the US Constitution and returns the most relevant
    part of the constitution. Notice it may not directly answer the actual question!

    The Constitution is embedded into the persisted ChromaDB at "./usconst" only
    when that store is still empty; later calls reuse the stored chunks instead
    of re-embedding the file and appending duplicate copies of every chunk.

    Args:
        question: The question text, used as the retrieval query.

    Returns:
        The page content of the first document returned by the
        contextual-compression retriever, or the string
        "no relevant information was found" when it returns no documents.
    '''
    # Open (or create) the persisted ChromaDB first so we can tell whether the
    # Constitution has already been indexed by an earlier call.
    embedding_function = OpenAIEmbeddings()
    db = Chroma(persist_directory="./usconst",
                embedding_function=embedding_function)

    # Chroma.from_documents appends to an existing persisted collection, so
    # calling it on every invocation stored the same chunks again and again
    # (and paid for the embeddings each time). Index only when the store is empty.
    # A store that already holds duplicates from earlier runs is not cleaned here.
    if not db.get(limit=1)["ids"]:
        # PART ONE:
        # LOAD "some_data/US_Constitution in a Document object
        documents = TextLoader(os.path.join(DATA_PATH, "US_Constitution.txt")).load()

        # PART TWO
        # Split the document into chunks (you choose how and what size)
        text_splitter = CharacterTextSplitter(chunk_size=300)
        splitted_docs = text_splitter.split_documents(documents=documents)

        # PART THREE
        # EMBED THE Documents (now in chunks) to a persisted ChromaDB
        db.add_documents(splitted_docs)

    retriever = db.as_retriever()

    # PART FOUR
    # Use ChatOpenAI and ContextualCompressionRetriever to return the most
    # relevant part of the documents.
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    compressor = LLMChainExtractor.from_llm(llm=llm)
    compressor_retriever = ContextualCompressionRetriever(base_compressor=compressor,
                                                          base_retriever=retriever)

    relevant_docs = compressor_retriever.invoke(input=question)
    # The retriever can come back empty (see the fallback output in this notebook).
    if len(relevant_docs) == 0:
        return "no relevant information was found"

    return relevant_docs[0].page_content

In [9]:
# An open-ended question; the helper returns its fallback message when the
# retriever gives back no documents.
best_part_answer = us_constitution_helper("whats the best part of the constitution?")
print(best_part_answer)

no relevant information was found


In [8]:
# Load the Constitution text file from the data folder.
constitution_path = os.path.join(DATA_PATH, "US_Constitution.txt")
documents = TextLoader(constitution_path).load()

In [13]:
# Splitter built through the tiktoken-based constructor with chunk_size=300
# (presumably measured in tokens rather than characters — confirm in the docs).
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
)

In [14]:
# Break the loaded document(s) into chunks using the splitter configured above.
splitted_docs = text_splitter.split_documents(documents)

Created a chunk of size 333, which is longer than the specified 300
Created a chunk of size 472, which is longer than the specified 300
Created a chunk of size 312, which is longer than the specified 300


In [15]:
# Display how many chunks the splitter produced.
len(splitted_docs)

68

In [16]:
# Embed the chunks with OpenAI embeddings and index them in a Chroma store
# (no persist_directory is passed in this cell).
embedding_function = OpenAIEmbeddings()
db = Chroma.from_documents(splitted_docs, embedding=embedding_function)

In [17]:
# Chat model used by the compressor; temperature is set to 0.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
)

In [18]:
# Build the LLM-backed document compressor from the chat model.
compressor = LLMChainExtractor.from_llm(llm)

In [19]:
# Wrap the vector store's retriever so its results pass through the compressor.
base_retriever = db.as_retriever()
compressor_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)

In [30]:
# Query the compression retriever with a single keyword.
docs = compressor_retriever.invoke("liberty")

In [31]:
# The compression retriever may return no documents at all, in which case
# docs[0] would raise IndexError; print a fallback message instead.
if docs:
    print(docs[0].page_content)
else:
    print("no relevant information was found")

First Amendment  
Congress shall make no law respecting an establishment of religion, or prohibiting the free exercise thereof; or abridging the freedom of speech, or of the press; or the right of the people peaceably to assemble, and to petition the Government for a redress of grievances.  

Second Amendment  
A well regulated Militia, being necessary to the security of a free State, the right of the people to keep and bear Arms, shall not be infringed.


In [29]:
# Plain similarity search on the vector store (no compressor involved);
# print the third hit.
liberty_hits = db.similarity_search('liberty')
print(liberty_hits[2].page_content)

No Person held to Service or Labour in one State, under the Laws thereof, escaping into another, shall, in Consequence of any Law or Regulation therein, be discharged from such Service or Labour, but shall be delivered up on Claim of the Party to whom such Service or Labour may be due.

Section 3
New States may be admitted by the Congress into this Union; but no new State shall be formed or erected within the Jurisdiction of any other State; nor any State be formed by the Junction of two or more States, or Parts of States, without the Consent of the Legislatures of the States concerned as well as of the Congress.

The Congress shall have Power to dispose of and make all needful Rules and Regulations respecting the Territory or other Property belonging to the United States; and nothing in this Constitution shall be so construed as to Prejudice any Claims of the United States, or of any particular State.

Section 4
The United States shall guarantee to every State in this Union a Republic