In [1]:
%pip install -qU langchain-community langchain-huggingface

Note: you may need to restart the kernel to use updated packages.


In [2]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

# Fetch the govinfo.gov HTML page and wrap its text in LangChain documents.
loader = WebBaseLoader(
    "https://www.govinfo.gov/content/pkg/CDOC-110hdoc50/html/CDOC-110hdoc50.htm"
)
documents = loader.load()

# Cut the page into overlapping chunks. The separators are tried in the listed
# order: blank lines, then newlines, then spaces, then individual characters.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = text_splitter.split_documents(documents)
print("Number of chunks: ", len(chunks))

# Embedding model that will vectorise the chunks; model_name may be swapped for
# any other sentence-transformers checkpoint.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

USER_AGENT environment variable not set, consider setting it to identify your requests.


Number of chunks:  430


  from .autonotebook import tqdm as notebook_tqdm
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [3]:
%pip install faiss-cpu --quiet

# Import FAISS class from vectorstore library
from langchain_community.vectorstores import FAISS

# Create the database
vectore_store = FAISS.from_documents(chunks, embeddings)

# Check the number of chunks that have been indexed
vectore_store.index.ntotal

Note: you may need to restart the kernel to use updated packages.


430

In [4]:
# Ask a question and retrieve the chunks most similar to it.
query = "how long does the president's term last?"
top_k = 3  # number of chunks to return
docs = vectore_store.similarity_search(query, k=top_k)

# Print every hit under an 80-dash rule, followed by blank-line padding.
rule = "-" * 80
for hit in docs:
    print(rule, hit.page_content, "\n" * 2, sep="\n")


--------------------------------------------------------------------------------
Section 1. No person shall be elected to the office of the 
President more than twice, and no person who has held the 
office of President, or acted as President, for more than two 
years of a term of which some other person was elected 
President shall be elected to the office of the President more 
than once. But this Article shall not apply to any person 
holding the office of President when this Article was proposed 
by the Congress, and shall not prevent any person who may be 
holding the office of President, or acting as President, during 
the term within which this Article becomes operative from 
holding the office of President or acting as President during 
the remainder of such term.
    Section 2. This article shall be inoperative unless it 
shall have been ratified as an amendment to the Constitution by 
the legislatures of three-fourths of the several States within 
seven years from the date of

In [10]:
# Write the index to a local folder so it can be reused without re-embedding.
index_dir = "vector_db"
vectore_store.save_local(index_dir)

# Read the index back from disk, using the same embeddings object for queries.
# NOTE(review): load_local deserialises pickled data, which is why the explicit
# allow_dangerous_deserialization opt-in is required. That is acceptable here
# only because the files were written by this notebook a moment ago; never
# enable it for an index obtained from an untrusted source.
vectore_store = FAISS.load_local(
    index_dir,
    embeddings,
    allow_dangerous_deserialization=True,
)