In [2]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS

In [None]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter
# Read speech.txt from the working directory; the loader returns a list of
# Document objects (a single one for this file, per the length check below).
loader = TextLoader("speech.txt")
documents = loader.load()

# Cut the loaded text into overlapping chunks for embedding.
# NOTE(review): the recorded output warns about chunks of 347-984 characters,
# so chunk_size=200 is a target this splitter does not enforce here --
# presumably because it only cuts at its separator; confirm before relying
# on a hard size limit.
text_splitter = CharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=30,
)
docs = text_splitter.split_documents(documents)

Created a chunk of size 470, which is longer than the specified 200
Created a chunk of size 347, which is longer than the specified 200
Created a chunk of size 670, which is longer than the specified 200
Created a chunk of size 984, which is longer than the specified 200
Created a chunk of size 791, which is longer than the specified 200


In [10]:
# Sanity-check the split: both objects are lists, and the single loaded
# document has been turned into several chunks. One value per output line.
print(
    type(docs),
    type(documents),
    len(docs),
    len(documents),
    sep="\n",
)

<class 'list'>
<class 'list'>
7
1


In [12]:
import os

import torch
from langchain_huggingface import HuggingFaceEmbeddings

# Re-export the Hugging Face key only when it is actually set. os.environ
# accepts str values only, so assigning the None that os.getenv() returns for
# a missing variable raises TypeError and aborts the cell.
hf_api_key = os.getenv('HUGGING_FACE_API_KEY')
if hf_api_key is not None:
    os.environ['HUGGING_FACE_API_KEY'] = hf_api_key

# Ask torch whether a CUDA device is usable instead of inspecting
# CUDA_VISIBLE_DEVICES: that variable is normally unset even on GPU machines
# (which forced "cpu"), and can be set on machines where CUDA is unusable
# (which forced "cuda" and failed at model load).
device = "cuda" if torch.cuda.is_available() else "cpu"

embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={"device": device},
)

# Embed every chunk in `docs` and build the FAISS index queried by the cells
# below.
db = FAISS.from_documents(docs, embeddings)


  from .autonotebook import tqdm as notebook_tqdm


In [15]:
# Question reused by every search cell below.
query = "What does the speaker believe is the main reason the United States should enter the war?"

# Store the hits under their own name. `docs` holds the chunks the index was
# built from; rebinding it here would make a re-run of the index-building cell
# silently index the 3 search hits instead of the full set of chunks.
similar_docs = db.similarity_search(query, k=3)
similar_docs


[Document(id='18e2a1af-602b-4104-87c2-914707ebc687', metadata={'source': 'speech.txt'}, page_content='Just because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.'),
 Document(id='b535ecf0-1aee-47ad-9732-bb7c5d3ac8e4', metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is more precious than peace, and we shall fight for the things which we 

In [16]:
# Wrap the vector store as a retriever so it can be plugged into other
# LangChain components; here it is configured for a top-3 similarity search.
retriever = db.as_retriever(search_type = "similarity", search_kwargs = {"k": 3})

# Use a dedicated name for the results so the indexed chunks in `docs` are not
# overwritten (keeps the index-building cell safe to re-run).
retrieved_docs = retriever.invoke(query)
print(retrieved_docs)

[Document(id='18e2a1af-602b-4104-87c2-914707ebc687', metadata={'source': 'speech.txt'}, page_content='Just because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.'), Document(id='b535ecf0-1aee-47ad-9732-bb7c5d3ac8e4', metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is more precious than peace, and we shall fight for the things which we h

## Similarity Search with Score
There are some FAISS-specific methods. One of them is `similarity_search_with_score`, which returns not only the documents but also the distance between the query and each of them. The returned score is an L2 (Euclidean) distance, therefore a lower score means a closer match.


In [17]:
# Same query as above, but each hit comes back as a (Document, score) tuple.
# The score is a distance, so smaller values mean a closer match.
docs_and_scores = db.similarity_search_with_score(query)
docs_and_scores

[(Document(id='18e2a1af-602b-4104-87c2-914707ebc687', metadata={'source': 'speech.txt'}, page_content='Just because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.'),
  np.float32(1.2606812)),
 (Document(id='b535ecf0-1aee-47ad-9732-bb7c5d3ac8e4', metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is more precious than peace, and we shall fi

In [18]:
# Embed the query text with the same embedding model that was used to build
# the index, giving a plain list of floats.
embedding_vector = embeddings.embed_query(query)
# Prints the complete vector; the output is long.
print(embedding_vector)

[-0.008726360276341438, -0.018167592585086823, 0.005125285126268864, -0.014269951730966568, 0.014014532789587975, 0.07833597809076309, 0.08270476758480072, -0.08437392115592957, -0.055650968104600906, 0.019338766112923622, -0.06483781337738037, 0.06825461238622665, -0.01838253252208233, 0.010822591371834278, 0.04148860648274422, 0.03414866700768471, 0.02296089194715023, -0.10548102110624313, 0.011171403340995312, 0.018314801156520844, 0.03937722370028496, -0.004646689631044865, 0.03150428086519241, 0.06253266334533691, -0.02633834071457386, 0.032884445041418076, -0.007247856352478266, 0.08146040141582489, -0.05030947923660278, 0.022972041741013527, 0.002709963358938694, 0.003941631875932217, 0.033495720475912094, 0.03611495718359947, 0.025765176862478256, -0.012924467213451862, 0.10891018807888031, 0.011238057166337967, -0.024519503116607666, -0.042747221887111664, -0.030726756900548935, -0.05031953379511833, 0.04366019740700722, 0.08062636852264404, -0.004446972161531448, 0.0496046431

In [21]:
# Search with the precomputed query embedding instead of the raw text.
# Despite the variable name, the result is a list of Documents without scores
# (see the printed output).
docs_score_by_vectors = db.similarity_search_by_vector(embedding_vector)
print(docs_score_by_vectors)

[Document(id='18e2a1af-602b-4104-87c2-914707ebc687', metadata={'source': 'speech.txt'}, page_content='Just because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.'), Document(id='b535ecf0-1aee-47ad-9732-bb7c5d3ac8e4', metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is more precious than peace, and we shall fight for the things which we h

In [22]:
# Persist the FAISS index locally under "faiss_index" so it can be reloaded
# later without re-embedding the documents.
db.save_local("faiss_index")


In [24]:
# Reload the index that db.save_local("faiss_index") wrote, passing the same
# embedding model so new queries are embedded consistently with the stored
# vectors.
# NOTE(review): allow_dangerous_deserialization=True opts in to deserializing
# the saved store (pickle-based, per the LangChain docs). Keep it only for
# index files this notebook created itself; never use it on untrusted files.
new_db = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Re-run the query against the reloaded index; the recorded output shows the
# same top documents as the in-memory index returned.
docs = new_db.similarity_search(query)
docs

[Document(id='18e2a1af-602b-4104-87c2-914707ebc687', metadata={'source': 'speech.txt'}, page_content='Just because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.'),
 Document(id='b535ecf0-1aee-47ad-9732-bb7c5d3ac8e4', metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thing to lead this great peaceful people into war, into the most terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the right is more precious than peace, and we shall fight for the things which we 