FAISS

In [5]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter 

In [12]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["OPENAI_API_KEY"]=os.getenv("OPENAI_API_KEY")

In [9]:
loaders=TextLoader("speech.txt")
documents=loaders.load()
text_splitter=CharacterTextSplitter(chunk_size=100,chunk_overlap=50)
docs=text_splitter.split_documents(documents=documents)

Created a chunk of size 487, which is longer than the specified 100
Created a chunk of size 454, which is longer than the specified 100
Created a chunk of size 528, which is longer than the specified 100


In [13]:
docs

[Document(metadata={'source': 'speech.txt'}, page_content="The Burj Khalifa[a] (known as the Burj Dubai prior to its inauguration) is a skyscraper in Dubai, United Arab Emirates. It is the world's tallest structure. With a total height of 829.8 m (2,722 ft, or just over half a mile) and a roof height (excluding antenna, but including a 242.6 m spire)[2] of 828 m (2,717 ft), the Burj Khalifa has been the tallest structure and building in the world since its topping out in 2009, surpassing Taipei 101, the previous holder of that status.[3][4]"),
 Document(metadata={'source': 'speech.txt'}, page_content='Construction of the Burj Khalifa began in 2004, with the exterior completed five years later in 2009. The primary structure is reinforced concrete and some of the structural steel for the building originated from the Palace of the Republic in East Berlin, the former East German parliament.[5] The building was opened in 2010 as part of a new development called Downtown Dubai. It was design

In [11]:
embeddings=OpenAIEmbeddings(model="text-embedding-3-large")

In [14]:
db=FAISS.from_documents(documents=docs,embedding=embeddings)

In [16]:
db

<langchain_community.vectorstores.faiss.FAISS at 0x18a85ea2250>

In [20]:
### Querying the DB

query="How many elevators and escalators does burj khalifa has?"
doc=db.similarity_search(query=query)

In [21]:
doc[0].page_content

"The design is derived from the Islamic architecture of the region, such as in the Great Mosque of Samarra. The Y-shaped tripartite floor geometry is designed to optimise residential and hotel space. A buttressed central core and wings are used to support the height of the building. Although this design was derived from Tower Palace III, the Burj Khalifa's central core houses all vertical transportation except egress stairs within each of the wings.[7] The structure also features a cladding system which is designed to withstand Dubai's hot summer temperatures. [8] It contains a total of 57 elevators and 8 escalators."

In [23]:
### Retriever
retriever=db.as_retriever()
doc=retriever.invoke(query)

In [24]:
doc[0].page_content

"The design is derived from the Islamic architecture of the region, such as in the Great Mosque of Samarra. The Y-shaped tripartite floor geometry is designed to optimise residential and hotel space. A buttressed central core and wings are used to support the height of the building. Although this design was derived from Tower Palace III, the Burj Khalifa's central core houses all vertical transportation except egress stairs within each of the wings.[7] The structure also features a cladding system which is designed to withstand Dubai's hot summer temperatures. [8] It contains a total of 57 elevators and 8 escalators."

Similarity Search With Score

In [26]:
docs_and_score=db.similarity_search_with_score(query=query)
docs_and_score

[(Document(metadata={'source': 'speech.txt'}, page_content="The design is derived from the Islamic architecture of the region, such as in the Great Mosque of Samarra. The Y-shaped tripartite floor geometry is designed to optimise residential and hotel space. A buttressed central core and wings are used to support the height of the building. Although this design was derived from Tower Palace III, the Burj Khalifa's central core houses all vertical transportation except egress stairs within each of the wings.[7] The structure also features a cladding system which is designed to withstand Dubai's hot summer temperatures. [8] It contains a total of 57 elevators and 8 escalators."),
  0.80295235),
 (Document(metadata={'source': 'speech.txt'}, page_content="The Burj Khalifa[a] (known as the Burj Dubai prior to its inauguration) is a skyscraper in Dubai, United Arab Emirates. It is the world's tallest structure. With a total height of 829.8 m (2,722 ft, or just over half a mile) and a roof he

In [27]:
# We can also pass the vectors to DB after converting the query into embeddings
embeddings_vector= embeddings.embed_query(query)
embeddings_vector

[-0.03788268193602562,
 0.01610442064702511,
 -0.0066398633643984795,
 -0.044070105999708176,
 0.013022936880588531,
 0.0014299246249720454,
 -0.015541926957666874,
 0.0367821529507637,
 0.0036500906571745872,
 0.060847073793411255,
 0.004438803996890783,
 -0.010344981215894222,
 -0.007465261034667492,
 0.04847222566604614,
 -0.010907473973929882,
 0.03211101517081261,
 -0.0006782779819332063,
 -0.02885833941400051,
 0.023881498724222183,
 0.012986252084374428,
 -0.008700300008058548,
 -0.01780412718653679,
 -0.01277837436646223,
 -0.01593322679400444,
 -0.01164727471768856,
 0.031083853915333748,
 0.028026826679706573,
 -0.016997072845697403,
 -0.004383777268230915,
 -0.026706190779805183,
 -0.005695241969078779,
 -0.023636935278773308,
 0.009195538237690926,
 0.005132749211043119,
 -0.01856227032840252,
 0.03724682331085205,
 0.060944899916648865,
 -0.033333826810121536,
 0.02536110021173954,
 0.007220698520541191,
 -0.023967094719409943,
 -0.014062326401472092,
 -0.03499684855341911

In [28]:
docs_score=db.similarity_search_by_vector(embeddings_vector)
docs_score

[Document(metadata={'source': 'speech.txt'}, page_content="The design is derived from the Islamic architecture of the region, such as in the Great Mosque of Samarra. The Y-shaped tripartite floor geometry is designed to optimise residential and hotel space. A buttressed central core and wings are used to support the height of the building. Although this design was derived from Tower Palace III, the Burj Khalifa's central core houses all vertical transportation except egress stairs within each of the wings.[7] The structure also features a cladding system which is designed to withstand Dubai's hot summer temperatures. [8] It contains a total of 57 elevators and 8 escalators."),
 Document(metadata={'source': 'speech.txt'}, page_content="The Burj Khalifa[a] (known as the Burj Dubai prior to its inauguration) is a skyscraper in Dubai, United Arab Emirates. It is the world's tallest structure. With a total height of 829.8 m (2,722 ft, or just over half a mile) and a roof height (excluding a

In [29]:
### saving and loading

db.save_local("faiss_index")

In [30]:
new_db=FAISS.load_local("faiss_index",embeddings=embeddings,allow_dangerous_deserialization=True)
docs_new_db=new_db.similarity_search(query=query)
docs_new_db

[Document(metadata={'source': 'speech.txt'}, page_content="The design is derived from the Islamic architecture of the region, such as in the Great Mosque of Samarra. The Y-shaped tripartite floor geometry is designed to optimise residential and hotel space. A buttressed central core and wings are used to support the height of the building. Although this design was derived from Tower Palace III, the Burj Khalifa's central core houses all vertical transportation except egress stairs within each of the wings.[7] The structure also features a cladding system which is designed to withstand Dubai's hot summer temperatures. [8] It contains a total of 57 elevators and 8 escalators."),
 Document(metadata={'source': 'speech.txt'}, page_content="The Burj Khalifa[a] (known as the Burj Dubai prior to its inauguration) is a skyscraper in Dubai, United Arab Emirates. It is the world's tallest structure. With a total height of 829.8 m (2,722 ft, or just over half a mile) and a roof height (excluding a