In [5]:
## Build a sample Chroma vector DB

from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [6]:
import os
from dotenv import load_dotenv
# Pull key/value pairs from a local .env file (if any) into os.environ.
load_dotenv()

# os.environ only accepts str values, so assigning os.getenv(...) directly
# raises TypeError whenever HF_TOKEN is defined neither in the shell nor in
# .env. Re-export the token only when a value is actually available.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token

In [7]:
# Read the speech transcript from the working directory; `load()` returns a
# list of Document objects (a single one for this file, per the output).
loader = TextLoader(file_path="speech.txt")
data = loader.load()

# Display the loaded documents as the cell output.
data

[Document(metadata={'source': 'speech.txt'}, page_content='The Burj Khalifa[a] (known as the Burj Dubai prior to its inauguration) is a skyscraper in Dubai, United Arab Emirates. It is the world\'s tallest structure. With a total height of 829.8 m (2,722 ft, or just over half a mile) and a roof height (excluding antenna, but including a 242.6 m spire)[2] of 828 m (2,717 ft), the Burj Khalifa has been the tallest structure and building in the world since its topping out in 2009, surpassing Taipei 101, the previous holder of that status.[3][4]\n\nConstruction of the Burj Khalifa began in 2004, with the exterior completed five years later in 2009. The primary structure is reinforced concrete and some of the structural steel for the building originated from the Palace of the Republic in East Berlin, the former East German parliament.[5] The building was opened in 2010 as part of a new development called Downtown Dubai. It was designed to be the centerpiece of large-scale, mixed-use develop

In [8]:
# Break the document into chunks of at most 100 characters, with 20
# characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)

# NOTE: despite its name, `text` is a list of Document chunks, not a string.
text = text_splitter.split_documents(documents=data)
text

[Document(metadata={'source': 'speech.txt'}, page_content='The Burj Khalifa[a] (known as the Burj Dubai prior to its inauguration) is a skyscraper in Dubai,'),
 Document(metadata={'source': 'speech.txt'}, page_content="in Dubai, United Arab Emirates. It is the world's tallest structure. With a total height of 829.8 m"),
 Document(metadata={'source': 'speech.txt'}, page_content='height of 829.8 m (2,722 ft, or just over half a mile) and a roof height (excluding antenna, but'),
 Document(metadata={'source': 'speech.txt'}, page_content='antenna, but including a 242.6 m spire)[2] of 828 m (2,717 ft), the Burj Khalifa has been the'),
 Document(metadata={'source': 'speech.txt'}, page_content='has been the tallest structure and building in the world since its topping out in 2009, surpassing'),
 Document(metadata={'source': 'speech.txt'}, page_content='in 2009, surpassing Taipei 101, the previous holder of that status.[3][4]'),
 Document(metadata={'source': 'speech.txt'}, page_content='Constru

In [9]:
# Embedding model shared by every vector store built in this notebook.
embedding = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from tqdm.autonotebook import tqdm, trange


In [10]:
# Embed every chunk and index it in a Chroma store. No persist_directory is
# given here, so nothing is written to disk by this call.
chroma_vector_store = Chroma.from_documents(
    documents=text,
    embedding=embedding,
)

In [13]:
# Natural-language question reused by all the search cells that follow.
query = "How many elevators and escalators does burj khalifa has?"

# Fetch the chunks closest to the query and show the text of the best match.
docs = chroma_vector_store.similarity_search(query)
docs[0].page_content

'[8] It contains a total of 57 elevators and 8 escalators.'

In [14]:
### saving and loading
# Persist the index under ./chroma_db so it can be reopened later.
#
# Without explicit ids every run of this cell gets fresh auto-generated ids,
# so re-running it appends the same chunks again and the persisted store
# fills up with duplicates. Deterministic, position-based ids make the cell
# idempotent: a re-run overwrites the existing entries instead of adding new
# ones.
# NOTE(review): entries written by earlier runs (with auto-generated ids), or
# left over after re-chunking into fewer pieces, are not removed by this call;
# delete ./chroma_db once if the store is already polluted.
chroma_vector_store = Chroma.from_documents(
    text,
    embedding=embedding,
    ids=[f"speech-{position}" for position in range(len(text))],
    persist_directory="./chroma_db",
)

In [15]:
# Reopen the store saved under ./chroma_db. The same embedding object is
# supplied so that queries are embedded the same way as the stored chunks.
new_db = Chroma(
    embedding_function=embedding,
    persist_directory="./chroma_db",
)

# Same question as before, answered from the reloaded store.
doc = new_db.similarity_search(query)
doc[0].page_content


'[8] It contains a total of 57 elevators and 8 escalators.'

In [16]:
# Return (Document, score) pairs for the query.
# NOTE(review): the scores look like distances (best match has the smallest
# value) — confirm against the collection's configured metric.
new_db.similarity_search_with_score(query)

[(Document(metadata={'source': 'speech.txt'}, page_content='[8] It contains a total of 57 elevators and 8 escalators.'),
  0.5425519941527893),
 (Document(metadata={'source': 'speech.txt'}, page_content='antenna, but including a 242.6 m spire)[2] of 828 m (2,717 ft), the Burj Khalifa has been the'),
  0.8253799828454025),
 (Document(metadata={'source': 'speech.txt'}, page_content="from Tower Palace III, the Burj Khalifa's central core houses all vertical transportation except"),
  0.8711452400883476),
 (Document(metadata={'source': 'speech.txt'}, page_content='from "Burj Dubai" to "Burj Khalifa" during inauguration.'),
  0.9609055641747838)]

In [18]:
### retriever

# Wrap the reloaded store in the retriever interface, then show the text of
# the first document it returns for the query.
retriever = new_db.as_retriever()
retriever.invoke(input=query)[0].page_content

'[8] It contains a total of 57 elevators and 8 escalators.'