In [5]:
#import all required libraries 
import os

import dotenv
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
#local embedding system from langchain community 
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
#for splitting text into chunk 
from langchain_text_splitters import CharacterTextSplitter

In [6]:
# Load the source PDF into LangChain documents (splitting into chunks is
# done in a later cell).
# The location can be overridden through the PROGRESS_REPORT_PDF environment
# variable so the notebook is not tied to one machine; the default is the
# original author's local path, so existing behaviour is unchanged.
PDF_PATH = os.environ.get(
    "PROGRESS_REPORT_PDF",
    "/Users/saeedanwar/Desktop/Ai-bot/FrecnhyDe Progress Report.pdf",
)
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

In [7]:
# Chunk the loaded documents: the splitter is configured with
# chunk_size=1000 and no overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents=documents)


In [8]:

# Open-source sentence-transformer embedder, selected by model name.
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)


In [9]:
# Index the chunks in a Chroma vector store using the embedder above.
# No persist_directory is passed in this cell.
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
)


In [10]:
# query it
query = "de frenchy website "
# Keep the hits in their own variable. `docs` must keep holding the full
# chunk list, because a later cell passes `docs` to Chroma.from_documents;
# overwriting it here would make that cell index only these few hits.
results = db.similarity_search(query)

# print the content of the first hit
print(results[0].page_content)

Frenchy De Dubai Progress Report  
Website  
• Home page  (Done)  
• Login, signup modals (Done)  
• Verification of email while registering.  (In Queue)  
• Our story Page (Done)  
• Restaurant page  (Done)  
• Restaurant internal page  (In Queue)  
• Blogs designs (Not confirm from client yet)  
• Blogs internal page  (In Queue)  
• Reservation date picker  (In Queue)  
• Reservation modals  (In Queue)  
• Reservation Cancellation  (In Queue)  
• Contact us Page  (Done)  
• Privacy policy page  (In Queue)  
• 404 Page  (In Queue)  
 
Admin Panel  
• Dashboard Analytics (Not yet design ed) 
• Add Restaurant  (Done)  
• List of Restaurants, Edit and Delete  (Done)  
• Reviews of Restaurant  (Done)  
• Table Reservations  (In Queue)  
• Restaurant category  (Done)  
• Add Events  (In Queue)  
• Listing Events, Edit and Delete  (In Queue)  
• Add Blogs  (In Queue)  
• Listing Blogs, Edit and Delete  (In Queue)  
• Leeds Managements -  Login profiles, Queries and subscribers  (In Queue)  

In [14]:
# save to disk
# Re-derive the chunk list from `documents` instead of reading `docs`:
# this notebook also reuses `docs` for similarity-search results, and
# indexing those would persist only a handful of hits, not the whole report.
chunks = text_splitter.split_documents(documents)
db2 = Chroma.from_documents(chunks, embedding_function, persist_directory="./chroma_db")
# NOTE(review): re-running this cell presumably adds the same chunks to
# ./chroma_db again — confirm, and consider passing stable `ids` if so.
docs = db2.similarity_search(query)


In [15]:
# Display the hits from the most recent similarity_search assigned to `docs`.
docs

[Document(page_content='Frenchy De Dubai Progress Report  \nWebsite  \n• Home page  (Done)  \n• Login, signup modals (Done)  \n• Verification of email while registering.  (In Queue)  \n• Our story Page (Done)  \n• Restaurant page  (Done)  \n• Restaurant internal page  (In Queue)  \n• Blogs designs (Not confirm from client yet)  \n• Blogs internal page  (In Queue)  \n• Reservation date picker  (In Queue)  \n• Reservation modals  (In Queue)  \n• Reservation Cancellation  (In Queue)  \n• Contact us Page  (Done)  \n• Privacy policy page  (In Queue)  \n• 404 Page  (In Queue)  \n \nAdmin Panel  \n• Dashboard Analytics (Not yet design ed) \n• Add Restaurant  (Done)  \n• List of Restaurants, Edit and Delete  (Done)  \n• Reviews of Restaurant  (Done)  \n• Table Reservations  (In Queue)  \n• Restaurant category  (Done)  \n• Add Events  (In Queue)  \n• Listing Events, Edit and Delete  (In Queue)  \n• Add Blogs  (In Queue)  \n• Listing Blogs, Edit and Delete  (In Queue)  \n• Leeds Managements -  L

In [16]:
# Reopen the store kept under ./chroma_db, supplying the same embedder.
db3 = Chroma(
    embedding_function=embedding_function,
    persist_directory="./chroma_db",
)


In [29]:
# Run the same query against the store reopened from disk; the hits
# replace whatever `docs` held before.
docs = db3.similarity_search(query=query)
# print(docs[0].page_content)

In [27]:
# Check which class the store reopened from ./chroma_db is an instance of.
type(db3)

langchain_chroma.vectorstores.Chroma

In [28]:
# Check the container type that similarity_search returned.
type(docs)

list