In [48]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

text_docs = TextLoader("speech.txt")
text_load = text_docs.load()
text_load


[Document(metadata={'source': 'speech.txt'}, page_content='Democracy and Zulifqar Ali Bhutto (Ex Prime minister of Pakistan)\n\nDemocracy is our Politics, Islam is our religion, Socialism is our Economy.\n\n The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto. \n\n\n He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.\n')]

In [49]:
splitter = RecursiveCharacterTextSplitter(chunk_size= 200, chunk_overlap= 20)
text_spliiter = splitter.split_documents(text_load)
text_spliiter

[Document(metadata={'source': 'speech.txt'}, page_content='Democracy and Zulifqar Ali Bhutto (Ex Prime minister of Pakistan)\n\nDemocracy is our Politics, Islam is our religion, Socialism is our Economy.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.'),
 Document(metadata={'source': 'speech.txt'}, page_content='He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.')]

In [50]:
embedding = HuggingFaceEmbeddings(model_name= "all-MiniLM-L6-v2")
chroma_db = Chroma.from_documents(documents=text_spliiter, embedding=embedding, persist_directory="./chroma_db")
chroma_db

<langchain_chroma.vectorstores.Chroma at 0x19e9c070950>

In [51]:
query = "Who was the second elected prime minister of pakistan?"
answer_2 = chroma_db.similarity_search_with_score(query)
answer_2

[(Document(metadata={'source': 'speech.txt'}, page_content='He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.'),
  0.43707388935882063),
 (Document(metadata={'source': 'speech.txt'}, page_content='He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.'),
  0.43707388935882063),
 (Document(metadata={'source': 'speech.txt'}, page_content='He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.'),
  0.43707388935882063),
 (Document(metadata={'source': 'speech.txt'}, page_content='He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.'),
  0.43707388935882063)]

In [52]:
answer_2[0]

(Document(metadata={'source': 'speech.txt'}, page_content='He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.'),
 0.43707388935882063)

In [56]:
#Load from disk

query_3 = "banzeer Bhutto" 
answer_3 = chroma_db.similarity_search(query_3)
print(answer_3)

[Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.'), Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.'), Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.'), Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.')]


In [54]:
answer_3[0].page_content

'He was the first elected Prime minister of Pakistan. \n \n He become a prime minister of Pakistan after winning first general elections of Pakistan.'

In [57]:
retriever = chroma_db.as_retriever()
retriever_response = retriever.invoke(query_3)
retriever_response

[Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.'),
 Document(metadata={'source': 'speech.txt'}, page_content='The second elected Prime Minister of pakistan was Banzeer Bhutto daughter of zulifqar Ali Bhutto. \n \n\n This statement was dileverd by Ex Pakistani Prime Minister Zulifqar Ali Bhutto.')]