In [1]:
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Read the OpenAI API key from the environment; this is None when the variable is unset.
# NOTE(review): `key` is not referenced again in the visible cells.
key = os.environ.get("OPENAI_API_KEY")

# Data Ingestion

In [2]:

from langchain_community.document_loaders import TextLoader

# Read the plain-text speech file from disk into LangChain Document objects.
loader = TextLoader("speech.txt")
documents = loader.load()
#documents

[Document(page_content='MEDITATION ACCORDING TO YOGA\nWHAT IS MEDITATION?\nWhat is meditation? Meditation is the power which enables us to resist\nall this. Nature may call us, "Look, there is a beautiful thing!" I do not look.\nNow she says, "There is a beautiful smell; smell it!" I say to my nose, "Do not\nsmell it", and the nose doesn\'t. "Eyes, do not see!" Nature does such an awful\nthing - kills one of my children, and says, "Now, rascal, sit down and weep! Go\nto the depths!" I say, "I don\'t have to." I jump up. I must be free. Try it\nsometimes... [In meditation], for a moment, you can change this nature. Now,\nif you had that power in yourself, would not that be heaven, freedom? That is\nthe power of meditation.\nHow is it to be attained? In a dozen different ways. Each temperament\nhas its own way. But this is the general principle: get hold of the mind. The\nmind is like a lake, and every stone that drops into it raises waves. These waves\ndo not let us see what we are. The

In [17]:
# We can also load documents directly from a web page using WebBaseLoader
from langchain_community.document_loaders import WebBaseLoader
import bs4

# The trailing comma makes this a one-element tuple of URLs.
url_path = ("https://www.scotthyoung.com/blog/the-best-articles-on-life-philosophy/",)
loader = WebBaseLoader(
    web_paths=url_path,
    bs_kwargs={
        # Restrict parsing to elements that carry these HTML classes.
        "parse_only": bs4.SoupStrainer(class_=("content", "entry-content")),
    },
)
text_document = loader.load()
#text_document

In [19]:
# PDF Reader

from langchain_community.document_loaders import PyPDFLoader
# Parse MEDITATION.pdf into LangChain Document objects.
loader = PyPDFLoader("MEDITATION.pdf")
pdf_documents = loader.load()

In [21]:
#pdf_documents

# Creating Chunks


In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the PDF documents into chunks of at most 1000 characters, with a
# 200-character overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
text_chunks = text_splitter.split_documents(pdf_documents)

In [24]:
# Preview the first four chunks to sanity-check the split.
text_chunks[:4]

[Document(page_content='MEDITATION  ACCORDING TO YOGA  \nWHAT  IS MEDITATION?  \nWhat is medita tion? Meditation is the power which enables us to resist \nall this. Nature may call us, "Look, there is a beautiful thing!" I do not look. \nNow she says, "There is a beautiful smell; smell it!" I say to my nose, "Do not \nsmell it", and the nose doesn\'t. "Eyes , do not see!" N ature does such an awful \nthing - kills one of my children, and says, "Now, rascal, sit down and weep! Go \nto the depths!" I say, "I don\'t have to." I jump up. I must be free. Try it \nsometimes. .. [In meditation], for a moment, you can change this nature. Now, \nif you had that power in yourself, would not that be heaven, freedom? That is \nthe power of meditation.  \nHow is it to be attained? In a dozen different ways. Each temperament \nhas its own way. But this is the general principle: get hold of the mind. The \nmind is like a lake, and every stone that drops into it raises waves. These waves', metadata={'

# Vector Embedding & Vector Store

## Chroma DB

In [25]:
# OpenAI embeddings + Chroma vector store
from langchain_community.embeddings import OpenAIEmbeddings
# Chroma is imported from langchain_community, like the other integrations in
# this notebook; the older `langchain.vectorstores` import path is deprecated.
# Chroma acts as a local vector DB, so no separate API key is needed to store data.
from langchain_community.vectorstores import Chroma

# Embed every chunk with OpenAI and index the resulting vectors in Chroma.
db = Chroma.from_documents(text_chunks, OpenAIEmbeddings())

  warn_deprecated(


In [28]:
# Now we can ask any query to retrieve information, but the query should be
# related to the PDF document.
query = "What is meditations?"
# Fetch the 2 chunks most similar to the query from the Chroma store.
response = db.similarity_search(query, k=2)


In [29]:
# Show the 2 chunks returned by the Chroma search (k=2).
# Another available option is to use an LLM to filter these results.
response

[Document(page_content="THE GATE TO BLISS  \nMeditation is the gate that opens that to us. Prayers, ceremonials, and all \nthe other forms of worship are simply kindergartens of meditation. You pray, you \noffer somethin g. A certain theory existed that everything raised one's spiritual \npower. The use of certain words, flowers, images, temples, ceremonials like the \nwaving of lights brings the mind to that attitude, but that attitude is always in the \nhuman soul, nowhere els e. [People] are all doing it; but what they do without \nknowing it, do knowingly. That is the power of meditation.  \nSlowly and gradually we are to  train ourselves. It is no joke - not a \nquestion of a day, or years, or maybe of births. Never mind! The pull mu st go on. \nKnowingly, voluntarily, the pull must go on. Inch by inch we will gain ground. \nWe will begin to feel and get real possessions, which no one can take away from \nus - the wealth that no man can take, the wealth that nobody can destroy, th

## FAISS vectordb

In [30]:
from langchain_community.vectorstores import FAISS

# Embed the same chunks with OpenAI and index them in a FAISS vector store.
faiss_db = FAISS.from_documents(
    documents=text_chunks,
    embedding=OpenAIEmbeddings(),
)

In [32]:
# Run the same query against the FAISS store, keeping the top 2 matches.
# Note: this rebinds `response`, replacing the earlier Chroma results.
response = faiss_db.similarity_search(query, k=2)

In [33]:
# Show the 2 chunks returned by the FAISS search for the same query.
response

[Document(page_content="THE GATE TO BLISS  \nMeditation is the gate that opens that to us. Prayers, ceremonials, and all \nthe other forms of worship are simply kindergartens of meditation. You pray, you \noffer somethin g. A certain theory existed that everything raised one's spiritual \npower. The use of certain words, flowers, images, temples, ceremonials like the \nwaving of lights brings the mind to that attitude, but that attitude is always in the \nhuman soul, nowhere els e. [People] are all doing it; but what they do without \nknowing it, do knowingly. That is the power of meditation.  \nSlowly and gradually we are to  train ourselves. It is no joke - not a \nquestion of a day, or years, or maybe of births. Never mind! The pull mu st go on. \nKnowingly, voluntarily, the pull must go on. Inch by inch we will gain ground. \nWe will begin to feel and get real possessions, which no one can take away from \nus - the wealth that no man can take, the wealth that nobody can destroy, th