# Setting up OPEN AI API KEY in the environment

In [1]:
import os
os.environ["OPENAI_API_KEY"]=""


In [12]:
def load_llm():
        from langchain_openai import ChatOpenAI
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) #gpt-4o-mini
        return llm

# Purpose of the Code:
This function loads and initializes a language model (LLM) using LangChain’s ChatOpenAI class, which connects to OpenAI's GPT models. It sets the specific model and behavior for generating responses.

# Function Definition:
Defines a function named load_llm that initializes and returns a pre-configured instance of a language model.

# Import the Required Class:
ChatOpenAI: A LangChain class for interacting with OpenAI’s chat-based models (like gpt-3.5-turbo and gpt-4).

# Initialize the LLM:
model_name="gpt-3.5-turbo": Specifies the OpenAI model to use. In this case, it uses GPT-3.5-Turbo, a fast and cost-efficient version of OpenAI's GPT models.

You can replace it with another model (e.g., "gpt-4" or "gpt-4o-mini" if supported).

temperature=0: Controls the randomness of the model’s output.

temperature=0: Produces deterministic and consistent responses, making it ideal for tasks requiring factual accuracy.

Higher temperatures (e.g., 0.7) would make responses more creative and varied but less predictable.

In [13]:
from langchain.prompts import ChatPromptTemplate

def load_prompt():
        prompt = """ You need to answer the question in the sentence as same as in the  pdf content. . 
        Given below is the context and question of the user.
        context = {context}
        question = {question}
        if the answer is not in the pdf , answer "i donot know what the hell you are asking about"
         """
        prompt = ChatPromptTemplate.from_template(prompt)
        return prompt

# Purpose of the Prompt:
This instructs the language model to:
Provide answers strictly based on the content of the PDF.
Use the given context (a portion of the PDF content) and the question (user input) to derive the response.
Return "i donot know what the hell you are asking about" if the question cannot be answered from the provided context.
# Dynamic Variables:
{context}: Will be replaced with relevant text extracted from the PDF during execution.
{question}: Will be replaced with the user’s query.

In [14]:
#Import Dependencies
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings


def load_knowledgeBasee():
        embeddings=OpenAIEmbeddings()
        DB_FAISS_PATH = 'vectorstore/db_faiss'
        db = FAISS.load_local(DB_FAISS_PATH, embeddings,allow_dangerous_deserialization=True)
        return db

# Purpose of the Code:
The function load_knowledgeBasee loads a pre-existing FAISS vector store (which contains document embeddings) from a local directory. This makes it possible to perform semantic searches or retrieve relevant content without rebuilding the vector store every time.
# Define the Function:
A function named load_knowledgeBasee is defined to encapsulate the process of loading a saved FAISS vector store.
# Initialize Embeddings:
Purpose:

Creates an instance of OpenAIEmbeddings to match the embedding format of the saved FAISS vector store.
These embeddings ensure compatibility when performing searches on the loaded vector store.

Details:

OpenAIEmbeddings requires an API key from OpenAI, which should already be set in the environment for this to work.
# Define the Path to the FAISS Vector Store:
pecifies the directory where the FAISS vector store is stored (vectorstore/db_faiss).
# Load the FAISS Vector Store:
FAISS.load_local:
Loads the vector store from the local directory (DB_FAISS_PATH).
Uses the embeddings instance for compatibility with the stored data.

Parameter: allow_dangerous_deserialization=True:
Ensures that deserialization of the stored FAISS vector store is allowed, even if there are potential risks.
This should be used cautiously in secure environments to avoid loading untrusted or tampered files.


In [15]:
knowledgeBase=load_knowledgeBasee()
llm=load_llm()
prompt=load_prompt()

In [16]:
def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

# Purpose of the Code:
The function format_docs takes a list of document objects (docs) and extracts their text content. It combines the content into a single string, with two newlines (\n\n) separating the individual documents.

# Extract and Combine Document Content:
doc.page_content:
Accesses the page_content attribute of each document object in the docs list.
Assumes that each document object has this attribute containing its text.

for doc in docs:
Iterates over each document object in the docs list.

"\n\n".join(...):
Joins the extracted text from all documents into a single string.
The two newline characters (\n\n) are used as separators between the content of different documents.

In [17]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
query = "who is krishna"
if True:
    similar_embeddings=knowledgeBase.similarity_search(query)
    similar_embeddings=FAISS.from_documents(documents=similar_embeddings, embedding=OpenAIEmbeddings())
                
    #creating the chain for integrating llm,prompt,stroutputparser
    retriever = similar_embeddings.as_retriever()
    rag_chain = (
                        {"context": retriever | format_docs, "question": RunnablePassthrough()}
                        | prompt
                        | llm
                        | StrOutputParser()
                    )
                
    response=rag_chain.invoke(query)

In [18]:
response

'Krishna is described as the reservoir of pleasure and the primeval cause of everything. He is also known as Govinda, or one who gives pleasure to the senses. Krishna establishes Himself as the source of the material and spiritual worlds, and the origin of Brahman and Paramatma. He is worshipped by those whose eyes are anointed with the salve of divine love. Krishna is the Original Person who is always meditated upon by His beloved devotees.'

# Purpose of the Code:
This code performs a retrieval-augmented generation (RAG) workflow using LangChain components. It:

1)Retrieves relevant documents from a knowledge base based on a query.
2)Reformats the retrieved documents.
3)Combines the context and query into a structured prompt.
4)Passes the prompt to a language model (LLM) to generate a response.
5)Parses and outputs the response.

# Retrieve Similar Documents:
knowledgeBase.similarity_search(query):
Searches the FAISS vector store (knowledgeBase) for documents most similar to the query.
Returns a list of document objects containing the most relevant content.

# Create a Subvector Store:
Purpose:
Creates a new FAISS vector store containing only the retrieved similar documents.
Uses the same OpenAIEmbeddings to ensure compatibility.

Why?:
By creating a subvector store, subsequent operations can work with only the most relevant subset of the knowledge base.

# Create a Retriever:
Converts the subvector store (similar_embeddings) into a retriever.
A retriever is a simplified interface to search and retrieve text from a vector store.

#  Define the RAG Chain:
1)RAG Chain Workflow:
Input as a Dictionary:
Input consists of two keys:
"context": Retrieved documents are processed through retriever and formatted by format_docs.
"question": The query is passed directly through RunnablePassthrough without changes.

2)prompt:
Combines the "context" and "question" into a structured prompt (created by load_prompt()).

3)llm:
The prompt is passed to the loaded language model (llm), which generates a response.

4)StrOutputParser:
Converts the response from the LLM into a simple string format for easier use.

# Checking the Performance by changing the model name and temperature

In [2]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

def load_llm():
        from langchain_openai import ChatOpenAI
        llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.7) #gpt-4o-mini
        return llm

from langchain.prompts import ChatPromptTemplate

def load_prompt():
        prompt = """ You need to answer the question in the sentence as same as in the  pdf content. . 
        Given below is the context and question of the user.
        context = {context}
        question = {question}
        if the answer is not in the pdf , answer "i donot know what the hell you are asking about"
         """
        prompt = ChatPromptTemplate.from_template(prompt)
        return prompt

def load_knowledgeBasee():
        embeddings=OpenAIEmbeddings()
        DB_FAISS_PATH = 'vectorstore/db_faiss'
        db = FAISS.load_local(DB_FAISS_PATH, embeddings,allow_dangerous_deserialization=True)
        return db



In [11]:
knowledgeBase=load_knowledgeBasee()
llm=load_llm()
prompt=load_prompt()

def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
query = "What is kurukshetra"
if True:
    similar_embeddings=knowledgeBase.similarity_search(query)
    similar_embeddings=FAISS.from_documents(documents=similar_embeddings, embedding=OpenAIEmbeddings())
                
    #creating the chain for integrating llm,prompt,stroutputparser
    retriever = similar_embeddings.as_retriever()
    rag_chain = (
                        {"context": retriever | format_docs, "question": RunnablePassthrough()}
                        | prompt
                        | llm
                        | StrOutputParser()
                    )
                
    response1=rag_chain.invoke(query)

response1

'Kurukṣetra is a holy place 160 km north of Delhi in the state of Haryana in India. It was the location where the great Mahābhārata war was fought between the Pāṇḍavas and the Kauravas, and where Śrī Kṛṣṇa spoke the Bhagavad-gītā to His devotee, Arjuna.'

In [5]:
# First Query
query = "FOr which Arjuna is popular for"
response1 = rag_chain.invoke(query)
print("Response to Query 1:", response1)

Response to Query 1: Arjuna is popular for being a key warrior in the Mahabharata and for his role in the Bhagavad-gita, where he receives guidance from Lord Krishna on duty, righteousness, and self-realization during the Kurukshetra war.


In [6]:
# Second Query
query = "FOr which skill Arjuna is popular for?"
response1 = rag_chain.invoke(query)
response1

'I do not know what the hell you are asking about.'

In [7]:
# Third Query
query = "Provide me names of Pandavas?"
response1 = rag_chain.invoke(query)
response1

'The names of the Pandavas are not mentioned in the provided content. I do not know what the hell you are asking about.'

In [None]:
query = "Who is dusyasana?"
response1 = rag_chain.invoke(query)
response1