# save the open ai key in environment variable

In [5]:
import os
os.environ["OPENAI_API_KEY"]="your_api_key"

# import the necessary modules

In [6]:
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader

CHROMA_PATH = ".\chroma5"


# load the data into chroma db

In [7]:
loaders = PyPDFLoader("C:\\Users\\padma\\Downloads\\Task_for_internship\\The-Jungle-Books-text.pdf")

documents = loaders.load()

db = Chroma.from_documents(
    documents,
    OpenAIEmbeddings(),
    persist_directory=CHROMA_PATH
  )
db.persist()

# defining prompt template

In [8]:
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
 - -
Answer the question based on the above context: {question}
"""

In [9]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [10]:
def query_rag(query_text):
  """
  Query a Retrieval-Augmented Generation (RAG) system using Chroma database and OpenAI.
  Args:
    - query_text (str): The text to query the RAG system with.
  Returns:
    - formatted_response (str): Formatted response including the generated text and sources.
    - response_text (str): The generated response text.
  """
  # YOU MUST - Use same embedding function as before
  embedding_function = OpenAIEmbeddings()

  # Prepare the database
  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
  
  # Retrieving the context from the DB using similarity search
  results = db.similarity_search_with_relevance_scores(query_text, k=3)

  # Check if there are any matching results or if the relevance score is too low
  # if len(results) == 0 or results[0][1] < 0.7:
  #   print(f"Unable to find matching results.")

  # Combine context from matching documents
  context_text = "\n\n - -\n\n".join([doc.page_content for doc, _score in results])
 
  # Create prompt template using context and query text
  prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
  prompt = prompt_template.format(context=context_text, question=query_text)
  
  # Initialize OpenAI chat model
  model = ChatOpenAI()

  # Generate response text based on the prompt
  response_text = model.predict(prompt)
 
   # Get sources of the matching documents
  sources = [doc.metadata.get("source", None) for doc, _score in results]
 
  # Format and return response including generated text and sources
  formatted_response = f"Response: {response_text}\nSources: {sources}"
  return formatted_response, response_text


# Ask questions

In [11]:
query_text="what are the characters are their in this story"
formatted_response, response_text = query_rag(query_text)
print(response_text)

  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
  response_text = model.predict(prompt)


The characters in "The Jungle Book" include Mowgli, Baloo, Bagheera, Shere Khan, and Kaa.


In [12]:
query_text="who is author of this book"
formatted_response, response_text = query_rag(query_text)
print(response_text)

Rudyard Kipling


In [13]:
query_text="what is the theme of this story"
formatted_response, response_text = query_rag(query_text)
print(response_text)

The theme of "The Jungle Book" is about the importance of family, friendship, and the balance between the laws of the jungle and human society. It also explores themes of identity, belonging, and the struggle between civilization and nature.


In [14]:
query_text="who are the best friends"
formatted_response, response_text = query_rag(query_text)
print(response_text)

Based on the context provided, the best friends mentioned in the verses are Bhîßma, Kar√a, K®pa, Açvatthåmå, Vikar√a, Bhüriçravå, and Jayadratha.
