In [1]:
import os

# Never commit a real key in this cell. A key that is already exported in the
# environment is kept as-is; the placeholder below is only installed when the
# variable is missing, so a working key is no longer overwritten on every run.
os.environ.setdefault("OPENAI_API_KEY", "Insert Your OPENAI AIP KEY")


In [3]:
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader

# Directory of the persisted Chroma database, relative to the working directory.
# Built with os.path.join so the separator matches the host OS: on Windows this
# is still ".\chroma1", while on POSIX it becomes "./chroma1" instead of a
# directory literally named ".\chroma1".
CHROMA_PATH = os.path.join(".", "chroma1")


In [5]:
# Prompt sent to the chat model. {context} receives the retrieved text and
# {question} the user's query. Written line by line so the leading newline,
# the " - -" separator and the trailing newline are explicit.
PROMPT_TEMPLATE = (
    "\n"
    "Answer the question based only on the following context:\n"
    "{context}\n"
    " - -\n"
    "Answer the question based on the above context: {question}\n"
)

In [7]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [9]:
def query_rag(query_text: str) -> tuple[str, str]:
  """
  Answer a question with Retrieval-Augmented Generation (Chroma + OpenAI).

  The query is matched against the Chroma database persisted at CHROMA_PATH,
  up to three matching chunks are joined into one context string, and an
  OpenAI chat model is asked to answer using PROMPT_TEMPLATE.

  Args:
    - query_text (str): The text to query the RAG system with.
  Returns:
    - formatted_response (str): The generated text followed by the list of sources.
    - response_text (str): The generated response text on its own. When the
      database returns no documents this is "Unable to find matching results."
      and the chat model is not called.
  """
  # Must be the same embedding function that was used when the database was built.
  embedding_function = OpenAIEmbeddings()

  # Open the persisted database.
  db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

  # Retrieve (document, relevance score) pairs for the query.
  results = db.similarity_search_with_relevance_scores(query_text, k=3)

  # Without any retrieved document there is no context to answer from, so
  # report that instead of sending an empty-context prompt to the model.
  if not results:
    response_text = "Unable to find matching results."
    return f"Response: {response_text}\nSources: []", response_text

  # Combine the text of the matching documents into a single context block.
  context_text = "\n\n - -\n\n".join([doc.page_content for doc, _score in results])

  # Fill the prompt template with the context and the query.
  prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
  prompt = prompt_template.format(context=context_text, question=query_text)

  # Initialize the OpenAI chat model.
  model = ChatOpenAI()

  # `predict` is deprecated; `invoke` returns a message whose `.content` is the text.
  response_text = model.invoke(prompt).content

  # Source of each matching document (None when the metadata has no "source").
  sources = [doc.metadata.get("source", None) for doc, _score in results]

  # Format and return the response including generated text and sources.
  formatted_response = f"Response: {response_text}\nSources: {sources}"
  return formatted_response, response_text


In [13]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-1.0.13-cp39-abi3-win_amd64.whl.metadata (7.1 kB)
Collecting build>=1.0.3 (from chromadb)
  Using cached build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp312-cp312-win_amd64.whl.metadata (8.7 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Using cached posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Using cached onnxruntime-1.22.0-cp312-cp312-win_amd64.whl.metadata (5.0 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Using cached opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl.metadata (2.4 kB)
Collecting tokenizers>=0.13.2 (from chromadb)
  Using cached tokenizers-0.21.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Using cached pypika-0.48.9-py2.py3-none-any.whl
Collecting importlib-resources (from chromadb)
  Using cached importlib

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.32.0 requires protobuf<5,>=3.20, but you have protobuf 5.29.5 which is incompatible.
streamlit 1.32.0 requires tenacity<9,>=8.1.0, but you have tenacity 9.1.2 which is incompatible.


In [15]:
# Ask about Krishna's message to Arjuna in chapter 2; print only the answer text.
query_text = "what krishna told to Arjuna in chapter 2"
formatted_response, response_text = query_rag(query_text)
print(response_text)

  response_text = model.predict(prompt)


Krishna told Arjuna in Chapter 2 of the Bhagavad Gita about the importance of fulfilling one's duty as a warrior without attachment to the results of their actions. He explained the concept of dharma and the eternal nature of the soul.


In [17]:
# Ask what Kurukshetra is; print only the answer text.
query_text = "What is kurukshetra?"
formatted_response, response_text = query_rag(query_text)
print(response_text)

Kurukshetra is a place or location.


In [19]:
# Ask who Arjuna is; print only the answer text.
query_text = "Who is Arjuna?"
formatted_response, response_text = query_rag(query_text)
print(response_text)

Arjuna is a character from the Indian epic, the Mahabharata.


In [21]:
# Ask what Arjuna is popular for; print only the answer text.
query_text = "FOr which Arjuna is popular for?"
formatted_response, response_text = query_rag(query_text)
print(response_text)

Arjuna is popular for his role in the Hindu epic Mahabharata.


In [23]:
# Narrow the previous question to a specific skill; print only the answer text.
query_text = "FOr which skill Arjuna is popular for?"
formatted_response, response_text = query_rag(query_text)
print(response_text)

Archery.


# 1. Compute the embedding of the query.
# 2. Retrieve the most similar chunks from Chroma using that embedding.
# 3. Build the prompt from the retrieved context and the query.
# 4. Ask the LLM to answer using that prompt.



In [31]:
# Ask which yogas the Bhagavadgita mentions; print only the answer text.
query_text = "what are various yogas as mentioned in Bhagavadgita"
formatted_response, response_text = query_rag(query_text)
print(response_text)

The various yogas mentioned in the Bhagavadgita are Karma Yoga, Bhakti Yoga, Jnana Yoga, and Dhyana Yoga.


In [29]:
# Ask for the names of the Pandavas; print only the answer text.
query_text = "Provide me names of Pandavs?"
formatted_response, response_text = query_rag(query_text)
print(response_text)

- Yudhishthira
- Bhima
- Arjuna
- Nakula
- Sahadeva


In [27]:
# Ask which yogas the Bhagavadgita mentions; print only the answer text.
# The wording of the answer can differ between runs of the same question.
query_text = "what are various yogas as mentioned in Bhagavadgita"
formatted_response, response_text = query_rag(query_text)
print(response_text)

The various yogas mentioned in the Bhagavad Gita are Karma Yoga (Path of Selfless Service), Bhakti Yoga (Path of Devotion), Jnana Yoga (Path of Knowledge), and Dhyana Yoga (Path of Meditation).
