In [2]:
# Dependencies are pinned to the versions this notebook was last executed with,
# so a fresh runtime resolves the same environment. `%pip` (rather than `!pip`)
# installs into the environment of the running kernel.
%pip install langchain==0.2.12 langchain-experimental==0.0.64 langchain-community==0.2.11 langchain-openai==0.1.21 openai==1.40.3 chromadb==0.5.5 pypdf==4.3.1 sentence_transformers==3.0.1 gradio==4.41.0 langchain-together==0.1.5

Collecting langchain
  Downloading langchain-0.2.12-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-experimental
  Downloading langchain_experimental-0.0.64-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.11-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.1.21-py3-none-any.whl.metadata (2.6 kB)
Collecting openai
  Downloading openai-1.40.3-py3-none-any.whl.metadata (22 kB)
Collecting chromadb
  Downloading chromadb-0.5.5-py3-none-any.whl.metadata (6.8 kB)
Collecting pypdf
  Downloading pypdf-4.3.1-py3-none-any.whl.metadata (7.4 kB)
Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting gradio
  Downloading gradio-4.41.0-py3-none-any.whl.metadata (15 kB)
Collecting langchain-together
  Downloading langchain_together-0.1.5-py3-none-any.whl.metadata (1.9 kB)
Collecting langchain-core<0.3.0,>=0.2.27 (from langchain)

In [3]:
# Pinned to the version this notebook was last executed with; `%pip` targets the kernel's environment.
%pip install PyPDF2==3.0.1

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [4]:
# Pinned to the version this notebook was last executed with (`-U` is redundant once the
# version is fixed); `%pip` targets the kernel's environment.
%pip install langchain-huggingface==0.0.3


Collecting langchain-huggingface
  Downloading langchain_huggingface-0.0.3-py3-none-any.whl.metadata (1.2 kB)
Downloading langchain_huggingface-0.0.3-py3-none-any.whl (17 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-0.0.3


In [7]:
import getpass
import os

from PyPDF2 import PdfFileReader
from sentence_transformers import SentenceTransformer
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import Together

def setup_rag_model(pdf_path, together_api_key, score_threshold=None):
    """Build a retrieval-augmented question-answering chain over one PDF.

    The PDF is loaded with ``PyPDFLoader``, the loaded pages are embedded with
    the ``all-MiniLM-L6-v2`` sentence-transformer model and indexed in a Chroma
    vector store, and a ``RetrievalQA`` "stuff" chain is wired to a Together
    hosted Llama-2 chat model.

    Args:
        pdf_path: Path of the PDF file to index.
        together_api_key: API key forwarded to the ``Together`` LLM client.
        score_threshold: Optional minimum relevance score for retrieved pages.
            ``None`` (the default) uses the retriever's plain similarity
            search. When given, the retriever is configured with
            ``search_type="similarity_score_threshold"``.

    Returns:
        A ``(chain, document_text)`` tuple: the configured ``RetrievalQA``
        chain, and the text of every loaded page joined with single spaces.

    Raises:
        ValueError: If the loader returns no pages for ``pdf_path``.
    """
    # Load the PDF document
    pages = PyPDFLoader(pdf_path).load()
    if not pages:
        # Fail early with a clear message instead of building an empty index.
        raise ValueError(f"No pages could be loaded from {pdf_path!r}.")

    # Combine the text of all pages into a single string (returned to the caller)
    document_text = " ".join(page.page_content for page in pages)

    # Create embeddings and index the pages
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    db = Chroma.from_documents(pages, embedding_function)

    # Initialize the LLM; temperature=0 and top_k=1 request greedy decoding
    llm = Together(
        model="meta-llama/Llama-2-70b-chat-hf",
        max_tokens=256,
        temperature=0,
        top_k=1,
        together_api_key=together_api_key
    )

    # Setup the retriever.
    # A score threshold has to be requested through `search_type` plus
    # `search_kwargs`; the former `similarity_score_threshold=0.9` keyword is
    # not a retriever option, so no threshold was actually being applied.
    # The default therefore stays plain similarity search, and thresholding is
    # opt-in via `score_threshold`.
    if score_threshold is None:
        retriever = db.as_retriever()
    else:
        retriever = db.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"score_threshold": score_threshold},
        )

    # Define the prompt template for Q&A ([INST] ... [/INST] wraps the Llama-2 chat turn)
    prompt_template = """Please answer the following question based on the document provided:
    CONTEXT: {context}
    QUESTION: {question}"""
    PROMPT = PromptTemplate(template=f"[INST] {prompt_template} [/INST]", input_variables=["context", "question"])

    # Create the retrieval chain; retrieved pages are "stuffed" into {context}
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        input_key='query',
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
        verbose=True
    )

    return chain, document_text

def ask_question(chain, document_text, question):
    """Run one question through the QA chain and return the answer text.

    Args:
        chain: The QA chain to query. It is invoked with a single ``"query"``
            input and must return a mapping containing a ``"result"`` entry.
        document_text: Unused; kept so existing callers keep working. The
            chain obtains its context from its own retriever, so the full
            document text is no longer sent along with every question.
        question: The question to ask.

    Returns:
        The value stored under ``"result"`` in the chain's response.
    """
    inputs = {"query": question}
    # Prefer the `invoke` entry point (calling the chain object directly is
    # deprecated in LangChain); fall back to a plain call for simple callables.
    invoke = getattr(chain, "invoke", None)
    response = invoke(inputs) if callable(invoke) else chain(inputs)
    return response['result']

# Example usage
pdf_path = "/content/somatosensory 1.pdf"  # Replace with the correct path to your PDF document

# Never embed the API key in the notebook: read it from the TOGETHER_API_KEY
# environment variable, or prompt for it (input is not echoed) when unset.
# NOTE(review): a literal key used to be committed on this line; treat that key
# as compromised and rotate it.
together_api_key = os.environ.get("TOGETHER_API_KEY") or getpass.getpass("Together API key: ")

# Validate the API key
if not together_api_key or together_api_key == "your_together_api_key":
    raise ValueError("Invalid Together API key provided. Please ensure you have a valid key.")

# Setup the RAG model
rag_chain, document_text = setup_rag_model(pdf_path, together_api_key)

# Example Questions
questions = [
    "What is the main topic of this document?",
    "What are the key points discussed?",
    "Can you provide details on the specific topic mentioned?",
]

# Get answers to the questions
for question in questions:
    answer = ask_question(rag_chain, document_text, question)
    print(f"Question: {question}")
    print(f"Answer: {answer}\n")




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Question: What is the main topic of this document?
Answer:   The main topic of this document is the anatomy of the somatosensory system, which includes sensors in the skin and sensors in muscles, tendons, and joints that provide information about temperature, pressure, surface texture, pain, muscle length, muscle tension, and joint angles.



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Question: What are the key points discussed?
Answer:   The key points discussed in the document include:

1. The somatosensory system consists of sensors in the skin and sensors in muscles, tendons, and joints.
2. Cutaneous receptors in the skin tell us about temperature, pressure, surface texture, and pain.
3. Rapidly adapting afferents from Meissner corpuscles and hair receptors at the roots of hairs respond to small displacements of objects during the early stages of lifting and lead to adjustments in g