# LangChain

In [None]:
!pip install chromadb sentence-transformers
!pip install langchain_groq langchain_core langchain_community pypdf

In [None]:
import os

from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader, DirectoryLoader, PyPDFLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq

def initialize_llm(model_name='mixtral-8x7b-32768', temperature=0):
    """Build the Groq chat model used by the question-answering pipeline.

    The API key is read from the ``GROQ_API_KEY`` environment variable instead
    of being embedded in the notebook, so the notebook can be shared or
    committed without exposing a credential.

    Args:
        model_name: Groq model identifier passed to ``ChatGroq``.
        temperature: Sampling temperature passed to ``ChatGroq``.

    Returns:
        The configured ``ChatGroq`` instance.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or empty.
    """
    # NOTE: the key that used to be hard-coded here has been exposed in the
    # notebook and should be revoked and replaced.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY is not set. Export it (or load it from your secrets "
            "store) before calling initialize_llm()."
        )
    return ChatGroq(
        temperature=temperature,
        groq_api_key=api_key,
        model_name=model_name,
    )

# Create the chat model once at module level; process_question() in the next
# cell reads this global `llm` when it builds the agent.
llm = initialize_llm()

## LangChain Main

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.agents import AgentType, initialize_agent
from langchain.tools import Tool
from langchain.memory import ConversationBufferMemory

# Load and process document
def load_document(file_path):
    """Read the PDF at ``file_path`` and return it as a list of split pages.

    Delegates to ``PyPDFLoader.load_and_split()`` and returns its result
    unchanged.
    """
    return PyPDFLoader(file_path).load_and_split()

# Store document in vector database
def create_vector_store(pages):
    """Embed ``pages`` and index them in a Chroma store kept in ``./chroma_db``.

    Args:
        pages: Documents to index (as returned by ``load_document``).

    Returns:
        The populated Chroma vector store.
    """
    # all-MiniLM-L6-v2 is a general sentence-transformers model, so use the
    # plain HuggingFaceEmbeddings wrapper. HuggingFaceBgeEmbeddings is meant
    # for BGE models and prepends a BGE-specific instruction to every query,
    # which skews query embeddings for a non-BGE model.
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    vector_store = Chroma.from_documents(pages, embeddings, persist_directory='./chroma_db')
    # NOTE(review): every call indexes into the same ./chroma_db directory, so
    # re-processing the same PDF presumably accumulates duplicate chunks -
    # confirm, and consider a per-document collection or stable ids.
    vector_store.persist()
    return vector_store

def relevance_tool(question, retriever):
    """Return True when the retriever yields at least one document for ``question``.

    Only the number of returned documents is inspected; their content and
    similarity are not examined here.
    """
    return len(retriever.get_relevant_documents(question)) > 0

def retrieval_tool(question, retriever, top_k=3):
    """Return the first ``top_k`` documents the retriever finds for ``question``.

    The retriever's own ordering is kept; no re-ranking is applied.
    """
    return retriever.get_relevant_documents(question)[:top_k]

def answer_generation_tool(question, retrieved_docs, llm):
    """Ask ``llm`` to answer ``question`` using the retrieved documents as context.

    The ``page_content`` of each document is joined with newlines, placed in a
    Context/Question/Answer prompt, and sent through ``llm.predict``.
    """
    context = "\n".join(doc.page_content for doc in retrieved_docs)
    llm_input = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
    return llm.predict(llm_input)

def validation_tool(answer):
    """Return True when ``answer`` is not None and contains non-whitespace text."""
    if answer is None:
        return False
    return len(answer.strip()) > 0

def feedback_tool(answer, vector_store):
    """Ask the user whether ``answer`` was helpful and store it if they approve.

    Args:
        answer: Text of the generated answer.
        vector_store: Object exposing ``add_texts``; receives ``[answer]`` when
            the user approves.

    Returns:
        The status message that was printed. Returning it (instead of ``None``)
        gives an agent that calls this function as a tool a meaningful
        observation.
    """
    user_feedback = input("Would you find this result helpful? (yes/no): ")
    # Normalise the reply so "Yes", " yes " and "y" all count as approval.
    if user_feedback.strip().lower() in ("yes", "y"):
        vector_store.add_texts([answer])
        message = "Feedback stored in vector database."
    else:
        message = "Feedback not stored."
    print(message)
    return message

def create_agents(retriever, llm, vector_store):
    """Wrap the pipeline steps as tools and return a zero-shot ReAct agent over them.

    Each tool takes a single string argument from the agent; the retriever,
    LLM and vector store are supplied through closures.
    """
    def check_relevance(q):
        return relevance_tool(q, retriever)

    def retrieve(q):
        return retrieval_tool(q, retriever)

    def generate_answer(q):
        # Runs its own retrieval, then generates from those documents.
        return answer_generation_tool(q, retrieval_tool(q, retriever), llm)

    def validate(ans):
        return validation_tool(ans)

    def store_feedback(ans):
        return feedback_tool(ans, vector_store)

    toolbox = [
        Tool(
            name="Relevance Check",
            func=check_relevance,
            description="Checks if the question is relevant to the document",
        ),
        Tool(
            name="Retrieval",
            func=retrieve,
            description="Retrieves the top 3 relevant documents",
        ),
        Tool(
            name="Answer Generation",
            func=generate_answer,
            description="Generates an answer based on retrieved documents",
        ),
        Tool(
            name="Validation",
            func=validate,
            description="Validates the generated answer",
        ),
        Tool(
            name="Feedback Storage",
            func=store_feedback,
            description="Stores correct answers in vector database if user approves",
        ),
    ]
    return initialize_agent(toolbox, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

from langchain_groq import ChatGroq

def process_question(file_path, question):
    """Index the PDF at ``file_path`` and let the agent answer ``question``.

    Loads and indexes the document, builds the agent with the module-level
    ``llm``, and returns whatever ``agent.run`` produces.
    """
    store = create_vector_store(load_document(file_path))
    qa_agent = create_agents(store.as_retriever(), llm, store)
    return qa_agent.run(question)

In [None]:
pip install pypdf



In [None]:
pip install chromadb

In [None]:
# Sample run: index the Tesla 10-Q PDF and ask the agent one question.
# The PDF must already exist at this absolute path (/content is the Colab
# working area); `result` is displayed again in the next cell.
question = "what are the acccounting qualities in tesla?"
result = process_question("/content/10-Q_-_Tesla_INC_-_10-24-2024.pdf", question)
print(result)

  embeddings=HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  vector_store.persist()
  agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
  response = agent.run(question)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe user seems to be asking about the accounting qualities of Tesla, which could refer to the financial reporting practices or qualities of Tesla as a company. I should first check if this question is relevant to the document.

Action: Relevance Check
Action Input: what are the acccounting qualities in tesla?[0m
Observation: [36;1m[1;3mTrue[0m
Thought:

  docs = retriever.get_relevant_documents(question)


[32;1m[1;3mThe question is relevant to the document. Now, I should retrieve the top 3 relevant documents to find the answer.

Action: Retrieval
Action Input: what are the acccounting qualities in tesla?[0m
Observation: [33;1m[1;3m[Document(metadata={'author': 'Kaleidoscope - kscope.io', 'creationdate': '2024-10-24T10:11:17+00:00', 'creator': 'Chromium', 'keywords': 'Tesla INC 10-Q', 'moddate': '2024-10-24T10:11:18+00:00', 'page': 1, 'page_label': '2', 'producer': 'KS - PDF Engine v1.2', 'source': '/content/10-Q_-_Tesla_INC_-_10-24-2024.pdf', 'subject': '10-Q filed 10/24/2024', 'title': 'Form 10-Q for Tesla INC filed 10/24/2024', 'total_pages': 41}, page_content="TESLA, INC.\nFORM 10-Q FOR THE QUARTER ENDED SEPTEMBER 30, 2024\nINDEX\n  Page\nPART I. FINANCIAL INFORMATION\nItem 1. Financial Statements 4\nConsolidated Balance Sheets 4\nConsolidated Statements of Operations 5\nConsolidated Statements of Comprehensive Income 6\nConsolidated Statements of Redeemable Noncontrolling Inter

  return llm.predict(prompt)



Observation: [38;5;200m[1;3mBased on the provided financial statements, the following are some of the accounting qualities that can be observed in Tesla, Inc.:

1. Relevance: The financial statements provide information that is useful in making economic decisions, such as the company's revenues, cost of revenues, gross profit, operating expenses, income before and after taxes, and net income.
2. Reliability: The financial statements appear to be prepared in accordance with generally accepted accounting principles (GAAP), which enhances their reliability. The company has also engaged an independent auditor to audit its financial statements.
3. Comparability: The financial statements present the company's financial performance and position for the current and prior-year periods, allowing users to compare and analyze the company's performance over time.
4. Consistency: The company has applied consistent accounting principles and methods in the preparation of the financial statements fo

In [None]:
# Display the agent's final answer from the run above.
result

'The accounting qualities observed in Tesla, Inc. based on the provided financial statements include relevance, reliability, comparability, consistency, materiality, and understandability.'

## Another Approach for LangChain (Cross-Encoder Re-ranking)

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.agents import AgentType, initialize_agent
from langchain.tools import Tool
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma
from sentence_transformers import CrossEncoder
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq

# Load and process document
def load_document(file_path):
    """Load the PDF at ``file_path`` and return the pages produced by the loader's splitter."""
    pdf_chunks = PyPDFLoader(file_path).load_and_split()
    return pdf_chunks

# Store document in vector database
def create_vector_store(pages):
    """Index ``pages`` in a Chroma store under ``./chroma_db`` and return the store.

    Embeddings come from the ``sentence-transformers/all-MiniLM-L6-v2`` model;
    ``persist()`` is called on the store before it is returned.
    """
    store = Chroma.from_documents(
        pages,
        HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'),
        persist_directory='./chroma_db',
    )
    store.persist()
    return store

# Relevance check tool
def relevance_tool(question, retriever):
    """Report whether the retriever returns any document at all for ``question``."""
    matches = retriever.get_relevant_documents(question)
    has_match = len(matches) > 0
    return has_match

# Load the CrossEncoder model that retrieval_tool uses to re-score retrieved documents.
# It is created once at module level so every retrieval_tool call reuses the same instance.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def retrieval_tool(question, retriever, top_k=3):
    """Fetch candidates for ``question`` and keep the ``top_k`` best by re-ranker score.

    The module-level ``reranker`` scores every [question, page_content] pair;
    candidates are returned from highest to lowest score. An empty list is
    returned when the retriever finds nothing.
    """
    candidates = retriever.get_relevant_documents(question)
    if not candidates:
        return []

    # One [question, text] pair per candidate, in retrieval order.
    relevance = reranker.predict([[question, item.page_content] for item in candidates])

    # Order (document, score) pairs best-first; equal scores keep retrieval order.
    scored = list(zip(candidates, relevance))
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return [item for item, _ in scored[:top_k]]

# Answer generation
def answer_generation_tool(question, retrieved_docs, llm):
    """Build a context prompt from ``retrieved_docs`` and return ``llm.predict`` of it."""
    passages = []
    for doc in retrieved_docs:
        passages.append(doc.page_content)
    context = "\n".join(passages)
    return llm.predict(f"Context: {context}\n\nQuestion: {question}\n\nAnswer:")

# Answer validation
def validation_tool(answer):
    """Accept ``answer`` only if it is not None and is non-empty after stripping whitespace."""
    if answer is not None:
        return len(answer.strip()) > 0
    return False

# User feedback storage
def feedback_tool(answer, vector_store):
    """Ask the user whether ``answer`` was helpful and store it if they approve.

    Args:
        answer: Text of the generated answer.
        vector_store: Object exposing ``add_texts``; receives ``[answer]`` when
            the user approves.

    Returns:
        The status message that was printed, so an agent invoking this as a
        tool receives a meaningful observation rather than ``None``.
    """
    user_feedback = input("Would you find this result helpful? (yes/no): ")
    # Tolerate surrounding whitespace, any letter case, and the short form "y".
    approved = user_feedback.strip().lower() in ("yes", "y")
    if approved:
        vector_store.add_texts([answer])
        message = "Feedback stored in vector database."
    else:
        message = "Feedback not stored."
    print(message)
    return message

# Create and initialize agents
def create_agents(retriever, llm, vector_store):
    """Build the five pipeline tools and return a zero-shot ReAct agent that can call them.

    The tool list is declared as (name, callable, description) rows and turned
    into ``Tool`` objects in that order.
    """
    tool_specs = [
        (
            "Relevance Check",
            lambda q: relevance_tool(q, retriever),
            "Checks if the question is relevant to the document",
        ),
        (
            "Retrieval",
            lambda q: retrieval_tool(q, retriever),
            "Retrieves and re-ranks the top 3 relevant documents",
        ),
        (
            "Answer Generation",
            lambda q: answer_generation_tool(q, retrieval_tool(q, retriever), llm),
            "Generates an answer based on retrieved documents",
        ),
        (
            "Validation",
            lambda ans: validation_tool(ans),
            "Validates the generated answer",
        ),
        (
            "Feedback Storage",
            lambda ans: feedback_tool(ans, vector_store),
            "Stores correct answers in vector database if user approves",
        ),
    ]
    tools = [
        Tool(name=tool_name, func=tool_func, description=tool_help)
        for tool_name, tool_func, tool_help in tool_specs
    ]
    return initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Process user question
def process_question(file_path, question):
    """Index the PDF at ``file_path`` and let the agent answer ``question``.

    Args:
        file_path: Path of the PDF to load and index.
        question: Question passed to the agent.

    Returns:
        The value returned by ``agent.run(question)``.

    Raises:
        RuntimeError: If the ``GROQ_API_KEY`` environment variable is unset or
            empty.
    """
    import os  # local import: this cell's import block does not bring in `os`

    # The API key must come from the environment, never from the notebook
    # source. Check it first so a missing key fails fast, before the PDF is
    # parsed and embedded.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY is not set. Export it before calling process_question()."
        )

    pages = load_document(file_path)
    vector_store = create_vector_store(pages)
    retriever = vector_store.as_retriever()
    llm = ChatGroq(
        temperature=0,
        groq_api_key=api_key,
        model_name='mixtral-8x7b-32768'
    )
    agent = create_agents(retriever, llm, vector_store)
    return agent.run(question)


In [None]:
# Sample run of the re-ranking variant against the same Tesla 10-Q PDF.
# The PDF must already exist at this absolute path; this call re-loads and
# re-indexes it before the agent answers.
question = "what are different revenue segments in tesla?"
result = process_question("/content/10-Q_-_Tesla_INC_-_10-24-2024.pdf", question)
print(result)

  embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThis question is relevant to the document as it asks about different parts of Tesla's revenue. I will first use the Relevance Check tool to confirm this.

Action: Relevance Check
Action Input: what are different revenue segments in tesla?[0m
Observation: [36;1m[1;3mTrue[0m
Thought:[32;1m[1;3mThe question is relevant to the document. Now, I will use the Retrieval tool to find the top 3 relevant documents that can help answer this question.

Action: Retrieval
Action Input: what are different revenue segments in tesla?[0m
Observation: [33;1m[1;3m[Document(metadata={'author': 'Kaleidoscope - kscope.io', 'creationdate': '2024-10-24T10:11:17+00:00', 'creator': 'Chromium', 'keywords': 'Tesla INC 10-Q', 'moddate': '2024-10-24T10:11:18+00:00', 'page': 4, 'page_label': '5', 'producer': 'KS - PDF Engine v1.2', 'source': '/content/10-Q_-_Tesla_INC_-_10-24-2024.pdf', 'subject': '10-Q filed 10/24/2024', 'title': 'Form 10-Q for Tes

In [None]:
pip install gradio

## Tried with Gradio UI

In [None]:
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.agents import AgentType, initialize_agent
from langchain.tools import Tool
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq

# Load and process document
def load_document(file_path):
    """Open ``file_path`` with ``PyPDFLoader`` and return its ``load_and_split()`` result."""
    reader = PyPDFLoader(file_path)
    return reader.load_and_split()

# Store document in vector database
def create_vector_store(pages):
    """Embed ``pages`` and index them in a Chroma store kept in ``./chroma_db``.

    Args:
        pages: Documents to index (as returned by ``load_document``).

    Returns:
        The populated Chroma vector store.
    """
    # Use the generic HuggingFaceEmbeddings wrapper for all-MiniLM-L6-v2.
    # HuggingFaceBgeEmbeddings targets BGE models and prefixes each query with
    # a BGE-specific instruction, which a MiniLM model was not trained on.
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    vector_store = Chroma.from_documents(pages, embeddings, persist_directory='./chroma_db')
    # NOTE(review): every upload indexes into the same ./chroma_db directory,
    # so chunks from earlier uploads presumably remain searchable - confirm,
    # and consider a per-upload collection.
    vector_store.persist()
    return vector_store

# Relevance Check
def relevance_tool(question, retriever):
    """True if the retriever produces one or more documents for ``question``, else False."""
    return 0 < len(retriever.get_relevant_documents(question))

# Retrieval (top-k only; this version does not re-rank)
def retrieval_tool(question, retriever, top_k=3):
    """Return at most ``top_k`` documents for ``question``, in the retriever's own order."""
    candidates = retriever.get_relevant_documents(question)
    shortlist = candidates[:top_k]
    return shortlist

# Answer Generation
def answer_generation_tool(question, retrieved_docs, llm):
    """Answer ``question`` with ``llm``, using the documents' text as newline-joined context."""
    context = "\n".join(map(lambda doc: doc.page_content, retrieved_docs))
    request = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
    reply = llm.predict(request)
    return reply

# Answer Validation
def validation_tool(answer):
    """Return False for None or whitespace-only answers, True otherwise."""
    return False if answer is None else len(answer.strip()) > 0

# Feedback Storage
def feedback_tool(answer, vector_store):
    """Ask the user whether ``answer`` was helpful and store it if they approve.

    Args:
        answer: Text of the generated answer.
        vector_store: Object exposing ``add_texts``; receives ``[answer]`` when
            the user approves.

    Returns:
        The status message that was printed, so an agent invoking this as a
        tool receives a meaningful observation rather than ``None``.
    """
    # NOTE(review): input() reads from the process's stdin, and the Gradio
    # page defined in this cell has no control that feeds it, so a browser
    # user presumably cannot answer this prompt - consider moving feedback
    # into the UI.
    user_feedback = input("Would you find this result helpful? (yes/no): ")
    # Tolerate surrounding whitespace, any letter case, and the short form "y".
    if user_feedback.strip().lower() in ("yes", "y"):
        vector_store.add_texts([answer])
        message = "Feedback stored in vector database."
    else:
        message = "Feedback not stored."
    print(message)
    return message

# Create Agents
def create_agents(retriever, llm, vector_store):
    """Register the pipeline steps as tools and return a zero-shot ReAct agent using them.

    Tools are appended in the order: relevance check, retrieval, answer
    generation, validation, feedback storage.
    """
    registry = []
    registry.append(Tool(
        name="Relevance Check",
        func=lambda q: relevance_tool(q, retriever),
        description="Checks if the question is relevant to the document",
    ))
    registry.append(Tool(
        name="Retrieval",
        func=lambda q: retrieval_tool(q, retriever),
        description="Retrieves the top 3 relevant documents",
    ))
    registry.append(Tool(
        name="Answer Generation",
        func=lambda q: answer_generation_tool(q, retrieval_tool(q, retriever), llm),
        description="Generates an answer",
    ))
    registry.append(Tool(
        name="Validation",
        func=lambda ans: validation_tool(ans),
        description="Validates the generated answer",
    ))
    registry.append(Tool(
        name="Feedback Storage",
        func=lambda ans: feedback_tool(ans, vector_store),
        description="Stores correct answers if user approves",
    ))
    return initialize_agent(registry, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Load LLM (Replace with your preferred model)
import os  # this cell's import block does not bring in `os`

# Read the Groq API key from the environment instead of embedding it in the
# notebook; the key that used to be hard-coded here has been exposed and
# should be revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError("GROQ_API_KEY is not set. Export it before running this cell.")

llm = ChatGroq(
    temperature=0,
    groq_api_key=_groq_api_key,
    model_name='mixtral-8x7b-32768'
)

# Store document and retriever globally
# upload_pdf() assigns both names; process_question() returns a
# "Please upload a document first." message while either is still unset.
vector_store = None
retriever = None

# File Upload Handling
def upload_pdf(file):
    """Index the uploaded PDF and publish it through the module-level globals.

    Args:
        file: Value delivered by the file input. May be ``None`` when nothing
            was uploaded, a plain path string, or an object whose ``name``
            attribute holds the path.

    Returns:
        A status message describing the outcome.
    """
    global vector_store, retriever
    if file is None:
        # Pressing "Process PDF" without a file used to raise AttributeError.
        return "Please upload a PDF file first."
    # Accept both path strings and file-like objects carrying a `.name` path.
    file_path = getattr(file, "name", file)
    pages = load_document(file_path)
    vector_store = create_vector_store(pages)
    retriever = vector_store.as_retriever()
    return "PDF uploaded and processed successfully!"

# Process Question
def process_question(question):
    """Answer ``question`` with a freshly built agent over the uploaded document.

    Returns a prompt to upload a document when the module-level ``retriever``
    or ``vector_store`` is not set; otherwise returns ``agent.run(question)``.
    """
    if not (retriever and vector_store):
        return "Please upload a document first."
    return create_agents(retriever, llm, vector_store).run(question)

# Gradio UI
# Page layout: upload row, upload status, then the question/answer controls.
with gr.Blocks() as demo:
    gr.Markdown("# Multi-Agent Document Q&A System")
    gr.Markdown("Upload a PDF, then ask questions about its content.")

    with gr.Row():
        pdf_input = gr.File(label="Upload PDF Document")
        upload_button = gr.Button("Process PDF")
    # Displays the message returned by upload_pdf; with outputs=None that
    # message was discarded and the user got no confirmation.
    upload_status = gr.Textbox(label="Upload status", interactive=False)

    question_input = gr.Textbox(label="Enter your question")
    answer_output = gr.Textbox(label="Answer", interactive=False)
    ask_button = gr.Button("Ask")

    upload_button.click(upload_pdf, inputs=[pdf_input], outputs=[upload_status])
    ask_button.click(process_question, inputs=[question_input], outputs=[answer_output])

demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6c48ce4a502bf86afa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


