In [None]:
!pip install langchain
!pip install langchain-community
!pip install langchain-groq
!pip install langchain-core
!pip install gpt4all
!pip install langgraph==0.3.1
# !pip install chromadb
!pip install sentence-transformers
!pip install tavily-python
!pip install gradio
!pip install langchain-huggingface
!pip install pypdf
!pip install faiss-cpu

In [None]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders.web_base import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import nest_asyncio
# from langchain_community.vectorstores.chroma import Chroma
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_groq.chat_models import ChatGroq
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.prompts import ChatPromptTemplate
# from pydantic import BaseModel, Field
from langchain_core.pydantic_v1 import BaseModel,Field
from typing import Literal
from langchain.chains.combine_documents import stuff
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from IPython.core.display import Markdown
import json
import re
from langchain_core.runnables import (
    RunnableParallel,
    RunnableBranch,
    RunnablePassthrough,
)
from langchain_core.messages import HumanMessage, AIMessage
from operator import itemgetter
import asyncio
import warnings

warnings.filterwarnings("ignore")

In [None]:
import os
from dotenv import load_dotenv, dotenv_values

# Pull in anything declared in a local .env file before reading secrets.
load_dotenv()

from google.colab import userdata

# Read the API credentials from the Colab secret store (nothing is printed).
# NOTE(review): LANGCHAIN_PROJECT is fetched here but the project name exported
# below is the hard-coded "Agentic RAG" — confirm which one is intended.
GROQ_API_KEY, LANGCHAIN_API_KEY, LANGCHAIN_PROJECT, TAVILY_API_KEY = (
    userdata.get(secret_name)
    for secret_name in (
        "GROQ_API_KEY",
        "LANGCHAIN_API_KEY",
        "LANGCHAIN_PROJECT",
        "TAVILY_API_KEY",
    )
)

# Export the credentials and tracing settings as environment variables.
os.environ.update(
    {
        "GROQ_API_KEY": GROQ_API_KEY,
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        "LANGCHAIN_API_KEY": LANGCHAIN_API_KEY,
        "LANGCHAIN_PROJECT": "Agentic RAG",
        "TAVILY_API_KEY": TAVILY_API_KEY,
    }
)

In [None]:
# Mount Google Drive at /content/drive; the FAISS index loaded later in this
# notebook is read from a path under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def parse_search_research(results: str):
    pattern = r"\[content: (.*?), title: (.*?), url: (.*?)\]"
    result = re.findall(pattern, results)

    data_list = []
    for snippet, title, link in result:
        data_list.append({"content": snippet, "title": title, "url": link})
    return data_list

In [None]:
# Load the persisted FAISS index from Google Drive and expose it as a retriever.
# The maintained import paths are used here: the `langchain.vectorstores` /
# `langchain.embeddings` shims are deprecated and re-importing
# HuggingFaceEmbeddings from them shadowed the `langchain_huggingface` class
# imported at the top of the notebook.
from langchain_community.vectorstores import FAISS
from langchain_huggingface.embeddings import HuggingFaceEmbeddings


# NOTE(review): the resulting path ends in faiss_index/faiss_index/faiss_index —
# confirm this matches the folder layout on Drive.
faiss_folder = "/content/drive/MyDrive/faiss_index/faiss_index"
faiss_index_path = os.path.join(faiss_folder, "faiss_index")

embedding_function = HuggingFaceEmbeddings(show_progress=True, multi_process=True)
# SECURITY: allow_dangerous_deserialization=True lets FAISS.load_local unpickle
# the stored docstore. Only load index files you created yourself; a tampered
# pickle can execute arbitrary code.
vector_store = FAISS.load_local(faiss_index_path, embedding_function, allow_dangerous_deserialization=True)

# Retriever returning the 5 nearest chunks for each query.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [None]:
# Tool schema for routing a query to the local vector store. The class is
# handed to `llm.bind_tools(...)` together with SearchEngine when the question
# router is built.
class VectorStore(BaseModel):
    # The parenthesised string below is the first statement of the class body,
    # so it serves as the class docstring (presumably surfaced to the model as
    # the tool description — confirm against bind_tools behaviour).
    (
        "A vectorstore contains information about corporate laws, "
        "rules, and regulations related to companies, startups, compliance, "
        "director responsibilities, shareholder rights, taxation, and other "
        "corporate legal aspects."
    )

    # Text of the query to look up.
    query: str


# Tool schema for routing a query to web search. The class was previously
# declared twice; the second declaration (describing "medical information")
# silently replaced this one, so the router was given the wrong description.
# Only the corporate-law definition is kept.
class SearchEngine(BaseModel):
    """A search engine for searching other corporate legal information on the web"""

    # Text of the query to search for.
    query: str

# Prompt that asks the model to pick one of the two bound tools, or to answer
# "not corporate law-related" without calling a tool.
router_prompt_template = (
    "You are an expert in routing user queries to either a VectorStore or a SearchEngine.\n"
    "Use SearchEngine for all other corporate law-related queries that are not already present in the VectorStore or any quesry which demands latest updates.\n"
    "The VectorStore contains information on corporate laws, compliance, company formation, director duties, "
    "shareholder rights, and taxation.\n"
    'Note that if a query is not related to corporate laws, you must output "not corporate law-related", '
    "don't try to use any tool.\n\n"
    # The trailing "\n\n" separates the instruction from the query; without it
    # the rendered prompt read "...Indian laws onlyquery: <text>".
    "State the answers according to Indian laws only\n\n"
    "query: {query}"
)

# Shared chat model used by every chain in this notebook; temperature 0 is set
# for the routing and grading calls.
llm = ChatGroq(model="Llama3-70b-8192", temperature=0)
prompt = ChatPromptTemplate.from_template(router_prompt_template)
# Router chain: prompt -> model with the two routing tools bound.
question_router = prompt | llm.bind_tools(tools=[VectorStore, SearchEngine])

In [None]:
from langchain_core.pydantic_v1 import validator


class Grader(BaseModel):
    "Use this format to give a binary score for relevance check on retrived documents."

    # The description previously spelled the negative value 'irrlevant', a
    # value the Literal type below rejects.
    grade: Literal["relevant", "irrelevant"] = Field(
        ...,
        description="The relevance score for the document.\n"
        "Set this to 'relevant' if the given context is relevant to the user's query, or 'irrelevant' if the document is not relevant.",
    )

    @validator("grade", pre=True)
    def validate_grade(cls, value):
        """Normalise the raw grade before the Literal check runs.

        String values are stripped and lower-cased so that variants such as
        "Relevant" or " irrelevant " validate, and the common "not relevant"
        spellings are mapped to "irrelevant". Non-string values are returned
        unchanged and left for the field validation to reject.
        """
        if not isinstance(value, str):
            return value
        normalized = value.strip().lower()
        if normalized in {"not relevant", "not_relevant", "not-relevant"}:
            return "irrelevant"
        return normalized


# System prompt for the document-relevance grader.
# NOTE(review): the literal opens with four quote characters, so the prompt
# text itself starts with a '"' and ends with a '"' followed by a newline and
# indentation — confirm the wrapping quotes are intended.
grader_system_prompt_template = """"You are a grader tasked with assessing the relevance of a given context to a query.
    If the context is relevant to the query, score it as "relevant". Otherwise, give "irrelevant".
    Do not answer the actual answer, just provide the grade in JSON format with "grade" as the key, without any additional explanation."
    """

# Chat prompt: fixed system instructions plus a human turn carrying the
# `context` and `query` template variables.
grader_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", grader_system_prompt_template),
        ("human", "context: {context}\n\nquery: {query}"),
    ]
)


# Grading chain: prompt -> model whose output is requested in JSON mode and
# returned as a `Grader` instance.
grader_chain = grader_prompt | llm.with_structured_output(Grader, method="json_mode")

# query = "Importance of Corporate laws"
# context =vector_store.similarity_search(query)

# response = grader_chain.invoke({"query": query, "context": context})

In [None]:
# Prompt for the answer-generation step. Each instruction now ends with an
# explicit newline: previously "...from documents" ran straight into
# "also dont mention..." and the id example ran straight into "context:".
rag_template_str = (
    "You are a helpful assistant. Answer the query below based only on the provided context. You are an expert in answering queries answer in a good manner like a professional\n\n"
    "Do not give answers like 'as evident from the page content of the first document' that is, do not say that you are giving answer from documents\n"
    "also dont mention their addresses like this '(id='50db2a45-2acc-44a9-9199-f754961e852b')'\n\n"
    "context: {context}\n\n"
    "query: {query}"
)


rag_prompt = ChatPromptTemplate.from_template(rag_template_str)
# Generation chain: prompt -> model -> plain string answer.
rag_chain = rag_prompt | llm | StrOutputParser()

# query = "What is the difference between a public limited company and a private limited company?"
# context = vector_store.similarity_search(query)

# response = rag_chain.invoke({"query": query, "context": context})

# Markdown(response)

In [None]:
# Prompt used when the router does not select a tool: a short, law-only
# assistant persona plus the running conversation.
fallback_prompt = ChatPromptTemplate.from_template(
    "You are a friendly legal assistant created by JuridiCo.\n"
    "Do not respond to queries that are not related to corporate law.\n"
    "If a query is not related to law, acknowledge your limitations.\n"
    "Provide concise responses to only legally-related queries.\n\n"
    "Current conversations:\n\n{chat_history}\n\n"
    "human: {query}"
)

# Fallback chain: flatten the message list into "human: ..." / "AI: ..." lines,
# pass the query through unchanged, then prompt -> model -> string.
fallback_chain = (
    {
        "chat_history": lambda inputs: "\n".join(
            f"{'human' if isinstance(turn, HumanMessage) else 'AI'}: {turn.content}"
            for turn in inputs["chat_history"]
        ),
        "query": itemgetter("query"),
    }
    | fallback_prompt
    | llm
    | StrOutputParser()
)

# Smoke call with an empty history; the result is displayed as the cell output.
fallback_chain.invoke({"query": "Hello", "chat_history": []})

In [None]:
# Structured-output schema for the hallucination check. Note the polarity:
# "yes" means the response IS hallucinated, "no" means it is grounded.
class HallucinationGrader(BaseModel):
    "Binary score for hallucination check in llm's response"

    # Required field restricted to the two literal values "yes" / "no".
    grade: Literal["yes", "no"] = Field(
        ..., description="'yes' if the llm's reponse is hallucinated otherwise 'no'"
    )


# System prompt for the hallucination grader. A ", " now joins the second and
# third fragments; previously they rendered as "...a hallucinationotherwise
# give 'no'".
hallucination_grader_system_prompt_template = (
    "You are a grader assessing whether a response from an llm is based on a given context.\n"
    "If the llm's response is not based on the given context give a score of 'yes' meaning it's a hallucination, "
    "otherwise give 'no'\n"
    "Just give the grade in json with 'grade' as a key and a binary value of 'yes' or 'no' without additional explanation"
)

hallucination_grader_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", hallucination_grader_system_prompt_template),
        ("human", "context: {context}\n\nllm's response: {response}"),
    ]
)


# Chain input: {"response": str, "context": iterable of objects exposing
# `.page_content`}. The context items are joined with blank lines before being
# placed in the prompt; the model output is returned as a HallucinationGrader.
hallucination_grader_chain = (
    RunnableParallel(
        {
            "response": itemgetter("response"),
            "context": lambda x: "\n\n".join([c.page_content for c in x["context"]]),
        }
    )
    | hallucination_grader_prompt
    | llm.with_structured_output(HallucinationGrader, method="json_mode")
)

# query = "What is the difference between a public limited company and a private limited company?"
# context = retriever.get_relevant_documents(query)
# response = """Based on the provided context, specifically from page 27 of the "Companies Act.pdf" document, the difference between a public limited company and a private limited company lies in the suffix of their names.
# A public limited company has the last word "Limited" in its name, whereas a private limited company has the last words "Private Limited" in its name.
# No other differences are explicitly mentioned in the provided context.
# """

# response = hallucination_grader_chain.invoke({"response": response, "context": context})

In [None]:
# Structured-output schema for the answer-relevance check: "yes" means the
# response actually answers the query, "no" means it does not.
class AnswerGrader(BaseModel):
    "Binary score for an answer check based on a query."

    # Required field restricted to the two literal values "yes" / "no".
    grade: Literal["yes", "no"] = Field(
        ...,
        description="'yes' if the provided answer is an actual answer to the query otherwise 'no'",
    )


# System prompt for the answer-relevance grader: asks for a JSON object with a
# single 'grade' key whose value is 'yes' or 'no'.
answer_grader_system_prompt_template = (
    "You are a grader assessing whether a provided answer is in fact an answer to the given query.\n"
    "If the provided answer does not answer the query give a score of 'no' otherwise give 'yes'\n"
    "Just give the grade in json with 'grade' as a key and a binary value of 'yes' or 'no' without additional explanation"
)

# Chat prompt: fixed system instructions plus a human turn carrying the
# `query` and `response` template variables.
answer_grader_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", answer_grader_system_prompt_template),
        ("human", "query: {query}\n\nanswer: {response}"),
    ]
)


# Grading chain: prompt -> model whose output is requested in JSON mode and
# returned as an `AnswerGrader` instance.
answer_grader_chain = answer_grader_prompt | llm.with_structured_output(
    AnswerGrader, method="json_mode"
)

# query = "What is the difference between a public limited company and a private limited company?"
# # context = retriever.get_relevant_documents(query)
# response = """Based on the provided context, specifically from page 27 of the "Companies Act.pdf" document, the difference between a public limited company and a private limited company lies in the suffix of their names.
# A public limited company has the last word "Limited" in its name, whereas a private limited company has the last words "Private Limited" in its name.
# No other differences are explicitly mentioned in the provided context."""

# response = answer_grader_chain.invoke({"response": response, "query": query})

In [None]:
from typing import TypedDict, Annotated
from langchain_core.documents import Document
from langchain_community.tools import DuckDuckGoSearchResults
# from langgraph.prebuilt.tool_executor import ToolExecutor, ToolInvocation
# import langgraph
# from langgraph.prebuilt import ToolInvocation, ToolExecutor
# from langgraph.prebuilt.tool_node import ToolNode
from langchain_core.messages.base import BaseMessage
import operator

# ddg_search = DuckDuckGoSearchResults()
# Web-search tool invoked by web_search_node, built with default settings
# (presumably authenticated via the TAVILY_API_KEY environment variable set
# earlier in the notebook — confirm).
tavily_search = TavilySearchResults()
# tool_executor = ToolExecutor(
#     tools=[
#         Tool(
#             name="VectorStore",
#             func=retriever.invoke,
#             description="Useful to search the vector database",
#         ),
#         Tool(
#             name="SearchEngine", func=tavily_search, description="Useful to search the web"
#         ),
#     ]
# )


# NOTE(review): the name looks like a typo for "AgentState"; it is referenced
# when the StateGraph is constructed, so renaming must update that call too.
class AgentSate(TypedDict):
    """The dictionary keeps track of the data required by the various nodes in the graph"""

    # The user's question; read by every node and by the router.
    query: str
    # Prior conversation turns; read by fallback_node.
    chat_history:list[BaseMessage]
    # Text answer written by rag_node or fallback_node.
    generation: str
    # Documents written by the retrieve / web-search / filter nodes.
    documents: list[Document]


def retrieve_node(state: dict) -> dict[str, list[Document] | str]:
    """Look up the state's query in the vector-store retriever.

    Args:
        state: Graph state; only ``state["query"]`` is read.

    Returns:
        A partial state update ``{"documents": <retriever results>}``.
    """
    return {"documents": retriever.invoke(input=state["query"])}


def fallback_node(state: dict):
    """Answer through the fallback chain when no routing tool was selected.

    Args:
        state: Graph state; ``query`` and ``chat_history`` are read.

    Returns:
        A partial state update ``{"generation": <fallback chain output>}``.
    """
    chain_input = {"query": state["query"], "chat_history": state["chat_history"]}
    return {"generation": fallback_chain.invoke(chain_input)}


def filter_documents_node(state: dict):
    """Keep only the documents the grader chain marks as relevant.

    Each document in ``state["documents"]`` is graded against
    ``state["query"]`` one at a time, and a progress line is printed per
    document (numbered from 1).

    Returns:
        A partial state update ``{"documents": <relevant documents>}``, in the
        original order.
    """
    query = state["query"]
    kept = []
    for position, candidate in enumerate(state["documents"], start=1):
        verdict = grader_chain.invoke({"query": query, "context": candidate})
        if verdict.grade != "relevant":
            print(f"---CHUCK {position}: NOT RELEVANT---")
            continue
        print(f"---CHUCK {position}: RELEVANT---")
        kept.append(candidate)
    return {"documents": kept}


def rag_node(state: dict):
    """Generate an answer from the current documents via the RAG chain.

    Args:
        state: Graph state; ``query`` and ``documents`` are read and the
            documents are passed to the chain as its ``context`` input.

    Returns:
        A partial state update ``{"generation": <rag chain output>}``.
    """
    answer = rag_chain.invoke(
        {"query": state["query"], "context": state["documents"]}
    )
    return {"generation": answer}


def web_search_node(state: dict):
    """Run a web search for the state's query and wrap the hits as Documents.

    The raw tool output is passed through ``parse_search_research``; each
    parsed hit becomes a ``Document`` whose text is the hit's ``content`` and
    whose ``source`` metadata is the hit's ``url``.

    Returns:
        A partial state update ``{"documents": <list of Document>}``.
    """
    raw_results = tavily_search.invoke(state["query"])
    documents = []
    for hit in parse_search_research(raw_results):
        documents.append(
            Document(page_content=hit["content"], metadata={"source": hit["url"]})
        )
    return {"documents": documents}


def question_router_node(state: dict):
    """Decide which branch of the graph should handle the query.

    The router chain is invoked with ``state["query"]`` and the name of the
    first tool call in its response selects the route.

    Returns:
        ``"VectorStore"`` or ``"SearchEngine"`` when the router picked that
        tool; ``"llm_fallback"`` when the router call failed, made no tool
        call, or named a tool this graph does not know.

    Raises:
        ValueError: If the response carries a ``tool_calls`` entry that is
            empty. (The original ``raise "..."`` of a plain string is itself a
            ``TypeError`` in Python 3.)
    """
    query = state["query"]
    try:
        response = question_router.invoke({"query": query})
    except Exception:
        # Deliberate best-effort: any router failure degrades to the fallback
        # assistant instead of aborting the graph.
        return "llm_fallback"

    if "tool_calls" not in response.additional_kwargs:
        print("---No tool called---")
        return "llm_fallback"

    tool_calls = response.additional_kwargs["tool_calls"]
    if len(tool_calls) == 0:
        raise ValueError("Router could not decide route!")

    route = tool_calls[0]["function"]["name"]
    if route == "VectorStore":
        print("---Routing to VectorStore---")
        return "VectorStore"
    if route == "SearchEngine":
        print("---Routing to SearchEngine---")
        return "SearchEngine"

    # Previously an unrecognised tool name fell through and returned None,
    # which is not a key of the conditional entry-point mapping.
    print(f"---Unknown route {route!r}, using fallback---")
    return "llm_fallback"


def should_generate(state: dict):
    """Choose the edge to follow after document filtering.

    Returns:
        ``"generate"`` when ``state["documents"]`` is non-empty, otherwise
        ``"SearchEngine"``. A status line is printed in both cases.
    """
    if state["documents"]:
        print("---Some retrived documents are relevant---")
        return "generate"

    print("---All retrived documents not relevant---")
    return "SearchEngine"


def hallucination_and_answer_relevance_check(state: dict):
    """Grade the generated answer and pick the next edge.

    The answer is first checked against the documents for hallucination; only
    if that passes is it checked for relevance to the query.

    Returns:
        ``"generate"`` when the hallucination grade is anything other than
        ``"no"``; ``"useful"`` when the answer is grounded and the relevance
        grade is ``"yes"``; ``"not useful"`` when it is grounded but not
        relevant.
    """
    llm_response = state["generation"]
    documents = state["documents"]
    query = state["query"]

    hallucination_verdict = hallucination_grader_chain.invoke(
        {"response": llm_response, "context": documents}
    )
    if hallucination_verdict.grade != "no":
        print("---Hallucination check failed---")
        return "generate"

    print("---Hallucination check passed---")
    relevance_verdict = answer_grader_chain.invoke(
        {"response": llm_response, "query": query}
    )
    if relevance_verdict.grade != "yes":
        print("---Answer is not relevant to question---")
        return "not useful"

    print("---Answer is relevant to question---\n")
    return "useful"

In [None]:
from langgraph.graph import StateGraph, END

# Assemble the agent graph: one node per processing step.
workflow = StateGraph(AgentSate)
workflow.add_node("VectorStore", retrieve_node)
workflow.add_node("SearchEngine", web_search_node)
workflow.add_node("filter_docs", filter_documents_node)
workflow.add_node("fallback", fallback_node)
workflow.add_node("rag", rag_node)

# Entry point: the router's return value selects the first node.
workflow.set_conditional_entry_point(
    question_router_node,
    {
        "llm_fallback": "fallback",
        "VectorStore": "VectorStore",
        "SearchEngine": "SearchEngine",
    },
)

# Both retrieval paths feed the relevance filter.
workflow.add_edge("VectorStore", "filter_docs")
workflow.add_edge("SearchEngine", "filter_docs")
# With no relevant documents left, fall through to web search; otherwise answer.
workflow.add_conditional_edges(
    "filter_docs", should_generate, {"SearchEngine": "SearchEngine", "generate": "rag"}
)
# NOTE(review): the "generate" route re-enters "rag" with unchanged inputs;
# presumably this repeats until LangGraph's recursion limit is hit — confirm
# and consider a retry counter in the state.
workflow.add_conditional_edges(
    "rag",
    hallucination_and_answer_relevance_check,
    {"useful": END, "not useful": "SearchEngine", "generate": "rag"},
)

workflow.add_edge("fallback", END)

# Compile the graph into the runnable `app` that the following cells invoke;
# without this step `app` was never defined and those cells raised NameError.
app = workflow.compile()

In [None]:
# End-to-end smoke run of the graph with an empty chat history; the generated
# answer is rendered as Markdown in the cell output.
response = app.invoke({"query": "What are the company laws?", "chat_history": []})
Markdown(response["generation"])

In [None]:
import gradio as gr
# ... (Your existing imports and code)

def _history_to_messages(history):
    """Convert Gradio chat history into HumanMessage / AIMessage objects."""
    messages = []
    for turn in history or []:
        if isinstance(turn, (HumanMessage, AIMessage)):
            # Already in the form the graph expects.
            messages.append(turn)
        elif isinstance(turn, dict):
            # "messages"-style history: {"role": ..., "content": ...}
            content = turn.get("content")
            if content is None:
                continue
            text = content if isinstance(content, str) else str(content)
            if turn.get("role") == "user":
                messages.append(HumanMessage(content=text))
            elif turn.get("role") == "assistant":
                messages.append(AIMessage(content=text))
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # Pair-style history: [user_text, bot_text]; either side may be None.
            user_text, bot_text = turn
            if user_text is not None:
                messages.append(HumanMessage(content=str(user_text)))
            if bot_text is not None:
                messages.append(AIMessage(content=str(bot_text)))
    return messages


def api_calling(prompt, history):
    """Gradio chat callback: run the agent graph for one user turn.

    Args:
        prompt: The user's latest message.
        history: Chat history as supplied by ``gr.ChatInterface``. It is
            converted to message objects first, because the fallback chain
            reads ``.content`` from each history item and raw Gradio pairs or
            dicts have no such attribute.

    Returns:
        The graph's ``generation`` text, or an "An error occurred: ..." string
        if invoking the graph raises.
    """
    try:
        response = app.invoke(
            {"query": prompt, "chat_history": _history_to_messages(history)}
        )
        return response["generation"]
    except Exception as e:
        # Surface the failure in the chat window rather than crashing the UI.
        return f"An error occurred: {e}"

# Chat UI wired to `api_calling`, which receives the new message and the
# running history on every turn.
answer_bot = gr.ChatInterface(
    fn=api_calling,
    chatbot=gr.Chatbot(height=600),
    textbox=gr.Textbox(placeholder="Ask me about law!", container=False, scale=7),
    title="JuridiCo - Corporate Law Chatbot",
    description="Welcome to JuridiCo! Ask a legal question, and the bot will provide you with an AI-generated response and any relevant legal documents.",
    theme="soft",
    submit_btn="Send",
)

# NOTE(review): share=True presumably publishes a public Gradio link to this
# session — confirm that exposing the bot (and its API quota) is intended.
answer_bot.launch(share=True)