**Imporing Libraries**

In [2]:
import openai 
import langchain
import pinecone
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter



**Loading Enviromental Variables**

**Reading Documents Helper Function**

In [3]:
def read_doc(directory):
    file_loader= PyPDFDirectoryLoader(directory)
    documents=file_loader.load()
    return documents
    

In [6]:
doc=read_doc('Law Dataset')
len(doc)

259

**Chunking Document**

In [7]:
## Divide the docs into chunks
### https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.RecursiveCharacterTextSplitter.html#
def chunk_data(docs,chunk_size=800,chunk_overlap=50):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=chunk_size,chunk_overlap=chunk_overlap)
    doc=text_splitter.split_documents(docs)
    return docs

In [None]:
documents=chunk_data(docs=doc)
len(documents)

259

**Setting up *Pinecone* Index** 

In [9]:

import os
import time
import getpass
from pinecone import Pinecone, ServerlessSpec

if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

pc = Pinecone(api_key=pinecone_api_key)

In [11]:
from dotenv import load_dotenv
load_dotenv()

True

In [14]:
import time

index_name = "raglegal"  

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region=os.getenv("PINECONE_ENV")),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [15]:
import getpass

os.environ["OPENAI_API_KEY"]= os.getenv("OPENAI_API_KEY")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [16]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

**Adding *Documents* to *Vector Store***

In [17]:
from uuid import uuid4
uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents=documents, ids=uuids)

['b060eae1-dce9-4d93-b3a2-a6e2dc455b1b',
 '99298eed-8b72-4631-b775-7880d303a1fc',
 'efb89a04-c6bf-44dc-871d-cac6a80b89e5',
 '8af60efd-dc66-4be5-82c2-c71f6498247f',
 '63302a04-1de4-430f-bcdf-3041dd1d564b',
 '99b35af1-3d8a-451a-b3bf-7509ecef963a',
 '29af8488-624a-45e4-a3e0-a36510fda66a',
 '7a580124-1eb2-4391-b70b-0bed545fac5a',
 '08a498df-b1a5-4e76-9fd2-9590c551c2ff',
 '5d43d33c-c316-4816-a918-765517f50184',
 'bcbdaf11-c0ea-4fae-b1a8-d4730ea1c846',
 '7aa68a53-fac0-4b4f-823f-7d46550319fd',
 'c1ceaa66-8bc9-4cc1-a695-d84e80169380',
 '6f86fce3-54bf-4a9a-9754-f2b9e897cc04',
 '26bbf17f-68b7-4408-a5dd-4452cb6e0596',
 'efb70c0a-7570-443e-b031-c14800f401cf',
 'bbae9d26-d580-48c7-a059-b1c552f7f916',
 '8b9f5738-83ac-4a18-9d10-5a76a5f1b99f',
 'aff087f0-0b74-4abd-9981-2b6061cb4ca7',
 '3bb66bb9-5044-445e-b85b-a5caf33ab1e3',
 'd5e5dd82-b547-436e-a1ec-9454ccba8245',
 '28649efa-c496-4bb8-8f73-6f17766c30a8',
 '5b56119f-1194-4312-8dc1-cbbd8b047d6d',
 '21bdc89b-5c42-4821-bce4-5ba14b07d69c',
 '9ce40ded-151a-

**Setting Up *Vector Store Retriever***

In [18]:
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 1, "score_threshold": 0.5},
)
retriever.invoke("Breifly describe the Islamabad University Act", filter={"source": "news"})

[Document(id='ec87ca24-3c9e-43b8-b51d-4c6f30921a25', metadata={'page': 2.0, 'source': 'Law Dataset\\THE ISLAMABAD UNIVERSITY OF HEALTH SCIENCES AND.pdf'}, page_content=' \n  \nPage 3 of 37 \n  \nTHE ISLAMABAD UNIVERSITY OF HEALTH SCIENCES AND \nEMERGING TECHNOLOGIES, ISLAMABAD ACT, 2024  \n \n[4th April, 2024]  \nACT NO. I OF 2024  \n \nAn Act for the establishment of Islamabad University of Health Sciences and Emerging  \nTechnologies  \n  \n  WHEREAS it is expedient to establish Islamabad University of Health Sciences and  \nEmerging Technologies (IUHT) at Islamabad for the purposes appearing hereinafter:  \n \nCHAPTER -I \nPRELIMINARY  \n \n   1.  Short title and commencement .―(1) This Act may be called the Islamabad University \nof Health Sciences and Emerging Technologies, Islamabad Act, 2024.  \n \n (2)  It shall extend to the Islamabad Capital Territory.  \n \n (3)  It shall come into force at once.  \n \n 2.  Definitions .―In this Act, unless there is anything repugn ant in th

**Setting up *Tavirly Tool* for additional context retreival and websearching.**

In [22]:
from langchain_community.tools import TavilySearchResults

# Initialize the TavilySearchResults tool with custom settings
tavirly_search_tool = TavilySearchResults(
    max_results=5,  # Limit the number of search results
    search_depth="advanced",  # Use advanced search depth
    include_answer=True,  # Include an answer to the query in the results
    include_raw_content=True,  # Include the raw content from the search results
    include_images=True,  # Include image results
    name="Tavily_search",  # Custom name for the tool
    description="This tool searches the web using Tavirly and returns up to 5 results with advanced search depth, including raw content and images."  # Custom description
)


In [23]:
tools = [tavirly_search_tool.as_tool()]

  tools = [tavirly_search_tool.as_tool()]


In [19]:
import getpass
import os

if "GROQ_API_KEY" not in os.environ:
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

**Setup Large Language Model**

In [24]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    name="chat_groq",
    model="llama3-groq-70b-8192-tool-use-preview",
    temperature=0,
    max_tokens=8000,  # You can adjust this as needed
    timeout=60,  # Set appropriate timeout based on complexity
    max_retries=2,
    # other params...
)


**Add Prompt Template**

In [26]:
from langchain.prompts import PromptTemplate

prompt_template = """
You are an advanced AI legal assistant with access to multiple tools and a vector database of legal documents. Your task is to:

1. **Understand the user's legal query.**
2. **Retrieve relevant legal documents from the vector database if needed.**
3. **Use the appropriate tool to analyze, compare, or explain legal documents based on the context.**
4. **Synthesize the results and generate a final response that combines the vector store results and tool outputs.**

**Steps**:

- If the query involves **specific legal information or content search**, query the vector database for related documents.
- If the query involves **comparing legal documents**, retrieve the relevant documents and perform a detailed comparison highlighting similarities and differences.
- If the query requires **explaining legal provisions or clauses**, interpret the relevant sections and provide a clear, concise explanation.
- If the query involves **recent legal developments or case law**, use the Legal Research tool to find up-to-date information.

**Remember** to combine all relevant information in a cohesive and professional response, ensuring accuracy and clarity.

**Examples**:

- **Query**: "Compare the data protection regulations between Country A and Country B."
  - **Action**: Retrieve the relevant legal texts from the vector database, then analyze and compare the key differences and similarities in data protection laws between the two countries.
- **Query**: "Explain the implications of the new Cybersecurity Act."
  - **Action**: Retrieve the Act from the vector database and provide a summary explaining its main provisions and potential impact.
- **Query**: "What are the recent changes in employment law affecting overtime pay?"
  - **Action**: Use the Legal Research tool to find recent amendments or case law related to employment and overtime pay, then summarize the findings.

Now, proceed with the user's query.
"""


**Create ReAct Agenet**

In [28]:

from langchain.agents import AgentType, initialize_agent
from langchain_openai import ChatOpenAI

agent = initialize_agent(
    tools=tools,
    llm=llm,
    prompt=prompt_template,
    vector_store=vector_store,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
    handle_parsing_errors=True,
)

  agent = initialize_agent(


In [29]:
# Example query
query = "Breifly Describe the Islamabad University Act?"

# Running the agent with the initialized prompt and query
response = agent.run(query)

# Output the response
print("Agent Response:")
print(response)


  response = agent.run(query)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe Islamabad University Act, also known as the Quaid-i-Azam University Act, was enacted in 1967. It established Quaid-i-Azam University, which is one of the top universities in Pakistan. The act aimed to provide for the establishment of a university in the capital city of Islamabad, with the goal of promoting higher education and research in various fields. The university was named after the founder of Pakistan, Quaid-i-Azam Muhammad Ali Jinnah.[0m

[1m> Finished chain.[0m
Agent Response:
The Islamabad University Act, also known as the Quaid-i-Azam University Act, was enacted in 1967. It established Quaid-i-Azam University, which is one of the top universities in Pakistan. The act aimed to provide for the establishment of a university in the capital city of Islamabad, with the goal of promoting higher education and research in various fields. The university was named after the founder of Pakistan, Quaid-i-Azam Muhammad Al

**Lock and Freeze Requirments for Export**

In [30]:
%pip freeze > requirement.txt

Note: you may need to restart the kernel to use updated packages.
