In [36]:
# Kernel smoke test: prints a fixed marker string.
print("OK")

OK


In [37]:
# IPython magic: display the kernel's current working directory.
%pwd

'd:\\Generative AI\\Langchain-Project\\Law-Assistant'

In [38]:
import os

# Remember the directory the kernel was in the first time this cell ran.
# A bare relative os.chdir("../") is not idempotent: every re-run of the cell
# would climb one more level. Anchoring on the recorded directory makes
# re-running the cell harmless while the first run behaves exactly as before.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

# Switch to the parent of the start directory; later cells resolve relative
# paths against the working directory.
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [39]:
# IPython magic: show the working directory again, after the chdir cell above.
%pwd

'd:\\Generative AI\\Langchain-Project'

In [9]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [10]:
from langchain_community.document_loaders import CSVLoader, DirectoryLoader, PyPDFLoader
from langchain.docstore.document import Document
import os

# Load all CSV files first
def load_csv_documents(folder_path):
    """Load every CSV file directly inside ``folder_path`` with ``CSVLoader``.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        list: The documents returned by ``CSVLoader.load()`` for each CSV
        file, concatenated in sorted file-name order. Empty when the folder
        contains no CSV files.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder_path`` cannot
            be read.
    """
    csv_docs = []
    # os.listdir() returns names in arbitrary order; sort so the resulting
    # document order is reproducible between runs and machines.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        # Match the extension case-insensitively (e.g. "DATA.CSV") and skip
        # directories that merely happen to be named like a CSV file.
        if file.lower().endswith(".csv") and os.path.isfile(file_path):
            loader = CSVLoader(file_path=file_path)
            csv_docs.extend(loader.load())
    return csv_docs

# Load all PDFs after
def load_pdf_documents(folder_path):
    """Load the files matching ``*.pdf`` in ``folder_path``.

    A ``DirectoryLoader`` is configured to hand each match to ``PyPDFLoader``;
    whatever its ``load()`` call returns is passed straight back.
    """
    pdf_directory_loader = DirectoryLoader(folder_path, glob="*.pdf", loader_cls=PyPDFLoader)
    pdf_docs = pdf_directory_loader.load()
    return pdf_docs

# Combine all documents: CSV first, then PDFs
def load_all_documents(folder_path):
    """Return the CSV documents of ``folder_path`` followed by its PDF documents.

    Delegates to ``load_csv_documents`` and ``load_pdf_documents`` and prints
    how many items each produced before returning the concatenated list.
    Note that a later cell in this notebook defines a function with the same
    name, which replaces this one once that cell has run.
    """
    from_csv = load_csv_documents(folder_path)
    from_pdf = load_pdf_documents(folder_path)
    print(f"Loaded {len(from_csv)} CSV chunks and {len(from_pdf)} PDF chunks")
    # CSV entries are placed ahead of the PDF entries.
    return from_csv + from_pdf


In [11]:
# Load from the current working directory; the recorded run found no CSV or PDF files there.
docs = load_all_documents("./")  # Root directory

Loaded 0 CSV chunks and 0 PDF chunks


In [12]:
# Report how many documents were loaded and flag an empty result.
print("Loaded document count:", len(docs))
if len(docs) == 0:
    print("No documents were loaded.")


Loaded document count: 0
No documents were loaded.


In [40]:
from langchain_community.embeddings import HuggingFaceEmbeddings

def download_hugging_face_embeddings(model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Create a ``HuggingFaceEmbeddings`` wrapper for a sentence-transformers model.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to ``'sentence-transformers/all-MiniLM-L6-v2'``, the
            model name used throughout this notebook, so existing
            zero-argument calls behave exactly as before.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

# Instantiate the embedding model once so later cells can reuse it.
embedding_model = download_hugging_face_embeddings()


In [41]:
# Embed a sample sentence and print the vector length (384 in the recorded run).
query_result = embedding_model.embed_query("Hello world")
print("Length:", len(query_result))

Length: 384


In [15]:
# query_result

In [16]:
from dotenv import load_dotenv
# Load variables from a .env file; the recorded run returned True.
load_dotenv()

True

In [42]:
# Read the service credentials from the environment; each is None when unset.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [43]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec

# Initialize Pinecone client
# NOTE(review): `pc` is not referenced by any later cell visible in this notebook — confirm it is still needed.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Set existing index name
index_name = "lawbot"

# No need to create it again — it already exists
# Just connect to it when needed


In [44]:
import os

# Write the keys back into the process environment under their standard names.
# os.environ only accepts strings: assigning None (what os.environ.get returns
# for an unset variable) fails with an opaque "str expected, not NoneType"
# TypeError, so stop early with a message that names the missing variable.
for _key_name, _key_value in (
    ("PINECONE_API_KEY", PINECONE_API_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
):
    if _key_value is None:
        raise RuntimeError(
            f"{_key_name} is not set; define it in the environment or in the .env file"
        )
    os.environ[_key_name] = _key_value


In [45]:
import os
from langchain_community.document_loaders import PyPDFLoader, CSVLoader, DirectoryLoader

def load_all_documents(folder_path):
    """Load every CSV and PDF file directly inside ``folder_path``.

    CSV files are read with ``CSVLoader`` and PDF files with ``PyPDFLoader``.
    The scan is not recursive, and a summary line with both counts is printed.

    Args:
        folder_path: Directory to scan.

    Returns:
        list: All documents loaded from CSV files followed by all documents
        loaded from PDF files, each group in sorted file-name order.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder_path`` cannot
            be read.
    """
    csv_docs = []
    pdf_docs = []

    # os.listdir() yields names in arbitrary order; sorting keeps the document
    # order reproducible between runs and machines.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            # Skip sub-directories, even ones named like "archive.csv".
            continue
        # Compare extensions case-insensitively so "DATA.CSV" / "Act.PDF" are not missed.
        lowered_name = file.lower()
        if lowered_name.endswith(".csv"):
            loader = CSVLoader(file_path=file_path)
            csv_docs.extend(loader.load())
        elif lowered_name.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
            pdf_docs.extend(loader.load())

    print(f"Loaded {len(csv_docs)} CSV chunks and {len(pdf_docs)} PDF chunks")
    return csv_docs + pdf_docs


In [24]:
# Load every CSV and PDF found in Data/ (relative to the current working directory).
docs = load_all_documents("Data/")


Loaded 77104 CSV chunks and 109 PDF chunks


In [25]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into overlapping chunks (chunk_size=1000, chunk_overlap=200).
# NOTE(review): `chunks` is only counted here; no later cell visible in this notebook consumes it.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(docs)

print(f"Total final chunks: {len(chunks)}")


Total final chunks: 115242


In [26]:
from langchain_pinecone import PineconeVectorStore
from langchain_community.embeddings import HuggingFaceEmbeddings

# Define index name and embedding model
# (same model name as the embedding cell earlier in the notebook)
index_name = "lawbot"  # replace with your actual Pinecone index name
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Load the existing Pinecone index
# NOTE(review): no API key is passed here; presumably it is read from the PINECONE_API_KEY environment variable — confirm.
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding_model
)



In [27]:
# Display the vector store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1cac4bea710>

In [28]:
# Similarity-search retriever over the vector store, configured with k=3.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs=dict(k=3))


In [29]:
# Query the retriever directly with a sample question (the LLM is not involved here).
retrieved_docs = retriever.invoke("What is penal code OF OFFENCES RELATING TO THE ARMY, NAVY AND AIR FORCE?")


In [30]:
# Inspect the documents returned by the retriever.
retrieved_docs

[Document(id='e28ae4f3-10db-406b-8145-cfa0eabbc7ab', metadata={'row': 417.0, 'source': 'Data/bdlaws_ds_formatted.csv'}, page_content=': 417\nlaw_descripton: \nlaw_pass_date: 6th October, 1860\nlaw_subtitle: \nlaw_title: The Penal Code, 1860\nsection_chapter_id: 3.0\nsection_chapter_name: OF OFFENCES RELATING TO THE ARMY, NAVY AND AIR FORCE\nsection_chapter_no: Chapter VII\nsection_description: 139. No person subject to the\nsection_id: 2862\nsection_name: Persons subject to certain acts\nurl_id: 11\nvol_ordinance: ACT NO. XLV OF 1860'),
 Document(id='55fb4930-85a8-4ecd-9f8c-9e6653f1b867', metadata={'row': 449.0, 'source': 'Data/bd_laws_translated_05022022.csv'}, page_content=': 449\nUnnamed: 0: 449.0\nUnnamed: 0.1: 449.0\nlaw_descripton: \nlaw_pass_date: 6th October, 1860\nlaw_subtitle: -\nlaw_title: The Penal Code, 1860\nsection_chapter_id: 3.0\nsection_chapter_name: OF OFFENCES RELATING TO THE ARMY, NAVY AND AIR FORCE\nsection_chapter_no: Chapter VII\nsection_description: 135. Whoeve

In [31]:
from langchain_openai import OpenAI

# Initialize OpenAI LLM
# NOTE(review): no API key is passed here; presumably it is read from the OPENAI_API_KEY environment variable — confirm.
llm = OpenAI(
    temperature=0.4,        # Controls creativity (lower = more focused)
    max_tokens=500          # Maximum number of tokens to generate
)

In [32]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable
from langchain_core.documents import Document

# 1. Define system prompt
# The adjacent string literals are concatenated into one instruction text that
# ends with the "{context}" template placeholder.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise.\n\n"
    "{context}"
)

# 2. Create prompt template
# Two messages: the system instructions above and the user's question, which
# is substituted for the "{input}" placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])


In [33]:
# Question-answering chain: combines the prompt with the LLM
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# RAG (Retrieval-Augmented Generation) chain: the retriever plus the chain above
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [35]:
# Run a sample question through the RAG chain and print the generated answer.
# NOTE(review): the handler catches every Exception, so failures unrelated to
# OpenAI are also printed with the "OpenAI Error" label.
try:
    response = rag_chain.invoke({"input": "What is the penal code OF OFFENCES RELATING TO THE ARMY, NAVY AND AIR FORCE?"})
    print("Answer:", response["answer"])
except Exception as e:
    print("⚠️ OpenAI Error:", e)


⚠️ OpenAI Error: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}


In [59]:
import os
from dotenv import load_dotenv

load_dotenv()  # load keys from .env

# Same lookups as the earlier credential cell; each value is None when the variable is unset.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


In [60]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore

# Load embedding model
# (repeats the embedding set-up from the earlier vector-store cell)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Define existing index name
index_name = "lawbot"

# Load existing index (do not create again)
docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding_model
)


In [61]:
# Similarity-search retriever over the reconnected vector store, configured with k=3.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs=dict(k=3))


In [62]:
from langchain_openai import OpenAI

# Recreate the OpenAI LLM with the same settings as the earlier OpenAI cell.
llm = OpenAI(
    temperature=0.4,
    max_tokens=500
)


In [63]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System prompt
# Newline-separated variant of the earlier instructions; ends with the
# "{context}" template placeholder.
system_prompt = (
    "You are an assistant for question-answering tasks.\n"
    "Use the following pieces of retrieved context to answer the question.\n"
    "If you don't know the answer, say you don't know.\n"
    "Limit your answer to three concise sentences.\n\n"
    "{context}"
)

# System instructions followed by the user's question ("{input}" placeholder).
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

# Combine docs + LLM
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Final RAG chain
# Built around the `llm` and `retriever` objects that exist when this cell runs.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [73]:
# Run a sample question through the RAG chain and print the generated answer.
# NOTE(review): the handler catches every Exception, so failures unrelated to
# OpenAI are also printed with the "OpenAI Error" label.
try:
    response = rag_chain.invoke({"input": "What is The Penal Code, 1860"})
    print("Answer:", response["answer"])
except Exception as e:
    print("⚠️ OpenAI Error:", e)


Answer: The Penal Code, 1860 is a legislation passed on October 6th, 1860. It appears to be a code of laws that outlines punishments for various offenses.


In [66]:
from langchain_community.llms import Ollama

# Switch the generator to the llama3 model via Ollama.
llm = Ollama(model="llama3")

# Rebinding `llm` alone does not affect `rag_chain`, which was built around the
# previous LLM object. Rebuild both chains from the existing `prompt` and
# `retriever` so the query cell that follows really uses the Ollama model when
# the notebook is run top to bottom.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [74]:
# Ask the sample question again after the LLM switch in the previous cell.
try:
    response = rag_chain.invoke({"input": "What is The Penal Code, 1860?"})
    print("Answer:", response["answer"])
except Exception as e:
    # Any failure in the chain lands here (retrieval as well as generation),
    # so use the same provider-neutral label as the later query cells instead
    # of attributing every error to OpenAI.
    print("⚠️ Error:", e)


Answer: The Penal Code, 1860 is a law that was passed on October 6th, 1860. It outlines the punishments for various offenses in Chapter III, Section 2188, specifically discussing the types of punishment offenders are liable for under the code's provisions.


In [68]:
from langchain_pinecone import PineconeVectorStore
from langchain_community.embeddings import HuggingFaceEmbeddings

# Reconnect to the existing index with the same embedding model as before
# (repeats the earlier vector-store cells).
index_name = "lawbot"
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embedding_model
)

# Similarity-search retriever configured with k=3.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})


In [70]:
from langchain_community.llms import Ollama

# LLM used by the chain that is assembled in the next cell.
llm = Ollama(model="llama3")  # make sure llama3 is downloaded in Ollama


In [71]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System prompt
# Same instruction text as the first prompt cell; ends with the "{context}"
# template placeholder.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the answer concise.\n\n"
    "{context}"
)

# Prompt template
# System instructions followed by the user's question ("{input}" placeholder).
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}")
])

# Combine
# Document-combining chain built from the current `llm` and `prompt`.
question_answer_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt
)

# Retrieval chain that pairs `retriever` with the chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain
)


In [75]:
query = "What is The Societies Registration Act?"

# Send the question through the RAG chain; print any failure instead of raising.
try:
    response = rag_chain.invoke(dict(input=query))
    print("Answer:", response["answer"])
except Exception as err:
    print("⚠️ Error:", err)


Answer: The Societies Registration Act, 1860 is a law that governs the registration of societies in India. It was passed on May 21st, 1860, and provides for the certification of society registration by the registrar upon filing of the memorandum and rules and regulations of the society. The act aims to regulate the formation and management of societies.


In [76]:
query = "Whoever does any act under such circumstances?"

# Send the question through the RAG chain; print any failure instead of raising.
try:
    response = rag_chain.invoke(dict(input=query))
    print("Answer:", response["answer"])
except Exception as err:
    print("⚠️ Error:", err)

Answer: I don't know. The provided context appears to be a section from an Indian penal code, but it doesn't contain information about someone doing an act under certain circumstances. It discusses acts and omissions, liability for criminal acts, and common intention.


In [80]:
query = "Inchoate stamped instruments?"

# Send the question through the RAG chain; print any failure instead of raising.
try:
    response = rag_chain.invoke(dict(input=query))
    print("Answer:", response["answer"])
except Exception as err:
    print("⚠️ Error:", err)

Answer: I found information on "Inchoate Stamped Instruments" in Act No. XXVI of 1881. According to the context, inchoate stamped instruments refer to instruments that are not yet completed or executed and bear a stamp.
