## AI - Research Engine for Commercial Courts

In [21]:
# !pip install langchain_community pandas numpy langchain_core langchain 

In [22]:
# !pip install torch

In [23]:
# !pip install pypdf

In [24]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain
# from langchain_together import Together
from langchain.embeddings.base import Embeddings


from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

In [25]:
from langchain_community.document_loaders import PyPDFLoader 
# import streamlit as st

# Read the judgment PDF; load() returns a list of Document objects, each carrying
# page-level metadata (source file, page index, total page count).
loader = PyPDFLoader("Case-Test.pdf")
docs = loader.load()
# Echo the loaded documents as the cell output.
docs

[Document(metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20240909212839', 'source': 'Case-Test.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content="2 All]                     Manoj Kumar Pandey and others V. State of U.P. and another 593\nORIGINAL JURISDICTION \nCIVIL SIDE \nDATED: ALLAHABAD 27.02.2006 \n \nBEFORE \nTHE HON’BLE DR. B.S. CHAUHAN, J. \nTHE HON’BLE DILIP GUPTA, J. \n \nCivil Misc. Writ Petition No. 40736 of 2002 \n \nManoj Kumar Pandey & others ...Petitioners \nVersus \nState of U.P. and another   ...Opp. parties \n \nCounsel for the Petitioners: \nSri S.P. Pandey \nSri D.P. Shukla \n \nCounsel for the Opposite Parties: \nSri B.N. Singh \n \nConstitution of India, Art. 226 -Right to \nappointment-petitioner appeared in \ncompetative examination-held for the \nPost of A.P.O. result declared on 20.3.99-\nState Government send requisition \n26.7.01-petition filed in September 2002 \ne.g. much after expiry of the life of \nwaiting list

In [26]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Configure chunking: chunk_size=1000 and chunk_overlap=20 (measured in characters
# under the splitter's default length function — TODO confirm if a custom one is set).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
# Preview only the first five chunks; the result is not stored here.
text_splitter.split_documents(docs)[:5]

[Document(metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20240909212839', 'source': 'Case-Test.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content="2 All]                     Manoj Kumar Pandey and others V. State of U.P. and another 593\nORIGINAL JURISDICTION \nCIVIL SIDE \nDATED: ALLAHABAD 27.02.2006 \n \nBEFORE \nTHE HON’BLE DR. B.S. CHAUHAN, J. \nTHE HON’BLE DILIP GUPTA, J. \n \nCivil Misc. Writ Petition No. 40736 of 2002 \n \nManoj Kumar Pandey & others ...Petitioners \nVersus \nState of U.P. and another   ...Opp. parties \n \nCounsel for the Petitioners: \nSri S.P. Pandey \nSri D.P. Shukla \n \nCounsel for the Opposite Parties: \nSri B.N. Singh \n \nConstitution of India, Art. 226 -Right to \nappointment-petitioner appeared in \ncompetative examination-held for the \nPost of A.P.O. result declared on 20.3.99-\nState Government send requisition \n26.7.01-petition filed in September 2002 \ne.g. much after expiry of the life of \nwaiting list

In [27]:
# Chunk every loaded page; `documents` is the list that feeds the FAISS index below.
documents=text_splitter.split_documents(docs)
# Echo the full chunk list as the cell output.
documents

[Document(metadata={'producer': 'PDFium', 'creator': 'PDFium', 'creationdate': 'D:20240909212839', 'source': 'Case-Test.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content="2 All]                     Manoj Kumar Pandey and others V. State of U.P. and another 593\nORIGINAL JURISDICTION \nCIVIL SIDE \nDATED: ALLAHABAD 27.02.2006 \n \nBEFORE \nTHE HON’BLE DR. B.S. CHAUHAN, J. \nTHE HON’BLE DILIP GUPTA, J. \n \nCivil Misc. Writ Petition No. 40736 of 2002 \n \nManoj Kumar Pandey & others ...Petitioners \nVersus \nState of U.P. and another   ...Opp. parties \n \nCounsel for the Petitioners: \nSri S.P. Pandey \nSri D.P. Shukla \n \nCounsel for the Opposite Parties: \nSri B.N. Singh \n \nConstitution of India, Art. 226 -Right to \nappointment-petitioner appeared in \ncompetative examination-held for the \nPost of A.P.O. result declared on 20.3.99-\nState Government send requisition \n26.7.01-petition filed in September 2002 \ne.g. much after expiry of the life of \nwaiting list

In [28]:
from transformers import AutoTokenizer, AutoModel
import torch

# Cache of (tokenizer, model) pairs keyed by checkpoint name, so the weights are
# loaded once per kernel session instead of on every embed_text call.
_EMBEDDING_BACKENDS = {}


def _load_embedding_backend(model_name):
    """Return the cached (tokenizer, model) pair for model_name, loading it on first use."""
    if model_name not in _EMBEDDING_BACKENDS:
        _EMBEDDING_BACKENDS[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModel.from_pretrained(model_name),
        )
    return _EMBEDDING_BACKENDS[model_name]


def embed_text(texts, model_name="law-ai/InLegalBERT"):
    """
    Takes a list of text strings and returns their [CLS] embeddings.

    Args:
        texts (list | str): Document text strings. A single string is treated
            as a one-element list.
        model_name (str): Hugging Face checkpoint of a BERT-style encoder.
            Defaults to "law-ai/InLegalBERT", the model this function always used.

    Returns:
        torch.Tensor: One row per input text, taken from position 0 of the
        model's last hidden state. For empty input the result has shape
        (0, model.config.hidden_size).
    """
    # Normalise the input so the emptiness check and the tokenizer see a list.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Loading the checkpoint is by far the most expensive step; reuse it across calls.
    tokenizer, model = _load_embedding_backend(model_name)

    # Nothing to embed: return an empty tensor rather than passing [] to the tokenizer.
    if not texts:
        return torch.empty((0, model.config.hidden_size))

    # Tokenize as one padded batch; over-long texts are truncated by the tokenizer.
    encoded_inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        model_output = model(**encoded_inputs)

    # Position 0 of each sequence is the [CLS] token for BERT-style tokenizers.
    return model_output.last_hidden_state[:, 0, :]


In [29]:
# !pip install faiss-cpu


In [30]:
from langchain_community.embeddings import OllamaEmbeddings

# Embeddings come from the "mxbai-embed-large" model served by Ollama.
embedding_model = OllamaEmbeddings(model="mxbai-embed-large")
# Index every chunk. Embedding only documents[:5] left the rest of the judgment
# invisible to similarity search and to the retrieval chain built on this store.
db = FAISS.from_documents(documents, embedding_model)


In [31]:
# Echo the vector store object as the cell output.
db

<langchain_community.vectorstores.faiss.FAISS at 0x13a0d9650>

In [32]:
# Sanity-check retrieval with a question the indexed judgment can actually answer.
# The earlier query about attention functions was unrelated to the legal text,
# so its top hit said nothing about retrieval quality.
query = "Can a candidate claim appointment after the life of the waiting list has expired?"
result = db.similarity_search(query)
# Show the best-matching chunk, or None when the search returned no hits.
result[0].page_content if result else None

'Commission to recommend the names of'

In [33]:
from langchain_community.llms import Ollama
## Load the Llama 3.2 (1B) model through the Ollama LLM wrapper
llm=Ollama(model="llama3.2:1b")
llm

Ollama(model='llama3.2:1b')

In [34]:
from langchain_core.prompts import ChatPromptTemplate

# Define a more flexible and clear prompt template
# prompt_template = """
# Answer the following question based strictly on the provided context. 
# Provide a thoughtful, step-by-step explanation before giving your final answer. 
# A bonus of $1000 is yours if the user deems the answer helpful!

# Context:
# {context}

# Question: {input}
# """

# Create the ChatPromptTemplate from the improved template

# The system message states the task and a grounding rule: answer only from the
# retrieved context and say so when the context is insufficient. The previous
# message ("Please ask me any questions !!!") gave the model neither.
# {context} and {input} are the template variables the chain fills in.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a legal research assistant for commercial court matters. "
            "Answer the user's question using only the supplied context. "
            "If the context does not contain the answer, say that you do not know "
            "instead of guessing.",
        ),
        ("user", "Context: {context}, Input: {input}"),
    ]
)
# prompt = ChatPromptTemplate.from_template(prompt_template)

In [35]:
from pydantic import BaseModel, ConfigDict

# Pydantic model with no fields; its only content is a model_config that turns on
# arbitrary_types_allowed.
# NOTE(review): nothing in the visible cells references MyConfig — confirm it is still needed.
class MyConfig(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)


In [36]:
## Chain introduction
## Create the stuff-documents chain: it combines the LLM with the prompt whose
## {context} slot receives the documents handed to the chain.

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [37]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Expose the FAISS store through the retriever interface; no search options are
# passed, so as_retriever() uses its defaults.
retriever=db.as_retriever()
# Echo the retriever's repr as the cell output.
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x13a0d9650>, search_kwargs={})

In [38]:
"""
Retrieval chain:This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain

# Wire the retriever to the document-combining chain defined earlier.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [39]:
# Run the full pipeline for one question; the chain reads it from the "input" key.
# NOTE(review): the only PDF loaded in this notebook is Manoj Kumar Pandey v. State
# of U.P., so the retrieved context may be unrelated to the case named here.
question = "Vodafone vs Union of India ? give me just the answer"
response = retrieval_chain.invoke({"input": question})

In [40]:
# Split the answer into blank-line-separated paragraphs and print one per line.
arr = response['answer'].split("\n\n")
print(*arr, sep="\n")

The court held that petitioners cannot claim relief which was granted to other persons by the Apex Court.
