In [None]:
import warnings


def warn(*args, **kwargs):
    """Accept any arguments and do nothing; stands in for ``warnings.warn``."""
    return None


# Ignore every warning category, then swap the emitter for the no-op above so
# code that calls warnings.warn(...) stays silent as well.
warnings.filterwarnings('ignore')
warnings.warn = warn

import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.llms import Ollama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.memory import ConversationBufferMemory

import pickle

### Define directory with context files

In [2]:
# Folder that is scanned for the PDF files used as retrieval context.
directory = './context/'

In [3]:
# Accumulates the documents returned by the PDF loader for every file found.
all_documents = []

### Load all PDF files present in the given directory

In [4]:
# Start from an empty list so that re-running this cell rebuilds the corpus
# instead of appending a second copy of every document.
all_documents = []

# os.listdir() returns entries in arbitrary order; sorting keeps the document
# (and therefore chunk) order stable from one run to the next.
for filename in sorted(os.listdir(directory)):
    # Compare the extension case-insensitively so 'Act.PDF' is not skipped.
    if not filename.lower().endswith('.pdf'):
        continue
    filepath = os.path.join(directory, filename)
    loader = PyPDFLoader(filepath)
    documents = loader.load()
    all_documents.extend(documents)

In [8]:
# Serialize the loaded documents to 'cache.pkl' in the working directory.
# NOTE(review): nothing in the visible notebook reads this cache back — confirm
# whether a later run is meant to load it instead of re-parsing the PDFs.
with open('cache.pkl', 'wb') as f:
    pickle.dump(all_documents, f)

### Chunking

In [None]:
# Break the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
texts = text_splitter.split_documents(all_documents)

# Report how many chunks the splitter produced.
print(f'Total number of chunks: {len(texts)}')

### Load Embedding Model

In [None]:
# Embedding function backed by the "all-MiniLM-L6-v2" sentence-transformer
# model; it is handed to the vector store below.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
print('embedding model loaded')

### Load the Vector DB or Create one

In [None]:
# Location of the persisted Chroma database, shared by the load and create paths.
persist_directory = "chroma_db"

# Reuse the directory only when it actually contains files. A bare or empty
# 'chroma_db' folder (for example one left behind by an interrupted run) would
# otherwise be opened as an empty store and every retrieval would return nothing.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    print('Loading from saved db')
    vectorstore = Chroma(embedding_function=embeddings, persist_directory=persist_directory)
    print('document loaded')
else:
    # No usable database yet: embed the chunks and persist them for next time.
    print('No vector previously created db found... Creating new db')
    vectorstore = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
    print('document ingested')

### Load an LLM/SLM

In [4]:
# Language model used to answer questions: the "phi3.5" model served via Ollama.
# NOTE(review): presumably needs a running Ollama instance with this model
# already pulled — confirm before running.
llm = Ollama(model="phi3.5")

### Create a prompt

In [None]:
# System instructions for the assistant. The {history} and {context}
# placeholders are filled in per question with the earlier conversation and
# the retrieved document text.
system_prompt = """
        You are an AI lawyer specializing in Indian law.
        Your role is to provide clear, concise, and accurate legal advice based solely on the information from the provided documents and prior conversations with the user.
        You must always respond as a legal expert and avoid disclaiming your expertise.
        If an answer is unknown, simply state that and refrain from speculation.
        Cite relevant law sections, acts, or provisions in your response.
        Note: The developer has provided the legal documents, not the user.

        Previous conversations:
        {history}

        Document context:
        {context}
    """

# Two-message chat prompt: the system instructions above followed by the
# user's question, supplied through the {input} placeholder.
qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

In [None]:
# Expose the vector store as a retriever, using its default search settings.
retriever = vectorstore.as_retriever()

In [None]:
# Conversation log shared with fnAsk(); each entry is a dict with a "role"
# key and a "content" key holding the corresponding message object.
history = []

### Define a function with Retrieval Model

In [None]:
def _format_history(turns):
    """Render stored turns as 'User: ...' / 'Assistant: ...' lines for the prompt."""
    lines = []
    for turn in turns:
        speaker = "User" if turn["role"] == "user" else "Assistant"
        message = turn["content"]
        # Entries normally hold message objects; fall back to the raw value
        # if a plain string was stored instead.
        lines.append(f"{speaker}: {getattr(message, 'content', message)}")
    return "\n".join(lines)


def fnAsk():
    """Run an interactive question/answer loop over the indexed documents.

    Reads questions from standard input until the user types ``quit``,
    ``exit`` or ``bye`` (case-insensitive, surrounding whitespace ignored) or
    input is interrupted. For each non-empty question the retriever is asked
    for supporting passages, the prompt is filled with those passages and a
    plain-text transcript of the earlier turns, and the model's reply is
    printed.

    Relies on the notebook-level ``retriever``, ``qa_prompt``, ``llm`` and
    ``history`` objects. Every completed exchange is appended to ``history``
    as two ``{"role": ..., "content": <message>}`` entries (user, then
    assistant).
    """
    # The prompt and the model do not change between turns, so compose once.
    llm_chain = qa_prompt | llm

    while True:
        try:
            query = input("Question:").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the session cleanly.
            print("Bot: Goodbye!")
            break

        print("User:", query, "\n")
        if query.lower() in ["quit", "exit", "bye"]:
            print("Bot: Goodbye!")
            break

        # Nothing to retrieve or answer for a blank line; ask again without
        # logging an empty turn or calling the model.
        if not query:
            continue

        relevant_docs = retriever.invoke(query)
        context_documents_str = "\n\n".join(doc.page_content for doc in relevant_docs)

        # The transcript is built before the current question is logged, so
        # the question reaches the model only once (as the human message).
        result = llm_chain.invoke(
            {
                "input": query,
                "history": _format_history(history),
                "context": context_documents_str,
            }
        )

        # Log both sides only after a successful answer, so a failed model
        # call never leaves an unanswered user turn in the history.
        history.append({"role": "user", "content": HumanMessage(content=query)})
        history.append({"role": "assistant", "content": AIMessage(content=result)})

        print("Bot:", result, "\n\n")

### Run the function

In [None]:
# Start the interactive chat loop; type "quit", "exit" or "bye" to stop.
fnAsk()