In [2]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.retrievers.self_query.base import SelfQueryRetriever
import datetime
import chromadb
import requests
from django.views import View
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_exempt

from dotenv import load_dotenv
from nltk.corpus import stopwords
from os import walk
from decouple import config
import os
import getpass

# Read the OpenAI token via decouple's `config` and export it as the
# OPENAI_API_KEY environment variable for the rest of the notebook.
api_key = config('OPENAI_TOKEN')
os.environ["OPENAI_API_KEY"] = str(api_key)
# NOTE(review): presumably the cap on documents returned per query -- not used
# in the cells visible here; confirm against later cells.
MAX_QUERY_DOCUMENTS = 5
DISTANCE_THRESHOLD = 0.43  # Arbitrary value
# Path handed to chromadb.PersistentClient; relative to the notebook's working directory.
LAW_COLLECTIONS_DB = '../../database/chromadb/law_collections'

# NOTE(review): presumably the rates of the chat model used below -- confirm
# against the provider's current price list.
INPUT_TOKEN_PRICING = 0.01  # USD per 1K tokens
OUTPUT_TOKEN_PRICING = 0.03  # USD per 1K tokens

In [3]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.vectorstores import Chroma
embeddings = OpenAIEmbeddings()
vectorstores = {}

# Open the persisted law collections and build one LangChain vector store per
# collection, keyed by the collection name.
client = chromadb.PersistentClient(path=LAW_COLLECTIONS_DB)
for collection in client.list_collections():
    collection_content = collection.get()
    texts = collection_content['documents'] or []
    # Tolerate a missing metadata list so the pairing below stays aligned.
    metadatas = collection_content['metadatas'] or [None] * len(texts)
    docs = [
        # Document requires a dict; entries stored without metadata come back as None.
        Document(page_content=text, metadata=metadata or {})
        for text, metadata in zip(texts, metadatas)
    ]
    # Give every store its own collection. Without an explicit name each call
    # writes into LangChain's default "langchain" collection on the in-memory
    # client, so the per-law stores would not be isolated from one another.
    vectorstores[collection.name] = Chroma.from_documents(
        docs, embeddings, collection_name=collection.name
    )

In [20]:
# Show the collection names for which a vector store was built.
vectorstores.keys()

dict_keys(['NON_CITIZENS_PROPERTY_RESTRICTION_ACT', 'INSOLVENCY_ACT_2009', 'THE_INTERPRETATION_AND_GENERAL_CLAUSES_ACT_1974', 'COMPANIES_REGULATIONS', 'THE_COMPANIES_ACT_2001', 'THE_INSOLVENCY_REGULATIONS_2020'])

In [4]:
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
# (name, description, type) of every metadata attribute described to the
# self-query retriever.
_field_specs = (
    ("begin_page", "The number of the page where the section begins", "number"),
    ("section_number", "The number of the section", "number"),
    ("document_name", "The name of the law from whihc the section is extracted", "string"),
    ("document_path", "Irrelevant", "string"),
    ("part_title", "The title of the part where the section is extracted", "string"),
)
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in _field_specs
]

# Short description of what the indexed documents contain.
document_content_description = "The companies act 2001 of Mauritius"

# Chat model with temperature 0; shared with the conversational chain.
llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")

# Self-query retriever restricted to the Companies Act 2001 store.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstores["THE_COMPANIES_ACT_2001"],
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)


In [5]:
# Question sent to the chain (also reused when the retriever is queried directly).
prompt = "Can you summarize the section 161 indemnity and insurance of the companies act 2001 ?"

# Buffer memory storing the history as message objects under 'chat_history'.
memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')

# Conversational chain combining the chat model, the self-query retriever and the memory.
conversation_chain = ConversationalRetrievalChain.from_llm(llm, retriever, memory=memory)

# Last expression of the cell: the chain's answer is rendered as the cell output.
conversation_chain.run(prompt)

"I'm sorry, but I don't have access to the specific text of the Companies Act 2001, including section 161 regarding indemnity and insurance. To get an accurate summary of that section, you would need to consult the actual legislation or legal resources that can provide you with the details of that provision. Legal professionals or official government resources could also assist you in understanding the specifics of the law. If you have access to the text of the law, I could help summarize it, but without the text, I'm unable to provide you with the information you're seeking."

In [8]:
# Call the retriever directly with the same prompt to inspect which documents
# it returns; the output recorded for this cell is an empty list.
retrieved_documents = retriever.get_relevant_documents(prompt)
retrieved_documents

[]