In [82]:
# Import neccesary libraries
import os
from dotenv import load_dotenv
import chromadb
from openai import OpenAI
from chromadb.utils import embedding_functions

In [83]:
# Load environment variables from .env file
load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")

In [84]:
# load the OpenAI embedding function
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=openai_key, model_name="text-embedding-ada-002")

In [85]:
# Initialize the Chroma client with persistence
chroma_client = chromadb.PersistentClient(path="chroma_persistent_storage")
collection_name = "document_qa_collection"
collection = chroma_client.get_or_create_collection(
    name=collection_name, embedding_function=openai_ef)

In [86]:
client = OpenAI(api_key=openai_key)

In [87]:
# # Be sure client is running
# resp = client.chat.completions.create(
#     model="gpt-3.5-turbo",
#     messages=[
#         {"role": "system", "content": "You are a helpful assistant."},
#         {
#             "role": "user",
#             "content": "Who is Jesus",
#         },
#     ],
# )

# print(resp.choices[0].message.content)

In [88]:
# Function to load documents from a directory
def load_documents_from_directory(directory_path):
    print("==== Loading documents from directory ====")
    documents = []
    for filename in os.listdir(directory_path):
        if filename.endswith(".txt"):
            with open(
                os.path.join(directory_path, filename), "r", encoding="utf-8"
            ) as file:
                documents.append({"id": filename, "text": file.read()})
    return documents

In [89]:
# Load documents from the directory
directory_path = os.getenv("20casedocs")
documents = load_documents_from_directory(directory_path)

print(f"Loaded {len(documents)} documents")

==== Loading documents from directory ====
Loaded 20 documents


In [90]:
# Function to split text into chunks
def split_text(text, chunk_size=1000, chunk_overlap=20):
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - chunk_overlap
    return chunks

# Split documents into chunks
chunked_documents = []
for doc in documents:
    chunks = split_text(doc["text"])
    #print("==== Splitting docs into chunks ====")
    for i, chunk in enumerate(chunks):
        chunked_documents.append({"id": f"{doc['id']}_chunk{i+1}", "text": chunk})

print(f"Split documents into {len(chunked_documents)} chunks")

Split documents into 716 chunks


In [91]:
chunked_documents[0]

{'id': 'C9.txt_chunk1',
 'text': "West Bengal State Electricity Board and Others v Desh Bandhu Ghosh and Others\nSupreme Court of India\n\n26 February 1985\nCivil Appeal No. 562 of 1985\nThe Judgment was delivered by : O. Chinnappa Reddy, J.\nSpecial leave granted.\n1.  The West Bengal State Electricity Board is the principal appellant in this appeal by special leave which we have just now granted. The first respondent, a permanent employee of the West Bengal State Electricity Board, filed the writ petition out of which the appeal arises in the Calcutta High Court to quash an order dated march 22, 1984 of the Secretary, West Bengal State Electricity Board terminating his services as Deputy Secretary with immediate effect on payment of three month's salary in lieu of three month's notice. The order gave no reasons for terminating the services of the respondent and there was nothing in the order which could possibly be said to attach any stigma to the respondent. Apparently the order was

In [92]:
# Function to generate embeddings using OpenAI API
def get_openai_embedding(text):
    response = client.embeddings.create(input=text, model="text-embedding-ada-002")
    embedding = response.data[0].embedding
    #print("==== Generating embeddings... ====")
    return embedding


# Generate embeddings for the document chunks
for doc in chunked_documents:
    #print("==== Generating embeddings... ====")
    doc["embedding"] = get_openai_embedding(doc["text"])

#print(doc["embedding"])   

In [93]:
# Upsert documents with embeddings into Chroma
for doc in chunked_documents:
    # print("==== Inserting chunks into db;;; ====")
    collection.upsert(
        ids=[doc["id"]], documents=[doc["text"]], embeddings=[doc["embedding"]])

In [94]:
# Retrieve a document from the collection
retrieved_data = collection.get(ids=[doc["id"]])  # Retrieve by ID
print("Document:", retrieved_data["documents"][0])
print("Embedding:", retrieved_data["embeddings"][0]) 

Document: anding crops in the field. The view of the Trial Court, having regard to the aforementioned facts and circumstances of the case, was, therefore, a possible view and as such we need not go into the other contentions as regards the motive or time of death, vis- -vis, the medical opinion etc.
49. For the reasons mentioned hereinbefore, we are of the opinion that the High Court was not correct in arriving at the conclusion that the view of the Trial Court was wholly perverse and could not be sustained on the materials brought on record by the prosecution. This appeal is, therefore, allowed.
50. The impugned judgment of the High Court is set aside. The appellants are on bails. They are discharged from their bail bonds.



TypeError: 'NoneType' object is not subscriptable

In [95]:
# Function to query documents
def query_documents(question, n_results=20):
    # query_embedding = get_openai_embedding(question)
    results = collection.query(query_texts=question, n_results=n_results)

    # Extract the relevant chunks
    relevant_chunks = [doc for sublist in results["documents"] for doc in sublist]
    print("==== Returning relevant chunks ====")
    return relevant_chunks

In [96]:
def generate_response(question, relevant_chunks):
    context = "\n\n".join(relevant_chunks)
    prompt = (
        "You are a legal assistant trained to provide clear, structured, and empathetic responses to legal queries. "
        "When a user asks a question, respond in a conversational and straightforward manner, providing information in this structure:\n\n"
        "1. **Empathetic Acknowledgment:** Start by acknowledging the user’s situation in a compassionate tone. Avoid legal jargon in this part.\n\n"
        "2. **Legal Context:** Provide a simple explanation of the legal principles relevant to the query. Use clear, accessible language.\n\n"
        "3. **Case Example:** Retrieve and summarize a case relevant to the user’s query. Include the following details:\n"
        "   - **Case Name:** The title of the case, listing the parties involved.\n"
        "   - **Court:** The name of the court where the judgment was delivered.\n"
        "   - **Date:** The date the judgment was delivered.\n"
        "   - **Case Reference Number:** The unique identifier for the case, such as appeal or writ number.\n"
        "   - **Judgment Author:** The name of the judge or judges who delivered the judgment.\n"
        "   - **Case Summary:** Include a brief description of the facts, legal issue, and decision in the case, tailored to how it relates to the user's situation.\n\n"
        "4. **Steps the User Can Take:** Offer 3-5 clear and practical steps that the user can follow to address their issue. Ensure these steps are actionable and easy to understand.\n\n"
        "5. **Additional Resources:** Provide links or references to relevant laws, legal bodies, or official documentation for further exploration.\n\n"
        "Ensure the response is concise and written in plain language, making it easy to follow and understand.\n\n"
        "Example Query Input:\n"
        "I believe I was wrongfully terminated from my job. What can I do?\n\n"
        "Example Response Format:\n"
        "I'm sorry to hear about your situation. In the UK, if you believe you've been wrongfully dismissed, you have the right to challenge your employer's decision. Wrongful dismissal occurs when an employer breaches the terms of your employment contract, such as dismissing you without the proper notice period.\n\n"
        "**Case Example: Sekander v Rocketmill Ltd**\n"
        "A relevant case is Sekander v Rocketmill Ltd (ET/2301645/2016), where Mr. Sekander was both wrongfully and unfairly dismissed. He began working for Rocketmill Ltd in 2010 as a web designer. In 2013, he entered into a Service Agreement that required him to work for ten years before realizing the market value of his shareholding, unless terminated for gross misconduct. Despite this agreement, Mr. Sekander was dismissed on allegations of gross misconduct. The Employment Tribunal found that the employer did not have sufficient grounds for gross misconduct and had breached the contractual terms, leading to a ruling in favor of Mr. Sekander for both wrongful and unfair dismissal.\n\n"
        "**Steps You Can Take:**\n"
        "1. Review your employment contract to understand notice periods and dismissal procedures.\n"
        "2. Gather evidence, such as communications, performance reviews, or dismissal letters.\n"
        "3. Seek legal advice from an employment law solicitor.\n"
        "4. Notify ACAS for Early Conciliation to resolve the dispute.\n"
        "5. File a claim with an employment tribunal within three months of dismissal.\n\n"
        "For more information, you can refer to the UK government’s employment tribunal website.\n\n"
        "Context:\n" + context + "\n\nQuestion:\n" + question
    )

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": prompt,
            },
            {
                "role": "user",
                "content": question,
            },
        ],
    )

    answer = response.choices[0].message
    return answer


In [102]:
question = "Someone played a fast one on me. We had a na agreement that if they fall short in paying what they owe me, i would confiscate their item of value but they took everything that was valuable out of it. I feel cheated because the item has no vlaue again"
relevant_chunks = query_documents(question)
answer = generate_response(question, relevant_chunks)

print(answer)

==== Returning relevant chunks ====
ChatCompletionMessage(content="I'm sorry to hear about what happened to you. In legal terms, when there is an agreement where someone owes you something, and in case of non-payment, you have a right to confiscate their valuable item, but they take everything valuable, you might be facing a situation of unfair dealing and a breach of agreement. This might constitute a fraudulent or dishonest intention to cheat you.\n\n**Case Example: State of Jharkhand vs. Infrasoft Ltd.**\n- **Court:** Supreme Court of India\n- **Date:** January 15, 2019\n- **Case Reference Number:** Criminal Appeal No. 234/2018\n- **Judgment Author:** Honorable Justice K.S. Radhakrishnan\n- **Case Summary:** In this case, Infrasoft Ltd. entered into an agreement with the State of Jharkhand regarding the supply of materials for a public project. Infrasoft Ltd. received the goods but failed to pay the agreed amount under the contract. The State of Jharkhand was allowed by the contract

In [100]:
relevant_chunks


['le to the lands in suit and directing the respondents who were the appellants\' benamidars to restore possession, this Court, after discussing the English and Indian law on the subject, said :\n"The correct position in law, in our opinion, is that what one has to see is whether the illegality goes so much to the root of the matter that the plaintiff cannot bring his action without relying upon the illegal transaction into which he had entered. If the illegality be trivial or venial, as stated by Willistone and the plaintiff is not required to rest his case upon that illegality, then public policy demands that the defendant should not be allowed to take advantage of the position. A strict view, of course, must be taken of the plaintiff\'s conduct, and he should not be allowed to circumvent the illegality by restoring to some subterfuge or by mis-stating the facts. If, however, the matter is clear and the illegality is not required to be pleaded or proved as part of the cause of action