In [1]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.indices import ListIndex
from llama_index.core import Settings
import chromadb
import openai
import os
import utils
from dotenv import load_dotenv


# Read the variables defined in the local .env file so that settings do not
# have to be hard-coded in the notebook.
# NOTE(review): presumably this supplies the OpenAI credentials used by the
# LLM/embedding calls in later cells -- confirm against the .env file.
load_dotenv()

True

### Summarisation using a list index with the `tree_summarize` response mode

In [2]:
import tiktoken
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings


# Encoder for the gpt-3.5-turbo tokenizer; handed to the counting handler as
# its tokenizer callable.
gpt35_encode = tiktoken.encoding_for_model("gpt-3.5-turbo").encode
token_counter = TokenCountingHandler(tokenizer=gpt35_encode)

# Install the counter on the global Settings object so the cells below can
# read token_counter.* after each query.
Settings.callback_manager = CallbackManager([token_counter])

In [3]:
# Load the judgment PDF into a list of document objects.
documents = SimpleDirectoryReader(
    input_files=["documents_4_v0/Shayara_Bano_vs_Union_Of_India_And_Ors_Ministry_Of_Women_on_22_August_2017.PDF"]
).load_data()

# Build a ListIndex over the loaded documents and query it with the
# "tree_summarize" response mode. In the recorded run, the token report that
# follows this cell shows 0 embedding tokens and roughly 249k LLM prompt tokens
# for this approach.
index = ListIndex.from_documents(documents)
tree_summarize_engine = index.as_query_engine(response_mode="tree_summarize")

prompt = "Summarize this document"
summarised_response = tree_summarize_engine.query(prompt)

print(summarised_response)

The document extensively examines the practice of 'talaq-e-biddat' (triple talaq) in Muslim personal law in India, focusing on its historical origins, legal implications, and debates on its validity. It discusses the challenges to its validity under constitutional principles, the importance of gender equality, and the need for legislative and judicial interventions to address discriminatory practices within personal laws. The document emphasizes the protection of personal laws under the Constitution, particularly Article 25, and suggests legislative action to address issues related to 'talaq-e-biddat'. It also touches upon court cases, judgments, and Quranic principles, highlighting the legal challenge to triple talaq in India and the differing views on the constitutional scrutiny of personal laws.


In [4]:
# Report the token usage recorded by token_counter since it was created or
# last reset. Each metric is a single f-string and the newline is supplied via
# sep, so no stray spaces are inserted around the line breaks (passing "\n" as
# its own positional argument makes print() wrap it in the default " " sep).
print(
    f"Embedding Tokens: {token_counter.total_embedding_token_count}",
    f"LLM Prompt Tokens: {token_counter.prompt_llm_token_count}",
    f"LLM Completion Tokens: {token_counter.completion_llm_token_count}",
    f"Total LLM Token Count: {token_counter.total_llm_token_count}",
    sep="\n",
)

Embedding Tokens:  0 
 LLM Prompt Tokens:  249396 
 LLM Completion Tokens:  9605 
 Total LLM Token Count:  259001 



In [5]:
# Clear the accumulated counts so the next section's token report covers only
# that section's own calls.
token_counter.reset_counts()

### Summarisation using simple query engine

In [6]:
# Reload the same judgment PDF for the vector-index experiment.
pdf_reader = SimpleDirectoryReader(
    input_files=["documents_4_v0/Shayara_Bano_vs_Union_Of_India_And_Ors_Ministry_Of_Women_on_22_August_2017.PDF"]
)
documents = pdf_reader.load_data()

# Index every loaded document in a VectorStoreIndex and query it with the
# default query engine (no arguments passed to as_query_engine()).
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

response = query_engine.query("Summarize this document")
print(response)

The document discusses the importance of judges embodying various qualities to make objective decisions, emphasizing the need for purposive interpretation of dynamic constitutional concepts. It mentions the role of social legislation in ordering people's lives and the need for laws to expand freedom and address social inequalities. The document also touches upon the need to divorce religion from personal law and foster a national identity while preserving Indian cultural pluralism. Additionally, it references specific verses from the Quran related to women's courses.


In [7]:
# Report the token usage recorded by token_counter since it was created or
# last reset. Each metric is a single f-string and the newline is supplied via
# sep, so no stray spaces are inserted around the line breaks (passing "\n" as
# its own positional argument makes print() wrap it in the default " " sep).
print(
    f"Embedding Tokens: {token_counter.total_embedding_token_count}",
    f"LLM Prompt Tokens: {token_counter.prompt_llm_token_count}",
    f"LLM Completion Tokens: {token_counter.completion_llm_token_count}",
    f"Total LLM Token Count: {token_counter.total_llm_token_count}",
    sep="\n",
)

Embedding Tokens:  206777 
 LLM Prompt Tokens:  1315 
 LLM Completion Tokens:  93 
 Total LLM Token Count:  1408 



In [8]:
# Number of document objects the reader produced for this PDF.
len(documents)

312

In [13]:
# Clear the accumulated counts so the next section's token report covers only
# that section's own calls.
token_counter.reset_counts()

### Summarisation using simple query engine on only the first 20 loaded documents

In [12]:
# Quick check of list slicing on the loaded documents (first 10 entries).
len(documents[0:10])

10

In [14]:
# Upper bound on how many of the loaded documents are indexed in this run.
MAX_DOCUMENTS = 20

pdf_reader = SimpleDirectoryReader(
    input_files=["documents_4_v0/Shayara_Bano_vs_Union_Of_India_And_Ors_Ministry_Of_Women_on_22_August_2017.PDF"]
)

# Keep only the leading MAX_DOCUMENTS entries. Slicing a shorter list simply
# returns all of its elements, so no explicit length check is required.
# Note: this limits what gets indexed; the query engine below is still created
# with its default settings.
documents = pdf_reader.load_data()[:MAX_DOCUMENTS]

index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

response = query_engine.query("Summarize this document")
print(response)

The document discusses a legal case involving the practice of 'talaq-e-biddat' (triple talaq) in Muslim personal law. The respondent-husband claims that his pronouncement of divorce was valid under the Hanafi sect of Sunni Muslims and in accordance with Sharia law. The husband argues that the wife's petition under Article 32 of the Indian Constitution is not maintainable as the issues raised are not justiciable. The court decided to focus specifically on the issue of triple talaq in this case.


In [15]:
# Report the token usage recorded by token_counter since it was created or
# last reset. Each metric is a single f-string and the newline is supplied via
# sep, so no stray spaces are inserted around the line breaks (passing "\n" as
# its own positional argument makes print() wrap it in the default " " sep).
print(
    f"Embedding Tokens: {token_counter.total_embedding_token_count}",
    f"LLM Prompt Tokens: {token_counter.prompt_llm_token_count}",
    f"LLM Completion Tokens: {token_counter.completion_llm_token_count}",
    f"Total LLM Token Count: {token_counter.total_llm_token_count}",
    sep="\n",
)

Embedding Tokens:  10755 
 LLM Prompt Tokens:  1075 
 LLM Completion Tokens:  107 
 Total LLM Token Count:  1182 

