In [10]:
# %%
import pandas as pd 
import numpy as np 
from pathlib import Path
from glob import glob
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, download_loader
import os 
from dotenv import load_dotenv
# Load environment variables from the local .env file; OPENAI_API_KEY is read
# from the environment further down in this cell.
load_dotenv('.env')

# Fetch the SimpleDirectoryReader loader and keep a handle on it.
loader = download_loader("SimpleDirectoryReader")

# Folders holding the summary text files.
dataset_a1 = "dataset/summary/A1"
dataset_a2 = "dataset/summary/A2"

# Every .txt file directly inside each folder.
dataset_a1_files = glob(dataset_a1 + "/*.txt")
dataset_a2_files = glob(dataset_a2 + "/*.txt")

# One flat list of paths: the A1 files followed by the A2 files.
comb_data = [*dataset_a1_files, *dataset_a2_files]

# Read the listed files into documents.
documents = SimpleDirectoryReader(input_files=comb_data).load_data()

# Build the vector index over those documents.
# NOTE(review): confirm that from_documents actually consumes the
# `openai_api_key` keyword in the installed llama_index version.
index = GPTVectorStoreIndex.from_documents(
    documents=documents,
    openai_api_key=os.environ["OPENAI_API_KEY"],
)

# Query engine backed by the index.
query_engine = index.as_query_engine()

# Run a first query and print the engine's answer.
response = query_engine.query("what is the law case?")
print(response)

def _build_query_engine():
    """Read the A1 and A2 files, index them, and return a query engine."""
    # combining the data
    comb_data = dataset_a1_files + dataset_a2_files

    # creating the documents
    documents = SimpleDirectoryReader(input_files=comb_data).load_data()
    # creating indexing from documents
    index = GPTVectorStoreIndex.from_documents(documents=documents, openai_api_key=os.environ["OPENAI_API_KEY"])

    # creating the query engine
    return index.as_query_engine()


def get_response(query, rebuild=False):
    """Answer ``query`` against the combined A1 + A2 documents.

    The query engine is built on the first call and then reused, so later
    calls no longer re-read every file and rebuild the whole index before
    answering.

    Args:
        query: The question text handed to the query engine.
        rebuild: When true, discard the cached engine and build a new one
            from the current file lists before answering.

    Returns:
        Whatever ``query_engine.query(query)`` returns.
    """
    # The engine is cached on the function object itself; re-running the cell
    # redefines the function and therefore starts with an empty cache.
    if rebuild or getattr(get_response, "_query_engine", None) is None:
        get_response._query_engine = _build_query_engine()
    return get_response._query_engine.query(query)


# %%






The law case is that an appeal shall lie from a judgment or order passed by the one judge of the high court in exercise of original jurisdiction under Article 226 of the Constitution of India, to a division bench comprising of two judges of the same high court. However, no such appeal shall lie against an interlocutory order or against an order passed in exercise of supervisory jurisdiction under Article 227 of the Constitution of India.


In [25]:
# Build a new vector index from the documents already loaded in this kernel,
# then replace the query engine with one backed by that index.
index = GPTVectorStoreIndex.from_documents(
    documents=documents,
    openai_api_key=os.environ["OPENAI_API_KEY"],
)
query_engine = index.as_query_engine()



In [27]:
# Send the mortgage question to the current query engine and print the answer.
# The query text keeps its leading and trailing newline.
response = query_engine.query("\nshow me cases related to mortgage\n")
print(response)

The case of Topandas v. Firm of G. Appalaswamy (1963 Indlaw SC 379) is related to mortgage. In this case, the Supreme Court of India considered the question of whether a sitting tenant who took property by a possessory or usufructuary mortgage in his favour was liable to deliver physical possession upon redemption to the mortgagor. The Gujarat High Court decision in Patel Atmaram Nathudas v. Babubhai Keshavlal (1974 Indlaw Guj 88) is also related to mortgage. In this case, the court held that it would be unreasonable to attribute to a tenant the intention to surrender the tenancy and to invoke the sophisticated doctrine of implied surrender.


In [14]:
# Call download_loader for "SimpleDirectoryReader" and bind the result to `loader`.
# NOTE(review): SimpleDirectoryReader is also imported directly from llama_index,
# and `loader` is not used in the cells visible here; confirm this is still needed.
loader = download_loader("SimpleDirectoryReader")


In [15]:
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, download_loader
from llama_index.output_parsers import LangchainOutputParser
from llama_index.llm_predictor import StructuredLLMPredictor
from llama_index.prompts.prompts import QuestionAnswerPrompt, RefinePrompt
from llama_index.prompts.default_prompts import DEFAULT_TEXT_QA_PROMPT_TMPL, DEFAULT_REFINE_PROMPT_TMPL
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from glob import glob 

# Folders holding the summary text files.
dataset_a1 = "dataset/summary/A1"
dataset_a2 = "dataset/summary/A2"

# Every .txt file directly inside each folder.
dataset_a1_files = glob(f"{dataset_a1}/*.txt")
dataset_a2_files = glob(f"{dataset_a2}/*.txt")

comb_data = dataset_a1_files + dataset_a2_files

documents = SimpleDirectoryReader(input_files=comb_data).load_data()

# Predictor handed to the service context below.
llm_predictor = StructuredLLMPredictor()

# Chunk size is configured on the ServiceContext rather than passed to
# from_documents, and the same context is shared by the index and the query
# engine so both use one predictor and one chunk size.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    chunk_size=512,
)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

# define output schema: a single "answer" field
response_schemas = [
        ResponseSchema(name="answer", description="answer to the user's question"),
]

# define output parser: LangChain structured parser wrapped for llama_index
lc_output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
output_parser = LangchainOutputParser(lc_output_parser)

# format each default prompt template with the output parser's instructions
fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
fmt_refine_tmpl = output_parser.format(DEFAULT_REFINE_PROMPT_TMPL)
qa_prompt = QuestionAnswerPrompt(fmt_qa_tmpl, output_parser=output_parser)
refine_prompt = RefinePrompt(fmt_refine_tmpl, output_parser=output_parser)

# query engine that uses the formatted QA and refine prompts
query_engine = index.as_query_engine(
    service_context=service_context,
    text_qa_template=qa_prompt, 
    refine_template=refine_prompt, 
)


In [21]:
# Run the question through the current query engine.
# The recorded run of this cell raised OutputParserException
# ("Got invalid JSON object"). Presumably the model's reply was not the JSON the
# output parser expects; TODO confirm by inspecting the raw reply.
query_engine.query("what is the law case?")

OutputParserException: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)

In [None]:
from llama_index import Document, GPTListIndex, LLMPredictor, ServiceContext, load_index_from_storage

def get_llm(llm_name, model_temperature, api_key, max_tokens=256):
    """Return a LangChain LLM wrapper configured for ``llm_name``.

    Side effect: stores ``api_key`` in ``os.environ['OPENAI_API_KEY']``.

    Args:
        llm_name: Model name. "text-davinci-003" selects the ``OpenAI``
            wrapper; any other name selects ``ChatOpenAI``.
        model_temperature: Passed through as ``temperature``.
        api_key: Key written to the ``OPENAI_API_KEY`` environment variable.
        max_tokens: Passed through as ``max_tokens`` (default 256).

    Returns:
        An ``OpenAI`` or ``ChatOpenAI`` instance.
    """
    # Imported here because no cell visible in this notebook imports these two
    # names, so the function would otherwise fail with NameError when called.
    from langchain.chat_models import ChatOpenAI
    from langchain.llms import OpenAI

    os.environ['OPENAI_API_KEY'] = api_key
    # Both wrappers take the same keyword arguments; only the class differs.
    llm_cls = OpenAI if llm_name == "text-davinci-003" else ChatOpenAI
    return llm_cls(temperature=model_temperature, model_name=llm_name, max_tokens=max_tokens)

def _parse_term_definitions(text):
    """Turn ``Term: ... Definition: ...`` lines of ``text`` into a dict."""
    parsed = {}
    for line in text.split("\n"):
        # Only lines carrying both markers are considered.
        if "Term:" not in line or "Definition:" not in line:
            continue
        # Split on the FIRST "Definition:" so a definition that itself contains
        # that marker is kept whole instead of being cut at its last occurrence.
        head, _, definition = line.partition("Definition:")
        term = head.split("Term:")[-1].strip()
        # Skip lines whose term is empty rather than storing a "" key.
        if term:
            parsed[term] = definition.strip()
    return parsed


def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_key):
    """Query an LLM over ``documents`` and parse its answer into terms.

    Builds a temporary list index over ``documents``, runs ``term_extract_str``
    through it in "tree_summarize" mode, and parses the answer line by line.

    Args:
        documents: Documents handed to ``GPTListIndex.from_documents``.
        term_extract_str: The query sent to the query engine.
        llm_name: Model name forwarded to ``get_llm``.
        model_temperature: Temperature forwarded to ``get_llm``.
        api_key: API key forwarded to ``get_llm``.

    Returns:
        A dict mapping each term to its definition. Only answer lines that
        contain both "Term:" and "Definition:" contribute; if a term repeats,
        the last occurrence wins.
    """
    llm = get_llm(llm_name, model_temperature, api_key, max_tokens=1024)

    service_context = ServiceContext.from_defaults(llm_predictor=LLMPredictor(llm=llm),
                                                   chunk_size=1024)

    temp_index = GPTListIndex.from_documents(documents, service_context=service_context)
    query_engine = temp_index.as_query_engine(response_mode="tree_summarize")
    answer_text = str(query_engine.query(term_extract_str))
    return _parse_term_definitions(answer_text)