In [1]:
from llama_index import SimpleDirectoryReader, GPTListIndex, LLMPredictor, GPTVectorStoreIndex, PromptHelper, ServiceContext, StorageContext, load_index_from_storage, Document
from langchain import OpenAI
from openai.error import RateLimitError
import sys, os, shutil
import pandas as pd
from pathlib import Path

In [2]:
def move_files_into_folder():
  # move files into a folder
  dir_name = 'knowledge'
  for i in os.listdir(dir_name):
    os.mkdir(os.path.join(dir_name , i.split(".")[0]))
    shutil.move(os.path.join(dir_name , i), os.path.join(dir_name , i.split(".")[0]))

In [3]:
os.environ["OPENAI_API_KEY"] = "YOUR OPENAI KEY HERE"

In [4]:
def create_index(path,folder):
    max_input = 4096
    tokens = 256
    chunk_size = 600
    max_chunk_overlap = 0.1

    prompt_helper = PromptHelper(max_input, tokens, max_chunk_overlap, chunk_size_limit=chunk_size)

    #define LLM
    llmPredictor = LLMPredictor(llm=OpenAI(temperature=0, model="text-ada-001", max_tokens=tokens))

    #load data
    docs = SimpleDirectoryReader(path).load_data()

    #create vector index
    service_context = ServiceContext.from_defaults(llm_predictor=llmPredictor,prompt_helper=prompt_helper)
    vectorIndex = GPTVectorStoreIndex.from_documents(documents=docs,service_context=service_context)

    vectorIndex.storage_context.persist(persist_dir="Store/"+folder)

    return vectorIndex

In [16]:
# for creating knowledge database
for folders in os.listdir('knowledge'):
    f = os.path.join('knowledge', folders)
    print(f,folders)
    create_index(f,folders)

knowledge\DCCC000526_2010 DCCC000526_2010


In [5]:
if os.path.isfile('test_criminal.csv'):
    df = pd.read_csv('test_criminal.csv')
else:
    df = pd.DataFrame(columns=['Case ID','Date','Defendent','Offence','Judgement','Social work hours','Jail time days'])

In [6]:
def answerMe(question,folder):
    storage_context = StorageContext.from_defaults(persist_dir=folder)
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine()
    responce = query_engine.query(question)
    return responce

In [8]:
# for creating knowledge database
for index,folders in enumerate(os.listdir('store')):

    if index < 1 or index > 1: break
    
    f = os.path.join('store', folders)
    if "json" in folders: break
    print(f,folders)

    try:
        responce_case = answerMe("What is the case number?",f)
        print(str(responce_case))
    except RateLimitError:
        print("reached limit")
        break

    try:
        responce_date = answerMe("What is the date of ruling?",f)
        print(str(responce_date))
    except RateLimitError:
        print("reached limit")
        break

    try:
        responce_defendant = answerMe("What is the defendant(s) name?",f)
        print(str(responce_defendant))
    except RateLimitError:
        print("reached limit")
        break

    try:
        responce_offence = answerMe("List the offence(s)",f)
        print(str(responce_offence))
    except RateLimitError:
        print("reached limit")
        break
    try:
        responce_judgement = answerMe("is this a final judgement?",f)
        print(str(responce_judgement))
    except RateLimitError:
        print("reached limit")
        break

    if "No" in str(responce_judgement):
        print("No")
        responce_punishment = "To be determined"
        punishment_social_work = ""
        punishment_jail_time = ""
        
    elif "Yes" in str(responce_judgement):
        print("Yes")

        try:
            responce_punishment = answerMe("what is the verdict?",f)
            print(str(responce_punishment))
        except RateLimitError:
            print("reached limit")
            break

        # to be implenmented
        punishment_social_work = ""
        punishment_jail_time = ""

    df.loc[len(df)] = ["",responce_case, responce_date, responce_defendant, responce_offence, responce_punishment, punishment_social_work, punishment_jail_time]

store\DCCC000526_2010 DCCC000526_2010

The case number is not provided in the context information.

February 2011

The defendant's name is not given in the context information.

Fraudulently or recklessly inducing others to invest money contrary to sections 1071 A(1) and 2(a) of the Securities and Futures Ordinance Cap.571.

Yes, this is a final judgement. The text includes the District Judge's conclusion that none of the seven charges is proved beyond all reasonable doubt and that the charges are dismissed. The text also includes the District Judge's order for costs to be taxed if not agreed.
Yes

The verdict is that none of the seven charges is proved beyond all reasonable doubt and therefore they are dismissed.
store\DCCC000527_2010 DCCC000527_2010

The case number is 527 & 1272 of 2010.

25 May 2011

Tai Ching

The offence is fraudulently or recklessly inducing others to invest money contrary to section 107 of the Securities and Futures Ordinance, Cap. 571.

Yes, this is a final ju

FileNotFoundError: [Errno 2] No such file or directory: 'e:/intern/4 legal case search/chatgpt test/store/docstore.json/docstore.json'

In [6]:
df.to_csv("test_criminal.csv")