In [1]:
import os
from dotenv import load_dotenv

load_dotenv() 
api_key = os.environ['TOGETHER_API_API_KEY']

In [3]:
import warnings
import requests
import json
import time
from dotenv import find_dotenv

_ = load_dotenv(find_dotenv())
url = f"https://api.together.xyz/inference"
headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

def llama(prompt, 
          add_inst=True, 
          model="togethercomputer/llama-2-70b-chat", 
          temperature=0.0, 
          max_tokens=1024,
          verbose=False,
          url=url,
          headers=headers,
          base=2, # number of seconds to wait
          max_tries=3):
    
    if add_inst:
        prompt = f"[INST]{prompt}[/INST]"

    if verbose:
        print(f"Prompt:\n{prompt}\n")
        print(f"model: {model}")

    data = {
            "model": model,
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens
        }

    # Allow multiple attempts to call the API incase of downtime.
    # Return provided response to user after 3 failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]

    for num_tries in range(max_tries):
        try:
            response = requests.post(url, headers=headers, json=data)
            return response.json()['output']['choices'][0]['text']
        except Exception as e:
            if response.status_code != 500:
                return response.json()

            print(f"error message: {e}")
            print(f"response object: {response}")
            print(f"num_tries {num_tries}")
            print(f"Waiting {wait_seconds[num_tries]} seconds before automatically trying again.")
            time.sleep(wait_seconds[num_tries])
 
    print(f"Tried {max_tries} times to make API call to get a valid response object")
    print("Returning provided response")
    return response


def llama_chat(prompts, 
               responses,
               model="togethercomputer/llama-2-7b-chat", 
               temperature=0.0, 
               max_tokens=1024,
               verbose=False,
               url=url,
               headers=headers,
               base=2,
               max_tries=3
              ):

    prompt = get_prompt_chat(prompts,responses)

    # Allow multiple attempts to call the API incase of downtime.
    # Return provided response to user after 3 failed attempts.
    wait_seconds = [base**i for i in range(max_tries)]

    for num_tries in range(max_tries):
        try:
            response = llama(prompt=prompt,
                             add_inst=False,
                             model=model, 
                             temperature=temperature, 
                             max_tokens=max_tokens,
                             verbose=verbose,
                             url=url,
                             headers=headers
                            )
            return response
        except Exception as e:
            if response.status_code != 500:
                return response.json()

            print(f"error message: {e}")
            print(f"response object: {response}")
            print(f"num_tries {num_tries}")
            print(f"Waiting {wait_seconds[num_tries]} seconds before automatically trying again.")
            time.sleep(wait_seconds[num_tries])
 
    print(f"Tried {max_tries} times to make API call to get a valid response object")
    print("Returning provided response")
    return response


def get_prompt_chat(prompts, responses):
  prompt_chat = f"<s>[INST] {prompts[0]} [/INST]"
  for n, response in enumerate(prompts):
    prompt = prompts[n + 1]
    prompt_chat += f"\n{response}\n </s><s>[INST] \n{ prompt }\n [/INST]"

  return prompt_chat

In [4]:
with open("../data/cbp-lkg/data/SIMILARITY/MAPPING.json") as f:
    mapping = json.loads(f.read())

dict_mapping = {f'{mapping[k]}':k for k in mapping}

In [5]:
with open("../data/cbp-lkg/data/SIMILARITY/similarity_demo_file.json") as f:
    similarity_data = json.loads(f.read())

In [6]:
similarity_pairs = similarity_data[0]['links_train']
similarity_pairs[:5]

[['798', '719'],
 ['799', '670'],
 ['800', '840'],
 ['801', '840'],
 ['802', '176']]

In [7]:
doc_pairs = []

for pair in similarity_pairs:
    [_s, _o] = pair
    s = dict_mapping[_s]
    o = dict_mapping[_o]
    doc_pairs.append((s, o))

doc_pairs[:5]

[('1206754', '795465'),
 ('51545606', '997135'),
 ('1735815', '1857950'),
 ('383397', '1857950'),
 ('60799', '1844910')]

In [8]:
cases = list(set(mapping.keys()))
cases[:10]

['1857950',
 '119540048',
 '1407895',
 '743328',
 '211970',
 '371933',
 '685234',
 '820931',
 '198827109',
 '1633179']

In [10]:
import random

random.seed(42)
documents_random = random.sample(list(cases), 50)
print(documents_random)

['91635', '374340', '1132299', '70487596', '60799', '380655', '1213021', '623494', '1334665', '611025', '795465', '31239895', '139299795', '248715', '1239673', '113963352', '1878548', '314044', '138192511', '14898186', '1165503', '8919089', '110813550', '4354', '54865315', '7926018', '174974', '1586918', '115718142', '1877956', '1548289', '155579055', '36937', '1035719', '856194', '938898', '919121', '685234', '1822024', '1418391', '1011356', '491934', '1328814', '1767393', '800341', '1462242', '1401567', '399708', '1543623', '1006709']


In [12]:
import pandas as pd

case_details_df = pd.read_csv('../data/cbp-lkg/data/SIMILARITY/MERGED_RESULT.csv')
case_details_df

Unnamed: 0,tid,PERSON,NORP,ORG,FAC,GPE,LOC,PRODUCT,EVENT,WORK_OF_ART,...,/location/geography,/other/percent,/finance/currency,/location/structure,/id/ip_address,/other/organization,title,docsource,acts,infringement
0,928322,V P Anand,Indian,State,Bombay 133,India,-,Gunpower,-,The State of Madras,...,the central govt,-,40 per cent,l q r 518,-,ipc,Ajay Agarwal vs Union Of India And Ors on 5 Ma...,Supreme Court of India,indian penal code\ncode of criminal procedure ...,0
1,699319,Bhandare,T V S N,the High Court,123,India,District,-,-,Misc,...,district,-,-,-,-,court,S.M.D. Kiran Pasha vs Government Of Andhra Pra...,Supreme Court of India,essential commodities act\nconservation of for...,1
2,1988649,Rani Kusum,-,Court,-,Rampur,-,-,-,-,...,-,-,-,rule 1,-,court,Smt. Rani Kusum vs Smt. Kanchan Devi And Ors o...,Supreme Court of India,code of civil procedure\nconsumer protection a...,0
3,1362442,Gupta,-,Court,-,RAMASWAMY,-,Addl,-,J D,...,-,12,2 000,rule 4,-,court,Desh Bandhu Gupta vs N.L.Anand & Rajinder Sing...,Supreme Court of India,,0
4,357132,Patil,Indian,the High Court,the Mohol Sub Jail,Baburao,-,-,-,D S P,...,-,-,-,the mohol sub jail,-,court,Baburao Bajirao Patil vs State Of Maharashtra ...,Supreme Court of India,indian evidence act\n,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953,12050516,Shri H K Choudhary,Indian,TPO,the Mumbai Bench,Delhi,-,No 4791 Del 2015,-,Lump Sum Turn Key,...,assessee,100,-,rule 10b 3,-,-,"Terex India Pvt. Ltd., Tamil Nadu vs Dcit, New...",Income Tax Appellate Tribunal - Delhi,incometax act\n,1
954,123737198,Sanjay Dalia,Indian,the Supreme Court,Community Centre,Deogarh,-,-,-,Dhodha House,...,-,-,-,nipun tower,-,-,Ultra Home Construction Pvt. Ltd vs Purushotta...,Delhi High Court,code of civil procedure\ncopyright act\ntrade ...,1
955,1307808,Oswal Woollen,-,the High Court,the Calcutta Port,Ahmedabad,-,-,-,-,...,-,-,-,oswal woollen,-,the high court,Union Of India & Ors vs Adani Exports Ltd. & A...,Supreme Court of India,-,0
956,1747781,Ganesh,Indian,Company,Delhi 329,Hyderabad,-,Class 34,-,Edwards Vs Dennis,...,trade mark,-,trade marks,sections 46,-,the trade marks,Vishnudas Trading As Vishnudas vs The Vazir Su...,Supreme Court of India,general clauses act\ncopyright act\ntrade mark...,1


In [None]:
%%writefile ../legal-text-extract-for-summarization.sh

#!/bin/bash

source_dir="../data/cbp-lkg/data/LEGAL_TEXT/"
destination_dir="../results/case-similarity-text/for-summarization"

mkdir -p "$destination_dir"

for file in "$source_dir"/*.txt; do
    filename=$(basename -- "$file")
    destination_path="$destination_dir/$filename"
    cat "$file" | ag "\S" | head -n 50 | tail -n +20 > "$destination_path"
    if [ "$(wc -l < "$source_dir$filename")" -gt 30 ]; then
      cat "$file" | ag "\S" | tail -n 20 | head -n -4 >> "$destination_path"
    fi
    echo "Wrote $destination_path ..."
done

In [None]:
!chmod +x ../legal-text-extract-for-summarization.sh
!../legal-text-extract-for-summarization.sh | tail

In [117]:
case_details_df[case_details_df['tid'] == 123737198]['title'].shape[0]

1

In [125]:
import numpy as np

def get_example(doc_id):
    case_name_df = case_details_df[case_details_df['tid'] == doc_id].reset_index(drop=True)['title']
    s = ''
    if case_name_df.shape[0] == 1:
        s += '### Case Name: ' + case_details_df[case_details_df['tid'] == doc_id].reset_index(drop=True)['title'][0]
    with open(f'../results/case-similarity-text/tail/{doc_id}.txt') as f:
        s += '\n### Judgment Excerpt: ' + f.read()
    acts_df = case_details_df[case_details_df['tid'] == doc_id].reset_index(drop=True)['acts']
    if acts_df.shape[0] == 1:
        acts = acts_df[0]
        if acts != np.nan and len(acts) > 0 and acts != '-':
            s += '### Acts Mentioned: ' + str(acts.split('\n')[:-1])
    return s

In [126]:
print(get_example(123737198))

### Case Name: Ultra Home Construction Pvt. Ltd vs Purushottam Kumar Chaubey & Ors on 20 January, 2016
### Judgment Excerpt: > backdrop of the observations in Dhodha House (supra), the appellant /
> plaintiff carries on business in Deogarh, Jharkhand. It may also carry on
> business at Delhi. But, because the cause of action has allegedly arisen in
> Deogarh, Jharkhand, and not in Delhi, the appellant/plaintiff cannot sue the
> defendants/respondents in Delhi in view of the decision in Sanjay Dalia
> (supra). Thus, this court does not have the territorial jurisdiction to
> entertain the suit. And, on this count, the decision of the learned single
> judge cannot be faulted. But, the suit ought not to have been dismissed. As
> FAO (OS) 494/15 Page 25 of 26 this court did not have jurisdiction, the
> plaint ought to have been returned under order 7 rule 10 CPC. Only to that
> extent, the learned single judge had erred.
19\. In sum, the dismissal of the suit is set aside. Since this court 

In [120]:
def get_prompt(id):
    p = f'''You are given a part of a court judgment along with which case it belongs to. You are a legal expert.

{get_example(id)}

Write a summary of this court judgment.'''
    return p

In [121]:
print(get_prompt(123737198))

You are given a part of a court judgment along with which case it belongs to. You are a legal expert.

### Case Name: Ultra Home Construction Pvt. Ltd vs Purushottam Kumar Chaubey & Ors on 20 January, 2016
### Judgment Excerpt: > backdrop of the observations in Dhodha House (supra), the appellant /
> plaintiff carries on business in Deogarh, Jharkhand. It may also carry on
> business at Delhi. But, because the cause of action has allegedly arisen in
> Deogarh, Jharkhand, and not in Delhi, the appellant/plaintiff cannot sue the
> defendants/respondents in Delhi in view of the decision in Sanjay Dalia
> (supra). Thus, this court does not have the territorial jurisdiction to
> entertain the suit. And, on this count, the decision of the learned single
> judge cannot be faulted. But, the suit ought not to have been dismissed. As
> FAO (OS) 494/15 Page 25 of 26 this court did not have jurisdiction, the
> plaint ought to have been returned under order 7 rule 10 CPC. Only to that
> extent, the

In [122]:
response = llama(get_prompt(123737198), verbose=True)

Prompt:
[INST]You are given a part of a court judgment along with which case it belongs to. You are a legal expert.

### Case Name: Ultra Home Construction Pvt. Ltd vs Purushottam Kumar Chaubey & Ors on 20 January, 2016
### Judgment Excerpt: > backdrop of the observations in Dhodha House (supra), the appellant /
> plaintiff carries on business in Deogarh, Jharkhand. It may also carry on
> business at Delhi. But, because the cause of action has allegedly arisen in
> Deogarh, Jharkhand, and not in Delhi, the appellant/plaintiff cannot sue the
> defendants/respondents in Delhi in view of the decision in Sanjay Dalia
> (supra). Thus, this court does not have the territorial jurisdiction to
> entertain the suit. And, on this count, the decision of the learned single
> judge cannot be faulted. But, the suit ought not to have been dismissed. As
> FAO (OS) 494/15 Page 25 of 26 this court did not have jurisdiction, the
> plaint ought to have been returned under order 7 rule 10 CPC. Only to that

In [123]:
print(response)

  The court judgment is for the case "Ultra Home Construction Pvt. Ltd vs Purushottam Kumar Chaubey & Ors" and it pertains to the issue of territorial jurisdiction of a court to entertain a suit. The appellant/plaintiff, Ultra Home Construction Pvt. Ltd, carries on business in Deogarh, Jharkhand and also has a branch office in Delhi. The defendants/respondents, Purushottam Kumar Chaubey and others, are residents of Delhi. The cause of action for the suit allegedly arose in Deogarh, Jharkhand, and not in Delhi. Therefore, the court held that it does not have the territorial jurisdiction to entertain the suit as per the decision in Sanjay Dalia (supra).

The court further held that the suit ought not to have been dismissed but instead, the plaint should have been returned to the appellant/plaintiff for presentation before the proper court. The court directed the return of the plaint to the appellant/plaintiff and disposed of the appeal. The parties were directed to bear their own costs.


In [131]:
from tqdm.notebook import tqdm

prompts = []
summaries = []

for doc_id in tqdm(documents_random):
    prompt = get_prompt(doc_id)
    response = llama(prompt)
    prompts.append(prompt)
    summaries.append(response)
    # except:
    #     print('Error: ' + doc_id)

  0%|          | 0/50 [00:00<?, ?it/s]

In [136]:
summaries_df = pd.DataFrame({
    'jid': documents_random,
    'prompt': [f'[INST]{p}[/INST]' for p in prompts],
    'summary': summaries
})
summaries_df

Unnamed: 0,jid,prompt,summary
0,91635,[INST]You are given a part of a court judgment...,The court judgment is about a dispute over a...
1,374340,[INST]You are given a part of a court judgment...,The court judgment is regarding an appeal ag...
2,1132299,[INST]You are given a part of a court judgment...,The court judgment is regarding a trade mark...
3,70487596,[INST]You are given a part of a court judgment...,This court judgment is regarding an applicat...
4,60799,[INST]You are given a part of a court judgment...,The Supreme Court of India allowed several c...
5,380655,[INST]You are given a part of a court judgment...,The court judgment is regarding a case where...
6,1213021,[INST]You are given a part of a court judgment...,The Supreme Court of India has allowed the c...
7,623494,[INST]You are given a part of a court judgment...,The court judgment is regarding an appeal ag...
8,1334665,[INST]You are given a part of a court judgment...,The summary of this court judgment is that t...
9,611025,[INST]You are given a part of a court judgment...,This court judgment is allowing an appeal in...


In [137]:
summaries_df.to_csv('../results/case-similarity-summaries-llama2-70b-chat-030324-final.csv', index=False)