In [2]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Load the pretrained Pegasus tokenizer and model, then place the model on
# the selected device.
model_name = "google/pegasus-large"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)

Using device: cuda


In [3]:
def chunk_text(text, tokenizer, max_length=1024, overlap=200):
    """Split ``text`` into overlapping chunks of at most ``max_length`` tokens.

    The text is tokenized once, cut into sliding windows that advance by
    ``max_length - overlap`` tokens, and every window is decoded back to a
    string.

    Args:
        text: The document to split.
        tokenizer: Object exposing ``encode(text, add_special_tokens=...)``
            and ``decode(ids, skip_special_tokens=...)``.
        max_length: Maximum number of tokens per chunk. Must be positive.
        overlap: Number of tokens shared by consecutive chunks. Must satisfy
            ``0 <= overlap < max_length``.

    Returns:
        A list of chunk strings, in document order. Empty when ``text``
        produces no tokens.

    Raises:
        ValueError: If ``max_length`` is not positive or ``overlap`` is
            outside ``[0, max_length)``.
    """
    if max_length <= 0:
        raise ValueError(f"max_length must be positive, got {max_length}")
    if not 0 <= overlap < max_length:
        # overlap == max_length would give range() a step of 0, and a larger
        # overlap would silently yield no chunks at all.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < max_length, "
            f"got overlap={overlap}, max_length={max_length}"
        )

    # Encode without special tokens so markers such as EOS are not baked
    # into the chunk text that is re-tokenized later.
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    total = len(token_ids)
    stride = max_length - overlap

    chunks = []
    for start in range(0, total, stride):
        window = token_ids[start : start + max_length]
        chunks.append(tokenizer.decode(window, skip_special_tokens=True))
        # Once a window reaches the end of the document, any further window
        # would only repeat text that is already covered.
        if start + max_length >= total:
            break

    return chunks

In [4]:
def summarize_chunks(chunks, model, tokenizer, max_input_length=1024, max_summary_length=256):
    """Generate one summary string per text chunk.

    Args:
        chunks: Iterable of text chunks to summarize.
        model: Sequence-to-sequence model exposing ``device`` and
            ``generate(**inputs, max_length=...)``.
        tokenizer: Callable tokenizer returning an encoding with ``.to()``,
            and exposing ``decode(ids, skip_special_tokens=...)``.
        max_input_length: Token limit each chunk is truncated to before it
            is passed to the model.
        max_summary_length: ``max_length`` passed to ``model.generate``.

    Returns:
        A list of decoded summaries, one per chunk, in input order.
    """
    # Use the device the given model lives on rather than a notebook-level
    # global, so the function works with whatever model is passed in.
    target_device = model.device

    summaries = []
    # Gradients are not needed for inference.
    with torch.no_grad():
        for chunk in chunks:
            inputs = tokenizer(
                chunk,
                return_tensors="pt",
                truncation=True,
                max_length=max_input_length,
            ).to(target_device)

            summary_ids = model.generate(**inputs, max_length=max_summary_length)

            # generate() returns a batch; each call here holds a single chunk.
            summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))

    return summaries

In [5]:
import pandas as pd
# Read the dataset from the working directory; later cells use its
# `input_text` column.
data = pd.read_csv("legal_summaries.csv")

In [1]:
import warnings
import tensorflow as tf

# Silence every Python warning from this point on. This also hides
# deprecation notices from the libraries used in this notebook.
warnings.filterwarnings(action="ignore")

# Raise the TensorFlow v1-compat logger threshold to ERROR.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)






In [6]:
# Pick the document at row label 1 and cut it into model-sized chunks.
legal_text = data.loc[1, "input_text"]
chunks = chunk_text(legal_text, tokenizer)

# Produce one short summary for every chunk.
chunk_summaries = summarize_chunks(chunks, model, tokenizer)

# Concatenate the per-chunk summaries, separated by single spaces, and show
# the result.
final_summary = " ".join(chunk_summaries)
print(final_summary)

Token indices sequence length is longer than the specified maximum sequence length for this model (2976 > 1024). Running this sequence through the model will result in indexing errors


This appeal arises out of a suit brought by the respondent in the court of the Subordinate Judge, Dhanbad, for recovery of arrears of royalty and cess from the appellant and another alleged to be due under a compromise decree passed on the 6th March, 1923, in a previ ous suit between the predecessors in interest of the par ties. On 7th June, 1921, Kumar executed another perma nent patta leasing the right to the coal in 500 bighas out of the 5,800 bighas referred to above to one Prayngji Bal lavji Deoshi and his son Harakchand Deoshi (hereinafter referred to as the Deoshis). On 11th March, 1921, one Kumar Krishna Prasad Singh (hereinafter referred to as Kumar) granted a perma nent lease of the right to the underground coal in 5,800 bighas of land belonging to him to Shibsaran Singh and Sitaram Singh (hereinafter referred to as the Singhs) by a registered patta stipulating for a salami of Rs. On 7th June, 1921, Kumar executed another perma nent patta leasing the right to the coal in 500 

In [10]:
# Show the raw source document at row label 1 for comparison with the summary.
data.loc[1, "input_text"]

'Civil Appeal No.94 of 1949.\n107 834 Appeal from a judgment and decree of the High Court of Judi cature at Patna in Appeal from Appellate Decree No. 97 of 1946 (Mannohar Lall and Mukherji JJ.) dated 23rd Decem ber, 1947, confirming the judgment of the District Judge of Purulia in Appeal No. 159 of 1944.\nS.P. Sinha (P. K. Bose, with him) for the appel lant.\nN.C. Chatterjee and Panchanan Ghosh (Chandra Narayan Naik, with them) for the respondent. 1950.\nDecember 1.\nThe Judgment of the Court was deliv ered by PATANJALI SASTRI J.\nThis appeal arises out of a suit brought by the respondent in the court of the Subordinate Judge, Dhanbad, for recovery of arrears of royalty and cess from the appellant and another alleged to be due under a compromise decree passed on the 6th March, 1923, in a previ ous suit between the predecessors in interest of the par ties.\nThe only plea which is material for the purpose of this appeal is that the compromise decree not having been registered was inadmis