In [1]:
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
DEVICE


'cuda'

In [9]:
# Load the test CSV, keep only the two columns used downstream ("text" and
# "label"), and drop any row where either of them is missing.
# NOTE(review): absolute, machine-specific path — will not resolve elsewhere.
df_test = (
    pd.read_csv(
        r"E:\final year project\justifi-legal-ai\datasets\binary_test\CJPE_ext_SCI_HCs_Tribunals_daily_orders_test.csv"
    )
    .loc[:, ["text", "label"]]
    .dropna()
)


In [10]:
# Peek at the first five rows to sanity-check the loaded columns.
df_test.head(n=5)

Unnamed: 0,text,label
0,The plaintiffs are the appellants against the ...,1
1,(Judgment of the Court was made by The present...,0
2,"Mr. E.I.Sanmathi, Adv.for Ms. Vani H, Adv.for ...",0
3,"Govindasamy J. Lakshmi Vilas Bank, the first r...",0
4,The 1st petitioner was an LPG distributor at K...,0


In [3]:
# Checkpoint directory: folder with config.json, model.safetensors
MODEL_PATH = "inlegalbert_final"

# The tokenizer and the classifier are both read from the same local folder.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH, use_safetensors=True
)
model = model.to(DEVICE)

# Switch to inference mode (disables dropout); as the last expression of the
# cell this also displays the module summary.
model.eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [4]:
@torch.no_grad()
def predict_chunks(text, stride=256, max_length=512, batch_size=16):
    """Classify a long text window-by-window and return per-window probabilities.

    The text is tokenized into windows of ``max_length`` tokens; overflowing
    tokens are returned as additional windows, with ``stride`` forwarded to
    the tokenizer to control the overlap between consecutive windows. Every
    window is padded to ``max_length`` so the windows stack into one tensor.

    Parameters
    ----------
    text : str
        Document to classify.
    stride : int, default 256
        Forwarded to the tokenizer as ``stride``.
    max_length : int, default 512
        Window length in tokens, forwarded to the tokenizer as ``max_length``.
    batch_size : int, default 16
        Number of windows sent through the model per forward pass. Use 1 to
        run a single window at a time.

    Returns
    -------
    numpy.ndarray
        Softmax over ``dim=1`` of the model logits, one row per window
        (presumably shaped ``(num_windows, num_labels)`` — confirm against the
        model config).

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    encodings = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        stride=stride,
        return_overflowing_tokens=True,
        return_tensors="pt",
    )

    input_ids = encodings["input_ids"]
    attention_mask = encodings["attention_mask"]
    num_windows = input_ids.shape[0]

    probs_list = []

    # Feed several windows per forward pass instead of one at a time; only the
    # current slice is moved to DEVICE so memory use stays bounded.
    for start in range(0, num_windows, batch_size):
        stop = start + batch_size
        outputs = model(
            input_ids=input_ids[start:stop].to(DEVICE),
            attention_mask=attention_mask[start:stop].to(DEVICE),
        )
        probs = torch.softmax(outputs.logits, dim=1)
        probs_list.append(probs.cpu().numpy())

    return np.vstack(probs_list)


In [5]:
def aggregate_predictions(chunk_probs):
    """Collapse per-chunk class probabilities into one document-level verdict.

    The probabilities are averaged over axis 0 (one row per chunk). The class
    with the highest mean wins: index 1 is reported as "ACCEPT", anything else
    as "REJECT". The winning mean probability is returned as the confidence.

    Returns
    -------
    tuple[str, float]
        ``(label, confidence)``.
    """
    mean_probs = chunk_probs.mean(axis=0)
    winner = int(mean_probs.argmax())

    if winner == 1:
        verdict = "ACCEPT"
    else:
        verdict = "REJECT"

    return verdict, float(mean_probs.max())


In [6]:
def predict_document(text):
    """Predict ACCEPT/REJECT for one document given as raw text.

    Inputs that are not strings, or whose stripped length is under 50
    characters, are not sent to the model; a placeholder result with
    prediction "TEXT_TOO_SHORT", confidence 0.0 and zero chunks is returned
    instead.

    Returns
    -------
    dict
        Keys: "prediction" (label string), "confidence" (rounded to four
        decimals) and "num_chunks" (rows returned by ``predict_chunks``).
    """
    usable = isinstance(text, str) and len(text.strip()) >= 50
    if not usable:
        return dict(prediction="TEXT_TOO_SHORT", confidence=0.0, num_chunks=0)

    per_chunk = predict_chunks(text)
    verdict, score = aggregate_predictions(per_chunk)

    return dict(
        prediction=verdict,
        confidence=round(score, 4),
        num_chunks=per_chunk.shape[0],
    )


In [7]:
def evaluate_documents(df, limit=None):
    """Run document-level prediction over a DataFrame and collect labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a "text" column (passed to ``predict_document``) and a
        "label" column (used as ground truth).
    limit : int, optional
        Evaluate only the first ``limit`` rows; ``None`` evaluates all rows.

    Returns
    -------
    tuple[list, list]
        ``(y_true, y_pred)``. ``y_pred`` holds 1 for "ACCEPT" and 0 for
        "REJECT". Rows for which ``predict_document`` returned anything else
        (e.g. "TEXT_TOO_SHORT") appear in neither list.
    """
    y_true, y_pred = [], []
    skipped = 0

    df_iter = df if limit is None else df.head(limit)

    for _, row in tqdm(df_iter.iterrows(), total=len(df_iter)):
        res = predict_document(row["text"])

        # A non-verdict (such as "TEXT_TOO_SHORT") means the model never ran.
        # Scoring it as class 0 would count it as a REJECT prediction and skew
        # the metrics, so such rows are excluded instead.
        if res["prediction"] not in ("ACCEPT", "REJECT"):
            skipped += 1
            continue

        y_true.append(row["label"])
        y_pred.append(1 if res["prediction"] == "ACCEPT" else 0)

    if skipped:
        print(f"Skipped {skipped} document(s) without a model prediction.")

    return y_true, y_pred


In [None]:
# Score the first 500 test documents and print the headline binary metrics.
y_true, y_pred = evaluate_documents(df_test, limit=500)

for heading, metric_fn in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
):
    print(heading, metric_fn(y_true, y_pred))


100%|██████████| 500/500 [02:51<00:00,  2.92it/s]

Accuracy : 0.7
Precision: 0.6887966804979253
Recall   : 0.6887966804979253
F1-score : 0.6887966804979253





In [12]:
# Score the first 700 test documents and print the headline binary metrics.
y_true, y_pred = evaluate_documents(df_test, limit=700)

for heading, metric_fn in (
    ("Accuracy :", accuracy_score),
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1-score :", f1_score),
):
    print(heading, metric_fn(y_true, y_pred))


100%|██████████| 700/700 [03:56<00:00,  2.96it/s]

Accuracy : 0.7028571428571428
Precision: 0.6923076923076923
Recall   : 0.6923076923076923
F1-score : 0.6923076923076923





In [13]:
import pdfplumber

def read_pdf(pdf_path):
    """Extract the text of every page of a PDF into a single string.

    Pages for which ``extract_text()`` returns nothing (``None`` or an empty
    string) are skipped. The remaining page texts are joined with a single
    space, and leading/trailing whitespace is stripped from the result.
    """
    with pdfplumber.open(pdf_path) as pdf:
        extracted = (page.extract_text() for page in pdf.pages)
        # Materialise inside the `with` block, while the PDF is still open.
        page_texts = [chunk for chunk in extracted if chunk]
    return " ".join(page_texts).strip()


In [14]:
def predict_pdf(pdf_path):
    """Read the PDF at ``pdf_path`` and return ``predict_document`` of its text."""
    return predict_document(read_pdf(pdf_path))


In [15]:
# Try the full pipeline on a single PDF.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
pdf_path = r"C:\Users\vikas\Downloads\Amar_Singh_vs_The_State_Nct_Of_Delhi_on_12_October_2020_1.PDF"
pdf_result = predict_pdf(pdf_path)
pdf_result


{'prediction': 'ACCEPT', 'confidence': 0.6349, 'num_chunks': 30}

In [16]:
from sklearn.metrics import confusion_matrix, classification_report

# Break the most recent evaluation (y_true / y_pred) down per class.
conf_mat = confusion_matrix(y_true, y_pred)
print(conf_mat)

per_class_report = classification_report(y_true, y_pred, digits=4)
print(per_class_report)


[[258 104]
 [104 234]]
              precision    recall  f1-score   support

           0     0.7127    0.7127    0.7127       362
           1     0.6923    0.6923    0.6923       338

    accuracy                         0.7029       700
   macro avg     0.7025    0.7025    0.7025       700
weighted avg     0.7029    0.7029    0.7029       700

