In [17]:
import os
os.environ["DATASETS_BACKEND"] = "fastparquet"

from datasets import load_dataset

ds = load_dataset("CJWeiss/ZeroLexSumm", "zero_billsum")

ImportError: The pyarrow installation is not built with support for the Parquet file format (DLL load failed while importing _parquet: The specified module could not be found.)

In [None]:
from ZeroLexSumm import load_dataset

ds = load_dataset("CJWeiss/ZeroLexSumm", "zero_inabs")

ModuleNotFoundError: No module named 'ZeroLexSumm'

In [None]:
# Load your dataset
df = pd.read_csv("legal_summaries.csv")
# Columns: ['id', 'reference_summary', 'generated_summary', 'source_text']

references = df['reference_summary'].tolist()
predictions = df['generated_summary'].tolist()
sources = df['source_text'].tolist()


In [None]:
rouge = load("rouge")
meteor = load("meteor")

rouge_results = rouge.compute(predictions=predictions, references=references)
meteor_results = meteor.compute(predictions=predictions, references=references)

print("ROUGE:", rouge_results)
print("METEOR:", meteor_results)


In [None]:
# BERTScore (using LegalBERT)
P, R, F1 = score(predictions, references, lang="en", model_type="nlpaueb/legal-bert-base-uncased")
print("BERTScore F1:", F1.mean().item())

# BARTScore
from bart_score import BARTScorer
bart_scorer = BARTScorer(device='cuda' if torch.cuda.is_available() else 'cpu', checkpoint='facebook/bart-large-cnn')

bart_scores = bart_scorer.score(predictions, references, batch_size=4)
print("BARTScore Average:", sum(bart_scores)/len(bart_scores))


In [None]:
model = SummaCZS(granularity="paragraph", model_name="vitc", device="cuda" if torch.cuda.is_available() else "cpu")
factual_scores = []

for src, pred in zip(sources, predictions):
    factuality = model.score([src], [pred])
    factual_scores.append(factuality['scores'][0])

df['factuality'] = factual_scores
print("Average factuality:", df['factuality'].mean())


In [None]:
nlp = spacy.load("en_core_web_sm")

def entity_precision(ref, pred):
    ref_ents = {ent.text for ent in nlp(ref).ents}
    pred_ents = {ent.text for ent in nlp(pred).ents}
    if not pred_ents: return 1.0 if not ref_ents else 0.0
    correct = len(pred_ents & ref_ents)
    return correct / len(pred_ents)

df['NEPrec'] = [entity_precision(r, p) for r, p in zip(references, predictions)]
print("Average NEPrec:", df['NEPrec'].mean())


In [None]:
def numeric_precision(ref, pred):
    ref_nums = set(re.findall(r'\d+', ref))
    pred_nums = set(re.findall(r'\d+', pred))
    if not pred_nums: return 1.0 if not ref_nums else 0.0
    correct = len(pred_nums & ref_nums)
    return correct / len(pred_nums)

df['NumPrec'] = [numeric_precision(r, p) for r, p in zip(references, predictions)]
print("Average NumPrec:", df['NumPrec'].mean())


In [None]:
df['ROUGE_L'] = rouge.compute(predictions=predictions, references=references)['rougeL']
df['BERT_F1'] = [f.item() for f in F1]

summary_metrics = {
    "ROUGE-L": df['ROUGE_L'].mean(),
    "METEOR": meteor_results['meteor'],
    "BERTScore-F1": df['BERT_F1'].mean(),
    "BARTScore": sum(bart_scores)/len(bart_scores),
    "Factuality": df['factuality'].mean(),
    "NEPrec": df['NEPrec'].mean(),
    "NumPrec": df['NumPrec'].mean()
}

print(pd.DataFrame(summary_metrics, index=["Hybrid Model"]))


In [None]:
comparison = pd.DataFrame([
    {"Model": "Extractive", "ROUGE-L": 0.74, "BERTScore-F1": 0.82, "Factuality": 0.91, "NEPrec": 0.83},
    {"Model": "Abstractive", "ROUGE-L": 0.68, "BERTScore-F1": 0.88, "Factuality": 0.77, "NEPrec": 0.80},
    {"Model": "Hybrid", "ROUGE-L": 0.81, "BERTScore-F1": 0.90, "Factuality": 0.92, "NEPrec": 0.89},
])

print(comparison)
