# PLS Generation with **Claude** + Readability + AlignScore

This notebook downloads biomedical abstracts, generates Plain Language Summaries (PLS) using **Anthropic Claude**, computes readability metrics via `textstat`, and evaluates factual alignment with **AlignScore**.


## Setup & Installs

In [2]:
# If running locally/Colab, uncomment installs as needed
# !pip install anthropic pandas requests
# AlignScore requires a specific lightning; install with no-deps to avoid conflicts
# !pip install git+https://github.com/yuh-zha/AlignScore.git --no-deps
# !pip install pytorch-lightning==1.9.5
# !pip install bert-score evaluate py-readability-metrics

Collecting py-readability-metrics
  Downloading py_readability_metrics-1.4.5-py3-none-any.whl.metadata (8.8 kB)
Downloading py_readability_metrics-1.4.5-py3-none-any.whl (26 kB)
Installing collected packages: py-readability-metrics
Successfully installed py-readability-metrics-1.4.5


## Imports & API Key

In [1]:
from google.colab import userdata
import os, time, json, math, requests
from pathlib import Path
import pandas as pd
from readability import Readability
import torch
import math, inspect, re
import nltk
nltk.download('punkt_tab')
from alignscore import AlignScore

try:
    import anthropic
except Exception as e:
    print("anthropic SDK not installed. Please run the pip install cell above.")
    raise

ANTHROPIC_API_KEY = userdata.get('ANTHROPIC_API_KEY')
os.environ['ANTHROPIC_API_KEY'] = ANTHROPIC_API_KEY
if 'ANTHROPIC_API_KEY' not in os.environ:
    print("⚠️  Missing ANTHROPIC_API_KEY in environment. Set it before calling the API.")

client = anthropic.Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
CLAUDE_MODEL = "claude-sonnet-4-5-20250929"

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [6]:
#!sed -i '3s/.*/from torch.optim import AdamW\nfrom transformers import get_linear_schedule_with_warmup, AutoConfig/' /usr/local/lib/python3.12/dist-packages/alignscore/model.py

## Load Local Texts

In [2]:
df_abs = pd.read_csv('/content/downloaded_texts_content.csv')
df_abs.head()

Unnamed: 0,filename,text
0,10.1002-14651858.CD014257.pub2-abstract.txt,Background\nFunctional constipation is defined...
1,10.1002-14651858.CD003459.pub4-abstract.txt,Methotrexate for treatment of active treatment...
2,10.1002-14651858.CD013446-abstract.txt,Background\nVitamin D deficiency during pregna...
3,10.1002-14651858.CD013283.pub2-abstract.txt,Background\nHypoglycaemia is a common occurren...
4,10.1002-14651858.CD014953.pub2-abstract.txt,Background\nCataract surgery is the most commo...


In [6]:
import csv
pls_file_names = []
with open('/content/pls_names.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        pls_file_names.append(row[0]) # Assuming each row contains one filename

print(f"Loaded {len(pls_file_names)} filenames from pls_names.csv")

Loaded 300 filenames from pls_names.csv


In [7]:
github_base_urls = [
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/",
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/test/pls/",
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/train/pls/"]
local_download_dir = 'pls_downloaded_texts'

# Create the local directory if it doesn't exist
os.makedirs(local_download_dir, exist_ok=True)

downloaded_count = 0

for file_name in pls_file_names:
    local_file_path = os.path.join(local_download_dir, file_name)
    downloaded = False

    if os.path.exists(local_file_path):
        print(f"File {file_name} already exists locally. Skipping download.")
        downloaded_count += 1
        downloaded = True
        continue # Skip to the next file_name if already downloaded


    for base_url in github_base_urls:
        file_url = base_url + file_name

        try:
            print(f"Attempting to download {file_name} from {base_url}")
            file_content_response = requests.get(file_url)
            file_content_response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

            with open(local_file_path, 'w', encoding='utf-8') as f:
                f.write(file_content_response.text)
            downloaded_count += 1
            downloaded = True
            print(f"Successfully downloaded {file_name}")
            break  # Move to the next file_name if download is successful

        except requests.exceptions.RequestException as e:
            print(f"Could not download {file_name} from {base_url}: {e}")
        except Exception as e:
            print(f"An unexpected error occurred while processing {file_name} from {base_url}: {e}")

    if not downloaded:
        print(f"Warning: Could not download {file_name} from any of the provided URLs.")


print(f"Download complete. Successfully downloaded {downloaded_count} out of {len(pls_file_names)} files.")

File 10.1002-14651858.CD000371.pub7-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD001218.pub3-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD001977.pub2-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD002201.pub6-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD002779.pub3-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD002948.pub2-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD003147.pub5-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD003315.pub3-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD003459.pub4-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD003552.pub4-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD003737.pub4-pls.txt already exists locally. Skipping download.
File 10.1002-14651858.CD004019.pub4-pls.txt

In [8]:
pls_downloaded_texts_content = {}
for filename in os.listdir(local_download_dir):
    if filename.endswith('.txt'):
        filepath = os.path.join(local_download_dir, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            clean_name = re.sub(r"\.pub\d+-pls\.txt|-pls\.txt", "", filename)
            pls_downloaded_texts_content[clean_name] = f.read()

## Generate PLS with Claude

In [33]:
prompt = """ Using the following abstract of a biomedical study as input, generate a Plain Language Summary
(PLS) understandable by any patient, regardless of their health literacy. Ensure that the generated text
adheres to the following instructions which should be followed step-by-step:
a. Specific Structure: The generated PLS should be presented in a logical order, using the following
order:
1. Plain Title
2. Rationale
3. Trial Design
4. Results
b. Sections should be authored following these parameters:
1. Plain Title: Simplified title understandable to a layperson that summarizes the research that was
done.
2. Rationale: Include: background or study rationale providing a general description of the
condition, what it may cause or why it is a burden for the patients; the reason and main hypothesis
for the study; and why the study is needed, and why the study medication has the potential to
treat the condition.
3. Trial Design: Answer ‘How is this study designed?’ Include the description of the design,
description of study and patient population (age, health condition, gender), and the expected
amount of time a person will be in the study.
4. Results: Answer ‘What were the main results of the study’, include the benefits for the patients,
how the study was relevant for the area of study, and the conclusions from the investigator.
c. Consistency and Replicability: The generated PLS should be consistent regardless of the order of
sentences or the specific phrasing used in the input protocol text.
d. Compliance with Plain Language Guidelines: The generated PLS must follow all these plain
language guidelines:
• Have readability grade level of 6 or below.
• Do not have jargon. All technical or medical words or terms should be defined or broken down
into simple and logical explanations.
• Active voice, not passive.
• Mostly one or two syllable words.
• Sentences of 15 words or less.
• Short paragraphs of 3-5 sentences.
• Simple numbers (e.g., ratios, no percentages).
e. Do not invent Content: The AI model should not invent information. If the AI model includes data
other than the one given in the input abstract, the AI model should guarantee such data is verified and
real.
f. Aim for an approximate PLS length of 500-900 words.


Abstract of a biomedical study text: {text}
"""

def simplify_with_claude(text, max_tokens=1200, temperature=0.3):
    rendered_prompt = prompt.format(text=text)
    # Uses Messages API
    msg = client.messages.create(
        model=CLAUDE_MODEL,
        max_tokens=max_tokens,
        temperature=temperature,
        system="You are concise, careful, and faithful to the source.",
        messages=[{"role": "user", "content": [{"type": "text", "text": rendered_prompt}]}])
    # Extract plain text from the response
    parts = []
    for block in msg.content:
        if block.type == "text":
            parts.append(block.text)
    return "\n".join(parts).strip()

simplified_texts = {}
timings = []

for i, row in enumerate(df_abs.itertuples()):
    start = time.time()
    try:
        simplified = simplify_with_claude(row.text)
        simplified_texts[row.filename] = simplified
        dt = time.time() - start
        timings.append(dt)
        print(f"[{i}/{len(df_abs)}] {row.filename}: {dt:.1f}s")
    except anthropic.APIStatusError as e:
        print(f"API error for {row.filename}: {e.status_code} {e.message}")
    except Exception as e:
        print(f"Unexpected error for {row.filename}: {e}")

if timings:
    print(f"Average time per file: {sum(timings)/len(timings):.1f}s")

[0/300] 10.1002-14651858.CD014257.pub2-abstract.txt: 20.7s
[1/300] 10.1002-14651858.CD003459.pub4-abstract.txt: 19.1s
[2/300] 10.1002-14651858.CD013446-abstract.txt: 19.2s
[3/300] 10.1002-14651858.CD013283.pub2-abstract.txt: 21.3s
[4/300] 10.1002-14651858.CD014953.pub2-abstract.txt: 20.3s
[5/300] 10.1002-14651858.CD015397-abstract.txt: 19.8s
[6/300] 10.1002-14651858.CD013173.pub2-abstract.txt: 20.3s
[7/300] 10.1002-14651858.CD013194-abstract.txt: 22.8s
[8/300] 10.1002-14651858.CD013276.pub2-abstract.txt: 19.7s
[9/300] 10.1002-14651858.CD013705.pub3-abstract.txt: 22.8s
[10/300] 10.1002-14651858.CD008838.pub2-abstract.txt: 19.0s
[11/300] 10.1002-14651858.CD010613.pub2-abstract.txt: 19.5s
[12/300] 10.1002-14651858.CD013333.pub2-abstract.txt: 21.8s
[13/300] 10.1002-14651858.CD009961.pub2-abstract.txt: 21.1s
[14/300] 10.1002-14651858.CD013444.pub2-abstract.txt: 19.1s
[15/300] 10.1002-14651858.CD013259.pub2-abstract.txt: 41.8s
[16/300] 10.1002-14651858.CD004474.pub2-abstract.txt: 21.6s
[17/3

## Build DataFrame

In [13]:
df_abs['original_text'] = df_abs['text']
df_abs = df_abs.drop(columns=['text'])
df_abs["simplified_text"] = df_abs["filename"].map(simplified_texts)
df_abs['pls'] = df_abs['filename'].map(pls_downloaded_texts_content)
df = df_abs.dropna(subset=["original_text", "simplified_text"])

print(df.shape)
display(df.head())

df.to_csv("simplified_texts.csv", index_label="filename")
print("Saved to simplified_texts.csv")

NameError: name 'simplified_texts' is not defined

## Readability Metrics (textstat)

In [9]:
df = pd.read_csv("/content/simplified_texts (1).csv")
df.drop(columns=["filename"], inplace=True)
df.rename(columns={"filename.1": "filename"}, inplace=True)
df["base_filename"] = df["filename"].str.replace(r"\.pub\d+-abstract\.txt|-abstract\.txt", "", regex=True)
df["pls"] = df["base_filename"].map(pls_downloaded_texts_content)

display(df.head(10))
display(df.info())

Unnamed: 0,filename,original_text,simplified_text,base_filename,pls
0,10.1002-14651858.CD014257.pub2-abstract.txt,Background\nFunctional constipation is defined...,# Plain Language Summary: Probiotics for Treat...,10.1002-14651858.CD014257,Probiotics for treatment of chronic constipati...
1,10.1002-14651858.CD003459.pub4-abstract.txt,Methotrexate for treatment of active treatment...,# Plain Language Summary: Methotrexate for Tre...,10.1002-14651858.CD003459,Methotrexate for treatment of active treatment...
2,10.1002-14651858.CD013446-abstract.txt,Background\nVitamin D deficiency during pregna...,# Plain Language Summary: Testing Different Am...,10.1002-14651858.CD013446,Regimens of vitamin D supplementation for wome...
3,10.1002-14651858.CD013283.pub2-abstract.txt,Background\nHypoglycaemia is a common occurren...,# Plain Language Summary: Best Way to Give Sug...,10.1002-14651858.CD013283,First aid glucose administration routes for sy...
4,10.1002-14651858.CD014953.pub2-abstract.txt,Background\nCataract surgery is the most commo...,# Plain Language Summary\n\n## Plain Title\nDo...,10.1002-14651858.CD014953,Can virtual reality training for cataract surg...
5,10.1002-14651858.CD015397-abstract.txt,Background\nWith the emergence of SARS‐CoV‐2 i...,# Plain Language Summary\n\n## Plain Title\nWh...,10.1002-14651858.CD015397,Unintended consequences of school‐based measur...
6,10.1002-14651858.CD013173.pub2-abstract.txt,Background\nAutistic spectrum disorder (ASD) i...,# Plain Language Summary: Talking Therapy for ...,10.1002-14651858.CD013173,Behavioural and cognitive behavioural therapy ...
7,10.1002-14651858.CD013194-abstract.txt,Background\nMelanoma has one of the fastest ri...,# Plain Language Summary: Checking Skin Spots ...,10.1002-14651858.CD013194,How accurate is visual inspection of skin lesi...
8,10.1002-14651858.CD013276.pub2-abstract.txt,Background\nA considerable challenge for mater...,# Plain Language Summary: Using Early Warning ...,10.1002-14651858.CD013276,Physiological track‐and‐trigger/early warning ...
9,10.1002-14651858.CD013705.pub3-abstract.txt,Background\nAccurate rapid diagnostic tests fo...,# Plain Language Summary: Rapid Tests to Detec...,10.1002-14651858.CD013705,How accurate are rapid antigen tests for diagn...


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   filename         300 non-null    object
 1   original_text    300 non-null    object
 2   simplified_text  300 non-null    object
 3   base_filename    300 non-null    object
 4   pls              300 non-null    object
dtypes: object(5)
memory usage: 11.8+ KB


None

In [10]:
df['simplified_text'][0]

'# Plain Language Summary: Probiotics for Treating Constipation in Children\n\n## Plain Title\nDo probiotics help children with long-term constipation that has no physical cause?\n\n## Rationale\n\n**What is the health problem?**\n\nSome children have long-term constipation with no physical cause. Doctors call this "functional constipation." These children have trouble pooping regularly, even though tests show nothing is wrong with their body. This problem is very common. Up to 1 in 4 visits to children\'s gut doctors are because of this issue. It causes a lot of discomfort and worry for children and their families.\n\n**Why was this study needed?**\n\nOur gut contains tiny living things called bacteria. These bacteria help our body work properly. Probiotics are "good" bacteria that people can take as pills or drinks. Scientists think probiotics might help change the mix of bacteria in the gut. This could help the gut work better and make pooping easier.\n\n**What did researchers want 

In [11]:
df['original_text'][0]

"Background\nFunctional constipation is defined as chronic constipation with no identifiable underlying cause. It is a significant cause of morbidity in children, accounting for up to 25% of visits to paediatric gastroenterologists. Probiotic preparations may sufficiently alter the gut microbiome and promote normal gut physiology in a way that helps relieve functional constipation. Several studies have sought to address this hypothesis, as well as the role of probiotics in other functional gut disorders. Therefore, it is important to have a focused review to assess the evidence to date. \nObjectives\nTo evaluate the efficacy and safety of probiotics for the management of chronic constipation without a physical explanation in children. \nSearch methods\nOn 28 June 2021, we searched CENTRAL, MEDLINE, Embase, CINAHL, AMED, WHO ICTR, and ClinicalTrials.gov, with no language, date, publication status, or document type limitations. \nSelection criteria\nWe included randomised controlled tria

In [12]:
def safe_readability(fn, txt):
    try:
        return fn(txt) if isinstance(txt, str) and txt.strip() else float('nan')
    except Exception:
        return float('nan')

def readability_metrics(text):
    try:
        r = Readability(text)
        fk_grade = r.flesch_kincaid().score
        coleman = r.coleman_liau().score
        flesch_ease = r.flesch().score
        gunning = r.gunning_fog().score
        smog = r.smog().score
        dale = r.dale_chall().score
        return fk_grade, coleman, flesch_ease, gunning, smog, dale
    except Exception:
        return (float('nan'),) * 6

fk, cl, fe, gf, sm, dc = zip(*df['simplified_text'].apply(readability_metrics))

df['flesch_kincaid_grade'] = fk
df['coleman_liau_index'] = cl
df['flesch_reading_ease'] = fe
df['gunning_fog_index'] = gf
df['smog_index'] = sm
df['dale_chall_score'] = dc

display(df[['filename','flesch_kincaid_grade','coleman_liau_index','flesch_reading_ease','gunning_fog_index','smog_index','dale_chall_score']].head())

Unnamed: 0,filename,flesch_kincaid_grade,coleman_liau_index,flesch_reading_ease,gunning_fog_index,smog_index,dale_chall_score
0,10.1002-14651858.CD014257.pub2-abstract.txt,7.849922,11.589904,60.933739,10.456645,10.20307,8.493285
1,10.1002-14651858.CD003459.pub4-abstract.txt,5.132122,9.628262,72.751679,5.68584,8.021204,8.589774
2,10.1002-14651858.CD013446-abstract.txt,8.404382,11.100568,59.438479,9.233668,11.072351,9.157926
3,10.1002-14651858.CD013283.pub2-abstract.txt,7.315629,9.340707,66.805106,8.302959,9.299571,6.642672
4,10.1002-14651858.CD014953.pub2-abstract.txt,6.629797,10.041534,64.574145,7.823044,9.968508,7.950451


## AlignScore (Factuality)

In [13]:
device_str = "cuda:0" if torch.cuda.is_available() else "cpu"
print("Using device:", device_str)

alignscorer = AlignScore(
    model="roberta-base",
    batch_size=8,
    device=device_str,
    ckpt_path="https://huggingface.co/yzha/AlignScore/resolve/main/AlignScore-base.ckpt",
    evaluation_mode="bin_sp",
    verbose=False)

def clean_text_for_alignscore(text):
    if not isinstance(text, str):
        return " "
    # Remove Markdown headings like '#', '##'
    text = re.sub(r"^#{1,6}\s*", "", text, flags=re.MULTILINE)
    # Remove explicit section headers (case-insensitive)
    section_patterns = [
        r"\bPlain Language Summary\b[:\-]?",
        r"\bPlain Title\b[:\-]?",
        r"\bRationale\b[:\-]?",
        r"\bTrial Design\b[:\-]?",
        r"\bResults\b[:\-]?"]
    for pat in section_patterns:
        text = re.sub(pat, "", text, flags=re.IGNORECASE)
    # Remove excessive whitespace
    text = re.sub(r"\s+", " ", text).strip()
    return text or " "

def _call_score(aligner, srcs, outs):
    sig = inspect.signature(aligner.score)
    params = set(sig.parameters.keys())

    # 1) Try kwargs signature (contexts/claims)
    try:
        if {"contexts", "claims"}.issubset(params):
            res = aligner.score(contexts=srcs, claims=outs)
        else:
            # 2) Fallback: positional signature
            res = aligner.score(srcs, outs)
    except TypeError:
        # Some versions raise TypeError for the “wrong” signature; try the other
        try:
            res = aligner.score(srcs, outs)
        except TypeError:
            res = aligner.score(contexts=srcs, claims=outs)

    # Normalize return
    if isinstance(res, dict) and "scores" in res:
        return res["scores"]
    return res  # list/np array

def compute_alignscore_robust(aligner, sources, outputs, manual_batch_size=None):
    """
    - Cleans inputs
    - Never passes batch_size into .score()
    """
    srcs = [clean_text_for_alignscore(s) for s in sources]
    outs = [clean_text_for_alignscore(o) for o in outputs]

    if manual_batch_size is None:
        scores = _call_score(aligner, srcs, outs)
        # Convert to python floats if needed
        return [float(x) if x is not None else math.nan for x in scores]

    # Manual batching path
    all_scores = []
    for i in range(0, len(srcs), manual_batch_size):
        chunk_src = srcs[i:i+manual_batch_size]
        chunk_out = outs[i:i+manual_batch_size]
        sc = _call_score(aligner, chunk_src, chunk_out)
        all_scores.extend(sc)
    return [float(x) if x is not None else math.nan for x in all_scores]

try:
    # Option A: rely on AlignScore's internal batch_size (simplest)
    df["alignscore"] = compute_alignscore_robust(alignscorer,
                                                 df["pls"].tolist(),
                                                 df["simplified_text"].tolist())
    # Option B: manual batching (e.g., if you still see memory/timeouts)
    # df["alignscore"] = compute_alignscore_robust(alignscorer,
    #                                              df["original_text"].tolist(),
    #                                              df["simplified_text"].tolist(),
    #                                              manual_batch_size=8)
except Exception as e:
    print("Error calculating AlignScore:", e)
    df["alignscore"] = [math.nan] * len(df)

Using device: cuda:0


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading: "https://huggingface.co/yzha/AlignScore/resolve/main/AlignScore-base.ckpt" to /root/.cache/torch/hub/checkpoints/AlignScore-base.ckpt


100%|██████████| 1.83G/1.83G [00:05<00:00, 390MB/s]
INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.7.7 to v1.9.5. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint --file https:/huggingface.co/yzha/AlignScore/resolve/main/AlignScore-base.ckpt`
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  rank_zero_warn(


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

## BERTScore (Relevance)

In [14]:
from bert_score import score as bertscore
from evaluate import load
bertscore_alt = load("bertscore")

results_alt = bertscore_alt.compute(
    predictions=df['simplified_text'],
    references=df['pls'],
    model_type="roberta-base",
    lang="en")

# Convertir los resultados en listas de precisión, recall y F1
P2 = results_alt["precision"]
R2 = results_alt["recall"]
F1_2 = results_alt["f1"]

# Guardar en df_out con nombres distintos
df["BERT2_Precision_Longformer"] = P2
df["BERT2_Recall_Longformer"] = R2
df["BERT2_F1_Longformer"] = F1_2

print(f"✅ BERTScore (Longformer) calculado")
print(f"   - Precision media: {sum(P2)/len(P2):.3f}")
print(f"   - Recall media: {sum(R2)/len(R2):.3f}")
print(f"   - F1 media: {sum(F1_2)/len(F1_2):.3f}")

Downloading builder script: 0.00B [00:00, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ BERTScore (Longformer) calculado
   - Precision media: 0.845
   - Recall media: 0.845
   - F1 media: 0.845


In [15]:
# New DataFrame
df_results = pd.DataFrame({'FKG': df['flesch_kincaid_grade'].mean(), 'FRE': df['flesch_reading_ease'].mean(), 'SMOG': df['smog_index'].mean(),
                           'DC': df['dale_chall_score'].mean(), 'GF': df['gunning_fog_index'].mean(),
                           'AlignScore': df['alignscore'].mean(), 'BERT F1': sum(F1_2)/len(F1_2)}, index=[0])
df_results

Unnamed: 0,FKG,FRE,SMOG,DC,GF,AlignScore,BERT F1
0,6.658892,65.753641,9.456454,8.136346,8.092884,0.478756,0.844867


In [16]:
df['alignscore'].min(), df['alignscore'].max()

(0.23386619985103607, 0.8194447755813599)

In [17]:
df['flesch_kincaid_grade'].min(), df['flesch_kincaid_grade'].max()

(4.150012081246384, 10.626183745583038)

In [18]:
df['smog_index'].min(), df['smog_index'].max()

(7.03164865440522, 13.867442237049442)

In [19]:
df['flesch_reading_ease'].min(), df['flesch_reading_ease'].max()

(44.3394569462526, 82.81005505181349)

## Save Results

In [20]:
out_csv = "simplified_texts_with_scores.csv"
df.to_csv(out_csv, index_label="filename")
print(f"Saved: {out_csv}")

Saved: simplified_texts_with_scores.csv
