# Task
Simplify the Cochrane abstracts (text files) from "https://api.github.com/repos/feliperussi/bridging-the-gap-in-health-literacy/contents/llms_testing/Cochrane/ground_truth" using the Gemini 2.5 Flash API and evaluate the results using BERTScore for relevance, AlignScore for factuality, and the following readability metrics: CLI, FRE, GFI, SMOG, FKGL, and DCRS.

## Setup and authentication

Install necessary libraries and set up authentication for Gemini API.


In [None]:
%pip install google-generativeai evaluate[bertscore] transformers sentence-transformers matplotlib pandas numpy textstat

Collecting textstat
  Downloading textstat-0.7.11-py3-none-any.whl.metadata (15 kB)
Collecting evaluate[bertscore]
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.2-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.11-py3-none-any.whl (176 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.4/176.4 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.17.2-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyphen, textstat, evaluate
Successfully installed evaluate-0.4.6 pyphen-0.17.2 textstat-0.7.11


In [None]:
import os
from google.colab import userdata

# Copy the value stored under 'GOOGLE_API_KEY' in Colab's userdata into the process
# environment; the Gemini client cell later reads the key from os.environ.
os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')

In [None]:
import torch
# Print the installed PyTorch version so the environment of this run is visible in the output.
print(torch.__version__)

2.8.0+cu126


## Data Ingestion - Cochrane
Load text files from the GitHub repository "https://api.github.com/repos/feliperussi/bridging-the-gap-in-health-literacy/".

In [None]:
%pip install requests



Load abstracts



In [None]:
import csv

# Names of the abstract files to download, one per CSV row (first column).
file_names = []
# newline='' is the mode the csv module expects; utf-8-sig drops a leading BOM so the
# first filename is not corrupted when the CSV was exported from a spreadsheet tool.
with open('/content/abstract_names.csv', 'r', newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Blank lines are returned as empty rows; indexing them would raise IndexError.
        if not row or not row[0].strip():
            continue
        # Surrounding whitespace would end up inside the download URL, so strip it.
        file_names.append(row[0].strip())

print(f"Loaded {len(file_names)} filenames from abstract_names.csv")

Loaded 300 filenames from abstract_names.csv


In [None]:
import requests
import os

# Candidate folders in the GitHub repository; each file is tried against them in order.
github_base_urls = [
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/",
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/test/non_pls/",
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/train/non_pls/"
]
local_download_dir = 'downloaded_texts'
# Upper bound (seconds) for a single HTTP request; without it a stalled connection
# blocks the cell until it is interrupted by hand.
request_timeout_seconds = 30

# Create the local directory if it doesn't exist
os.makedirs(local_download_dir, exist_ok=True)

downloaded_count = 0

# A single session reuses the HTTPS connection across the many small requests.
with requests.Session() as session:
    for file_name in file_names:
        local_file_path = os.path.join(local_download_dir, file_name)
        downloaded = False

        if os.path.exists(local_file_path):
            print(f"File {file_name} already exists locally. Skipping download.")
            downloaded_count += 1
            downloaded = True
            continue # Skip to the next file_name if already downloaded

        # Write to a temporary name first: an interrupted run must not leave a truncated
        # file that the "already exists" check above would accept on the next run.
        partial_file_path = local_file_path + '.part'

        for base_url in github_base_urls:
            file_url = base_url + file_name

            try:
                print(f"Attempting to download {file_name} from {base_url}")
                file_content_response = session.get(file_url, timeout=request_timeout_seconds)
                file_content_response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

                with open(partial_file_path, 'w', encoding='utf-8') as f:
                    f.write(file_content_response.text)
                # Atomic rename: the final path only ever holds a fully written file.
                os.replace(partial_file_path, local_file_path)
                downloaded_count += 1
                downloaded = True
                print(f"Successfully downloaded {file_name}")
                break  # Move to the next file_name if download is successful

            except requests.exceptions.RequestException as e:
                print(f"Could not download {file_name} from {base_url}: {e}")
            except Exception as e:
                print(f"An unexpected error occurred while processing {file_name} from {base_url}: {e}")

        if not downloaded:
            print(f"Warning: Could not download {file_name} from any of the provided URLs.")


print(f"Download complete. Successfully downloaded {downloaded_count} out of {len(file_names)} files.")

File 10.1002-14651858.CD000371.pub7-abstract.txt already exists locally. Skipping download.
Attempting to download 10.1002-14651858.CD001218.pub3-abstract.txt from https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/
Successfully downloaded 10.1002-14651858.CD001218.pub3-abstract.txt
File 10.1002-14651858.CD001977.pub2-abstract.txt already exists locally. Skipping download.
Attempting to download 10.1002-14651858.CD002201.pub6-abstract.txt from https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/
Successfully downloaded 10.1002-14651858.CD002201.pub6-abstract.txt
Attempting to download 10.1002-14651858.CD002779.pub3-abstract.txt from https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/
Successfully downloaded 10.1002-14651858.CD002779.pub3-abstract.txt
Attempting to download 10.1

Download complete. Successfully downloaded 300 out of 300 files.


### Load abstract text

Load the content of the downloaded text files into a dictionary.

In [None]:
import os

# filename -> full text of every .txt file found in the download directory.
downloaded_texts_content = {}

for filename in os.listdir(local_download_dir):
    # Anything that is not a .txt file is ignored.
    if not filename.endswith('.txt'):
        continue
    filepath = os.path.join(local_download_dir, filename)
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            downloaded_texts_content[filename] = f.read()
        print(f"Loaded text from {filename}")
    except Exception as e:
        # A file that cannot be read is reported and skipped; the loop continues.
        print(f"Error loading text from {filename}: {e}")


Loaded text from 10.1002-14651858.CD014257.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD003459.pub4-abstract.txt
Loaded text from 10.1002-14651858.CD013446-abstract.txt
Loaded text from 10.1002-14651858.CD013283.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD014953.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD015397-abstract.txt
Loaded text from 10.1002-14651858.CD013173.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD013194-abstract.txt
Loaded text from 10.1002-14651858.CD013276.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD013705.pub3-abstract.txt
Loaded text from 10.1002-14651858.CD008838.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD010613.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD013333.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD009961.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD013444.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD013259.pub2-abstract.txt
Loaded text from 10.1002-14651858.CD004

In [None]:
# List the key of every loaded abstract, one per line.
print("Downloaded filenames:")
for filename in downloaded_texts_content:
    print(filename)

Downloaded filenames:
10.1002-14651858.CD014257.pub2-abstract.txt
10.1002-14651858.CD003459.pub4-abstract.txt
10.1002-14651858.CD013446-abstract.txt
10.1002-14651858.CD013283.pub2-abstract.txt
10.1002-14651858.CD014953.pub2-abstract.txt
10.1002-14651858.CD015397-abstract.txt
10.1002-14651858.CD013173.pub2-abstract.txt
10.1002-14651858.CD013194-abstract.txt
10.1002-14651858.CD013276.pub2-abstract.txt
10.1002-14651858.CD013705.pub3-abstract.txt
10.1002-14651858.CD008838.pub2-abstract.txt
10.1002-14651858.CD010613.pub2-abstract.txt
10.1002-14651858.CD013333.pub2-abstract.txt
10.1002-14651858.CD009961.pub2-abstract.txt
10.1002-14651858.CD013444.pub2-abstract.txt
10.1002-14651858.CD013259.pub2-abstract.txt
10.1002-14651858.CD004474.pub2-abstract.txt
10.1002-14651858.CD013057.pub2-abstract.txt
10.1002-14651858.CD013348.pub2-abstract.txt
10.1002-14651858.CD008011.pub3-abstract.txt
10.1002-14651858.CD002948.pub2-abstract.txt
10.1002-14651858.CD005231.pub3-abstract.txt
10.1002-14651858.CD013316

In [None]:
# Sanity check: number of abstracts currently held in memory.
print(len(downloaded_texts_content))

300


In [None]:
import pandas as pd

# Destination of the snapshot written below.
csv_filepath = 'downloaded_texts_content.csv'

# One row per abstract: the dictionary keys (filenames) become the index and the
# text goes into the single 'text_content' column.
df_downloaded = pd.DataFrame.from_dict(
    downloaded_texts_content,
    orient='index',
    columns=['text_content'],
)

# The index is written out under the header 'filename'.
df_downloaded.to_csv(csv_filepath, index_label='filename')

print(f"Downloaded texts content saved to {csv_filepath}")

Downloaded texts content saved to downloaded_texts_content.csv


## Text Simplification

### Subtask:
Use the Gemini API with the provided prompt to simplify the extracted text.

In [None]:
import time
from google import genai
from google.genai import types
from google.api_core import exceptions as core_exceptions

# filename -> text returned by Gemini; filled by the request loop further down in this cell.
simplified_texts = {}
prompt = """Using the following abstract of a biomedical study as input, generate a Plain Language Summary
(PLS) understandable by any patient, regardless of their health literacy. Ensure that the generated text
adheres to the following instructions which should be followed step-by-step:
a. Specific Structure: The generated PLS should be presented in a logical order, using the following
order:
1. Plain Title
2. Rationale
3. Trial Design
4. Results
b. Sections should be authored following these parameters:
1. Plain Title: Simplified title understandable to a layperson that summarizes the research that was
done.
2. Rationale: Include: background or study rationale providing a general description of the
condition, what it may cause or why it is a burden for the patients; the reason and main hypothesis
for the study; and why the study is needed, and why the study medication has the potential to
treat the condition.
3. Trial Design: Answer ‘How is this study designed?’ Include the description of the design,
description of study and patient population (age, health condition, gender), and the expected
amount of time a person will be in the study.
4. Results: Answer ‘What were the main results of the study’, include the benefits for the patients,
how the study was relevant for the area of study, and the conclusions from the investigator.
c. Consistency and Replicability: The generated PLS should be consistent regardless of the order of
sentences or the specific phrasing used in the input protocol text.
d. Compliance with Plain Language Guidelines: The generated PLS must follow all these plain
language guidelines:
• Have readability grade level of 6 or below.
• Do not have jargon. All technical or medical words or terms should be defined or broken down
into simple and logical explanations.
• Active voice, not passive.
• Mostly one or two syllable words.
• Sentences of 15 words or less.
• Short paragraphs of 3-5 sentences.
• Simple numbers (e.g., ratios, no percentages).
e. Do not invent Content: The AI model should not invent information. If the AI model includes data
other than the one given in the input abstract, the AI model should guarantee such data is verified and
real.
f. Aim for an approximate PLS length of 500-900 words.


Abstract of a biomedical study text: {text}
"""

# The google-genai client reports API failures through its own exception classes, so the
# retry branch has to catch those; imported here so this loop is self-contained.
from google.genai import errors as genai_errors

client = genai.Client(api_key=os.environ['GOOGLE_API_KEY'])
counter = 0              # abstracts attempted
sum_times = 0            # total latency of successful requests, in seconds
successful_requests = 0  # abstracts for which Gemini returned a response
max_attempts = 3
retry_wait_seconds = 60

# Transient server-side failures worth retrying. ServerError covers 5xx responses from the
# google-genai client; the api_core class is kept so the previous behaviour is a subset.
retryable_errors = (genai_errors.ServerError, core_exceptions.ServiceUnavailable)

for filename, text in downloaded_texts_content.items():
    counter += 1
    retries = max_attempts  # Set a number of retries

    while retries > 0:
        init_time = time.time()

        try:
            response = client.models.generate_content(
                model="gemini-2.5-flash",
                contents=prompt.format(text=text),
                config=types.GenerateContentConfig(thinking_config=types.ThinkingConfig(thinking_budget=0))
            )
            simplified_texts[filename] = response.text
            response_time = time.time() - init_time
            sum_times += response_time
            successful_requests += 1
            if response.text is None:
                # Stored as-is, but flagged so the empty row is not a surprise downstream.
                print(f"Warning: Gemini returned no text for {filename}.")
            print(f"Simplified text # {counter} for {filename}. Duration: {response_time}")
            break  # Exit the retry loop on success
        except retryable_errors as e:
            retries -= 1
            if retries == 0:
                # No attempts left, so there is no point in sleeping again.
                print(f"Failed to simplify {filename} after multiple retries.")
            else:
                print(f"ServiceUnavailable error for {filename}: {e}. Retrying in {retry_wait_seconds} seconds...")
                time.sleep(retry_wait_seconds)
        except Exception as e:
            print(f"An unexpected error occurred while simplifying {filename}: {e}")
            break # Exit the retry loop for other errors

# Average over successful requests only: failed ones contribute no time to sum_times,
# and an empty input would otherwise divide by zero.
if successful_requests:
    print(f"Average time: {sum_times/successful_requests}")
else:
    print("Average time: n/a (no abstract was simplified successfully)")


Simplified text # 1 for 10.1002-14651858.CD014257.pub2-abstract.txt. Duration: 3.803506374359131
Simplified text # 2 for 10.1002-14651858.CD003459.pub4-abstract.txt. Duration: 3.625622034072876
Simplified text # 3 for 10.1002-14651858.CD013446-abstract.txt. Duration: 3.4325881004333496
Simplified text # 4 for 10.1002-14651858.CD013283.pub2-abstract.txt. Duration: 3.7483065128326416
Simplified text # 5 for 10.1002-14651858.CD014953.pub2-abstract.txt. Duration: 3.952915906906128
Simplified text # 6 for 10.1002-14651858.CD015397-abstract.txt. Duration: 3.2626614570617676
Simplified text # 7 for 10.1002-14651858.CD013173.pub2-abstract.txt. Duration: 3.0602941513061523
Simplified text # 8 for 10.1002-14651858.CD013194-abstract.txt. Duration: 3.826127052307129
Simplified text # 9 for 10.1002-14651858.CD013276.pub2-abstract.txt. Duration: 3.7685022354125977
Simplified text # 10 for 10.1002-14651858.CD013705.pub3-abstract.txt. Duration: 4.89568567276001
Simplified text # 11 for 10.1002-1465185

In [None]:
import pandas as pd

# Two columns keyed by filename: the abstract as downloaded and the text Gemini produced.
# pandas aligns both dictionaries on their keys when building the frame.
data = dict(original_text=downloaded_texts_content, simplified_text=simplified_texts)
df = pd.DataFrame(data)

# Write the table out; the index (filenames) is stored under the header 'filename'.
csv_filepath = 'simplified_texts.csv'
df.to_csv(csv_filepath, index_label='filename')

print(f"Results saved to {csv_filepath}")

Results saved to simplified_texts.csv


# Task
Load the original PLS texts and compare them to the simplified texts from "/content/simplified_texts.csv". For each row, provide a relevance score using BERTScore, a factual consistency score using AlignScore, and a readability score using the Flesch–Kincaid Grade Level.

## Load pls text

Load pls

In [None]:
import csv

# Names of the PLS files to download, one per CSV row (first column).
pls_file_names = []
# newline='' is the mode the csv module expects; utf-8-sig drops a leading BOM so the
# first filename is not corrupted when the CSV was exported from a spreadsheet tool.
with open('/content/pls_names.csv', 'r', newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # Blank lines are returned as empty rows; indexing them would raise IndexError.
        if not row or not row[0].strip():
            continue
        # Surrounding whitespace would end up inside the download URL, so strip it.
        pls_file_names.append(row[0].strip())

print(f"Loaded {len(pls_file_names)} filenames from pls_names.csv")

Loaded 300 filenames from pls_names.csv


In [None]:
import requests
import os

# Candidate folders in the GitHub repository; each file is tried against them in order.
github_base_urls = [
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/",
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/test/pls/",
    "https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/train/pls/"
]
local_download_dir = 'pls_downloaded_texts'
# Upper bound (seconds) for a single HTTP request; without it a stalled connection
# blocks the cell until it is interrupted by hand.
request_timeout_seconds = 30

# Create the local directory if it doesn't exist
os.makedirs(local_download_dir, exist_ok=True)

downloaded_count = 0

# A single session reuses the HTTPS connection across the many small requests.
with requests.Session() as session:
    for file_name in pls_file_names:
        local_file_path = os.path.join(local_download_dir, file_name)
        downloaded = False

        if os.path.exists(local_file_path):
            print(f"File {file_name} already exists locally. Skipping download.")
            downloaded_count += 1
            downloaded = True
            continue # Skip to the next file_name if already downloaded

        # Write to a temporary name first: an interrupted run must not leave a truncated
        # file that the "already exists" check above would accept on the next run.
        partial_file_path = local_file_path + '.part'

        for base_url in github_base_urls:
            file_url = base_url + file_name

            try:
                print(f"Attempting to download {file_name} from {base_url}")
                file_content_response = session.get(file_url, timeout=request_timeout_seconds)
                file_content_response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

                with open(partial_file_path, 'w', encoding='utf-8') as f:
                    f.write(file_content_response.text)
                # Atomic rename: the final path only ever holds a fully written file.
                os.replace(partial_file_path, local_file_path)
                downloaded_count += 1
                downloaded = True
                print(f"Successfully downloaded {file_name}")
                break  # Move to the next file_name if download is successful

            except requests.exceptions.RequestException as e:
                print(f"Could not download {file_name} from {base_url}: {e}")
            except Exception as e:
                print(f"An unexpected error occurred while processing {file_name} from {base_url}: {e}")

        if not downloaded:
            print(f"Warning: Could not download {file_name} from any of the provided URLs.")


print(f"Download complete. Successfully downloaded {downloaded_count} out of {len(pls_file_names)} files.")

Attempting to download 10.1002-14651858.CD000371.pub7-pls.txt from https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/
Could not download 10.1002-14651858.CD000371.pub7-pls.txt from https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/: 404 Client Error: Not Found for url: https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/llms_testing/Cochrane/ground_truth/10.1002-14651858.CD000371.pub7-pls.txt
Attempting to download 10.1002-14651858.CD000371.pub7-pls.txt from https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/test/pls/
Could not download 10.1002-14651858.CD000371.pub7-pls.txt from https://raw.githubusercontent.com/feliperussi/bridging-the-gap-in-health-literacy/main/data_collection_and_processing/Data%20Sources/Cochrane/t

KeyboardInterrupt: 

In [None]:
import os

# Document id (filename with the "-pls.txt" part removed) -> full PLS text.
pls_downloaded_texts_content = {}

for filename in os.listdir(local_download_dir):
    # Anything that is not a .txt file is ignored.
    if not filename.endswith('.txt'):
        continue
    filepath = os.path.join(local_download_dir, filename)
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            # Dropping "-pls.txt" yields the key shape the later join on 'filename' uses.
            clean_name = filename.replace("-pls.txt", "")
            pls_downloaded_texts_content[clean_name] = f.read()
        print(f"Loaded text from {filename}")
    except Exception as e:
        # A file that cannot be read is reported and skipped; the loop continues.
        print(f"Error loading text from {filename}: {e}")


Loaded text from 10.1002-14651858.CD003315.pub3-pls.txt
Loaded text from 10.1002-14651858.CD010497.pub2-pls.txt
Loaded text from 10.1002-14651858.CD002948.pub2-pls.txt
Loaded text from 10.1002-14651858.CD000371.pub7-pls.txt
Loaded text from 10.1002-14651858.CD005974.pub5-pls.txt
Loaded text from 10.1002-14651858.CD009487.pub3-pls.txt
Loaded text from 10.1002-14651858.CD004957.pub3-pls.txt
Loaded text from 10.1002-14651858.CD009134.pub2-pls.txt
Loaded text from 10.1002-14651858.CD008844.pub2-pls.txt
Loaded text from 10.1002-14651858.CD009058.pub3-pls.txt
Loaded text from 10.1002-14651858.CD006237.pub4-pls.txt
Loaded text from 10.1002-14651858.CD003737.pub4-pls.txt
Loaded text from 10.1002-14651858.CD007263.pub3-pls.txt
Loaded text from 10.1002-14651858.CD006171.pub4-pls.txt
Loaded text from 10.1002-14651858.CD005231.pub3-pls.txt
Loaded text from 10.1002-14651858.CD009868.pub3-pls.txt
Loaded text from 10.1002-14651858.CD009949.pub2-pls.txt
Loaded text from 10.1002-14651858.CD010299.pub2-

In [None]:
# Printing the whole dictionary would dump every full document into the cell output;
# show the size and a short preview of the first few entries instead.
print(f"Loaded {len(pls_downloaded_texts_content)} PLS texts")
for pls_name, pls_text in list(pls_downloaded_texts_content.items())[:3]:
    print(f"{pls_name}: {pls_text[:200]!r}...")

{'10.1002-14651858.CD003315.pub3': 'Feeding interventions for growth and development in infants with cleft lip, cleft palate or cleft lip and palate \nCleft lip and cleft palate (the roof of the mouth) are common defects. The severity of the cleft (opening) varies and it can occur on one (unilateral) or both sides (bilateral). It can be difficult to feed babies enough nutritious food when they have this condition, and there is evidence of delayed development in children born with cleft. \nThis review aimed to compare the effects of different feeding interventions such as maternal advice and support, modified bottles and/or teats, obturating or maxillary plates (plates placed in the roof of the mouth to artificially close the cleft palate) and supplemental breastfeeding in babies with cleft lip and/or palate prior to, or following, closure. Interventions in the first 6 months from term and used with breast milk or formula feeding only were considered, but outcomes (measures of growth, d

In [None]:
import pandas as pd

# Load the simplified_texts.csv file
df_simplified = pd.read_csv('simplified_texts.csv')

# Remove "-abstract.txt" from the 'filename' column so the keys match the PLS ids
df_simplified['filename'] = df_simplified['filename'].str.replace('-abstract.txt', '', regex=False)

# Set 'filename' as the index for both dataframes for merging
df_simplified = df_simplified.set_index('filename')
df_pls = pd.DataFrame.from_dict(pls_downloaded_texts_content, orient='index', columns=['pls_text_content'])


# Left join: every simplified row is kept, and rows without a PLS get NaN
df_merged = df_simplified.join(df_pls)

# Make unmatched rows visible right here; otherwise a partial PLS download only shows up
# later as missing reference texts in the scoring cells.
missing_pls_count = int(df_merged['pls_text_content'].isna().sum())
if missing_pls_count:
    print(f"Warning: {missing_pls_count} of {len(df_merged)} rows have no matching PLS text.")

# Display the head of the merged DataFrame
display(df_merged.head())

Unnamed: 0_level_0,original_text,simplified_text,pls_text_content
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10.1002-14651858.CD014257.pub2,Background\nFunctional constipation is defined...,## Plain Language Summary: Do Probiotics Help ...,
10.1002-14651858.CD003459.pub4,Methotrexate for treatment of active treatment...,## Plain Title\nStudy on a Drug for Crohn's Di...,Methotrexate for treatment of active treatment...
10.1002-14651858.CD013446,Background\nVitamin D deficiency during pregna...,## Plain Language Summary\n\n### Plain Title\n...,
10.1002-14651858.CD013283.pub2,Background\nHypoglycaemia is a common occurren...,Here's a Plain Language Summary of the study a...,
10.1002-14651858.CD014953.pub2,Background\nCataract surgery is the most commo...,Here is a Plain Language Summary of the provid...,


In [None]:
display(df_merged.head())
# info() prints its report itself and returns None, so wrapping it in display()
# only adds a stray "None" to the output.
df_merged.info()

Unnamed: 0_level_0,original_text,simplified_text,pls_text_content
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10.1002-14651858.CD014257.pub2,Background\nFunctional constipation is defined...,## Plain Language Summary: Do Probiotics Help ...,
10.1002-14651858.CD003459.pub4,Methotrexate for treatment of active treatment...,## Plain Title\nStudy on a Drug for Crohn's Di...,Methotrexate for treatment of active treatment...
10.1002-14651858.CD013446,Background\nVitamin D deficiency during pregna...,## Plain Language Summary\n\n### Plain Title\n...,
10.1002-14651858.CD013283.pub2,Background\nHypoglycaemia is a common occurren...,Here's a Plain Language Summary of the study a...,
10.1002-14651858.CD014953.pub2,Background\nCataract surgery is the most commo...,Here is a Plain Language Summary of the provid...,


<class 'pandas.core.frame.DataFrame'>
Index: 300 entries, 10.1002-14651858.CD014257.pub2 to 10.1002-14651858.CD013487
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   original_text     300 non-null    object
 1   simplified_text   300 non-null    object
 2   pls_text_content  62 non-null     object
dtypes: object(3)
memory usage: 17.5+ KB


None

In [None]:
# Persist the merged table (abstract, Gemini output, reference PLS); the index is
# written under the header 'filename'.
csv_filepath = 'abstract_generated_pls_gemini25.csv'
df_merged.to_csv(path_or_buf=csv_filepath, index_label='filename')

print(f"DataFrame saved to {csv_filepath}")

## Load data

Load the data from "/content/abstract_generated_pls_gemini25.csv" into a pandas DataFrame.


In [None]:
import pandas as pd

# Reload the merged table from disk, using the 'filename' column as the index.
df = pd.read_csv("abstract_generated_pls_gemini25.csv", index_col='filename')
display(df.head())
# info() prints its report itself and returns None, so wrapping it in display()
# only adds a stray "None" to the output.
df.info()

Unnamed: 0_level_0,original_text,simplified_text,pls_text_content
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10.1002-14651858.CD014257.pub2,Background\nFunctional constipation is defined...,## Plain Language Summary: Do Probiotics Help ...,Probiotics for treatment of chronic constipati...
10.1002-14651858.CD003459.pub4,Methotrexate for treatment of active treatment...,## Plain Title\nStudy on a Drug for Crohn's Di...,Methotrexate for treatment of active treatment...
10.1002-14651858.CD013446,Background\nVitamin D deficiency during pregna...,## Plain Language Summary\n\n### Plain Title\n...,Regimens of vitamin D supplementation for wome...
10.1002-14651858.CD013283.pub2,Background\nHypoglycaemia is a common occurren...,Here's a Plain Language Summary of the study a...,First aid glucose administration routes for sy...
10.1002-14651858.CD014953.pub2,Background\nCataract surgery is the most commo...,Here is a Plain Language Summary of the provid...,Can virtual reality training for cataract surg...


<class 'pandas.core.frame.DataFrame'>
Index: 300 entries, 10.1002-14651858.CD014257.pub2 to 10.1002-14651858.CD013487
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   original_text     300 non-null    object
 1   simplified_text   300 non-null    object
 2   pls_text_content  300 non-null    object
dtypes: object(3)
memory usage: 9.4+ KB


None

## Calculate bertscore

Compute BERTScore for each pair of reference PLS and simplified text.


In [None]:
%pip install google-generativeai evaluate[bertscore] transformers sentence-transformers matplotlib pandas numpy textstat

Collecting textstat
  Downloading textstat-0.7.11-py3-none-any.whl.metadata (15 kB)
Collecting evaluate[bertscore]
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.2-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.11-py3-none-any.whl (176 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.4/176.4 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.17.2-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m70.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyphen, textstat, evaluate
Successfully installed evaluate-0.4.6 pyphen-0.17.2 textstat-0.7.11


In [None]:
%pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [None]:
from evaluate import load

bertscore = load("bertscore")
# Model used for the embeddings; kept in one place so every row is scored the same way.
bertscore_model_type = "allenai/longformer-large-4096-finetuned-triviaqa"

# Per-row results, in the same order as the rows of df.
precision_scores = []
recall_scores = []
f1_scores = []

for index, row in df.iterrows():
    # The reference is the human-written PLS, not the original abstract.
    reference_pls = row['pls_text_content']
    simplified_text = row['simplified_text']

    # BERTScore expects lists of strings
    results = bertscore.compute(predictions=[simplified_text], references=[reference_pls], model_type=bertscore_model_type)

    # Keep all three components; previously only F1 was recorded and the
    # precision/recall lists stayed empty.
    precision_scores.append(results['precision'][0])
    recall_scores.append(results['recall'][0])
    f1_scores.append(results['f1'][0])

# Only F1 is stored on the frame, under the existing column name.
df['bertscore'] = f1_scores

display(df.head())

Downloading builder script: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/866 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

Input ids are automatically padded to be a multiple of `config.attention_window`: 512


Unnamed: 0_level_0,original_text,simplified_text,pls_text_content,bertscore
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10.1002-14651858.CD014257.pub2,Background\nFunctional constipation is defined...,## Plain Language Summary: Do Probiotics Help ...,Probiotics for treatment of chronic constipati...,0.830032
10.1002-14651858.CD003459.pub4,Methotrexate for treatment of active treatment...,## Plain Title\nStudy on a Drug for Crohn's Di...,Methotrexate for treatment of active treatment...,0.838723
10.1002-14651858.CD013446,Background\nVitamin D deficiency during pregna...,## Plain Language Summary\n\n### Plain Title\n...,Regimens of vitamin D supplementation for wome...,0.834336
10.1002-14651858.CD013283.pub2,Background\nHypoglycaemia is a common occurren...,Here's a Plain Language Summary of the study a...,First aid glucose administration routes for sy...,0.816371
10.1002-14651858.CD014953.pub2,Background\nCataract surgery is the most commo...,Here is a Plain Language Summary of the provid...,Can virtual reality training for cataract surg...,0.816787


In [None]:
# Arithmetic mean of the per-row BERTScore F1 values.
bertscore_values = df['bertscore']
print(sum(bertscore_values) / len(bertscore_values))

0.8330190253257751


## Calculate AlignScore and Flesch–Kincaid Grade Level

Compute AlignScore and Flesch–Kincaid Grade Level for each pair of original and simplified texts.

In [None]:
# Clone the alignscore repository
!pip install git+https://github.com/yuh-zha/AlignScore.git --no-deps
!pip install pytorch-lightning==1.9.5

import torch
import transformers
if not hasattr(transformers, "AdamW"):
    from torch.optim import AdamW
    transformers.AdamW = AdamW


Collecting git+https://github.com/yuh-zha/AlignScore.git
  Cloning https://github.com/yuh-zha/AlignScore.git to /tmp/pip-req-build-dgmyccsz
  Running command git clone --filter=blob:none --quiet https://github.com/yuh-zha/AlignScore.git /tmp/pip-req-build-dgmyccsz
  Resolved https://github.com/yuh-zha/AlignScore.git to commit a0936d5afee642a46b22f6c02a163478447aa493
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [None]:
from alignscore import AlignScore

In [None]:
# Download the spaCy language model
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m127.3 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Install textstat if not already installed
%pip install textstat

import nltk

# Fetch NLTK's 'punkt_tab' resource (presumably needed for sentence splitting by the
# scoring code that follows — confirm which library requires it).
nltk.download('punkt_tab')



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
import torch

# Release cached GPU memory before the next model is loaded. The message is only
# printed when a CUDA device actually exists, so CPU-only runs are not misreported.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("CUDA cache cleared.")
else:
    print("No CUDA device available; nothing to clear.")

CUDA cache cleared.


In [None]:
import textstat

# Run AlignScore on the GPU when one is available, otherwise fall back to CPU.
device_id = 'cuda' if torch.cuda.is_available() else 'cpu'

# The checkpoint is fetched from the Hugging Face URL on first use.
alignscorer = AlignScore(
    model='roberta-large',
    batch_size=32,
    device=device_id,
    ckpt_path='https://huggingface.co/yzha/AlignScore/resolve/main/AlignScore-large.ckpt',
    evaluation_mode='nli_sp',
    verbose=False,
)

# AlignScore is called with (context, claim) pairs. The context used here is
# the `pls_text_content` column and the claim is the model's simplified text.
# NOTE(review): the task statement describes AlignScore as the factuality
# metric; confirm that `pls_text_content` (rather than `original_text`) is the
# intended context.
context_texts = df['pls_text_content']
claim_texts = df['simplified_text']

# Fail early with a readable message instead of deep inside the scorer when a
# text is missing (for example after a failed simplification request).
missing_mask = context_texts.isna() | claim_texts.isna()
assert not missing_mask.any(), (
    f"{int(missing_mask.sum())} row(s) have a missing context or simplified text"
)

# `score` takes parallel lists and returns one score per pair; each pair is
# scored with its own call, exactly as before, so the resulting values are
# unchanged.
align_scores = [
    alignscorer.score([context_text], [claim_text])[0]
    for context_text, claim_text in zip(context_texts, claim_texts)
]

df['alignscore'] = align_scores

display(df.head())

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading: "https://huggingface.co/yzha/AlignScore/resolve/main/AlignScore-large.ckpt" to /root/.cache/torch/hub/checkpoints/AlignScore-large.ckpt


100%|██████████| 4.56G/4.56G [00:21<00:00, 227MB/s]
INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.8.0.post1 to v1.9.5. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint --file https:/huggingface.co/yzha/AlignScore/resolve/main/AlignScore-large.ckpt`
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  rank_zero_warn(


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

0.5657334327697754
0.6205008625984192
0.5978964567184448
0.48998817801475525
0.3701719641685486
0.49663761258125305
0.45774245262145996
0.5551057457923889
0.7895410060882568
0.6319034099578857
0.46761295199394226
0.6618960499763489
0.5279214382171631
0.7893241047859192
0.5824472904205322
0.5631817579269409
0.5723324418067932
0.65570467710495
0.504098117351532
0.7268499135971069
0.7389707565307617
0.8630031943321228
0.3738457262516022
0.6106303930282593
0.5189270377159119
0.6876274347305298
0.49910086393356323
0.5532565116882324
0.5523911118507385
0.5586130023002625
0.5334881544113159
0.548869788646698
0.6966871023178101
0.5516550540924072
0.507824182510376
0.6322880387306213
0.40115731954574585
0.4326176643371582
0.5618187189102173
0.7275824546813965
0.6140598654747009
0.541111409664154
0.8143125772476196
0.6012130379676819
0.49674561619758606
0.5396820306777954
0.49480268359184265
0.7492445111274719
0.5452721118927002
0.5494757890701294
0.7460689544677734
0.43362754583358765
0.4194680

Unnamed: 0_level_0,original_text,simplified_text,pls_text_content,bertscore,alignscore
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10.1002-14651858.CD014257.pub2,Background\nFunctional constipation is defined...,## Plain Language Summary: Do Probiotics Help ...,Probiotics for treatment of chronic constipati...,0.830032,0.565733
10.1002-14651858.CD003459.pub4,Methotrexate for treatment of active treatment...,## Plain Title\nStudy on a Drug for Crohn's Di...,Methotrexate for treatment of active treatment...,0.838723,0.620501
10.1002-14651858.CD013446,Background\nVitamin D deficiency during pregna...,## Plain Language Summary\n\n### Plain Title\n...,Regimens of vitamin D supplementation for wome...,0.834336,0.597896
10.1002-14651858.CD013283.pub2,Background\nHypoglycaemia is a common occurren...,Here's a Plain Language Summary of the study a...,First aid glucose administration routes for sy...,0.816371,0.489988
10.1002-14651858.CD014953.pub2,Background\nCataract surgery is the most commo...,Here is a Plain Language Summary of the provid...,Can virtual reality training for cataract surg...,0.816787,0.370172


In [None]:
# Mean AlignScore over all documents. `skipna=False` keeps the previous
# behaviour of yielding NaN when any score is missing, while an empty frame
# now prints NaN instead of raising ZeroDivisionError.
print(df['alignscore'].mean(skipna=False))

0.5887109000484149


In [None]:

# Output column -> textstat function that produces it. The dict order is the
# order in which the columns are added to `df`.
readability_metrics = {
    'flesch_kincaid_grade': textstat.flesch_kincaid_grade,
    'coleman_liau_index': textstat.coleman_liau_index,
    'flesch_reading_ease': textstat.flesch_reading_ease,
    'gunning_fog_index': textstat.gunning_fog,
    'smog_index': textstat.smog_index,
    'dale_chall_score': textstat.dale_chall_readability_score,
}

# Readability is measured on the simplified text only; the source text is not
# scored in this cell.
# NOTE(review): the simplified texts shown above start with Markdown markup
# (e.g. "## Plain Title"), which may influence these scores; confirm whether
# the markup should be stripped before scoring.
for column_name, metric_fn in readability_metrics.items():
    df[column_name] = df['simplified_text'].map(metric_fn)


display(df.head())

Unnamed: 0_level_0,original_text,simplified_text,pls_text_content,bertscore,alignscore,flesch_kincaid_grade,coleman_liau_index,flesch_reading_ease,gunning_fog_index,smog_index,dale_chall_score
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10.1002-14651858.CD014257.pub2,Background\nFunctional constipation is defined...,## Plain Language Summary: Do Probiotics Help ...,Probiotics for treatment of chronic constipati...,0.830032,0.565733,5.638949,8.520734,72.283328,7.78015,8.893546,8.995367
10.1002-14651858.CD003459.pub4,Methotrexate for treatment of active treatment...,## Plain Title\nStudy on a Drug for Crohn's Di...,Methotrexate for treatment of active treatment...,0.838723,0.620501,3.398437,6.846729,82.247739,4.36964,6.351523,9.602705
10.1002-14651858.CD013446,Background\nVitamin D deficiency during pregna...,## Plain Language Summary\n\n### Plain Title\n...,Regimens of vitamin D supplementation for wome...,0.834336,0.597896,5.721094,7.26,73.976667,8.220871,9.236283,9.096526
10.1002-14651858.CD013283.pub2,Background\nHypoglycaemia is a common occurren...,Here's a Plain Language Summary of the study a...,First aid glucose administration routes for sy...,0.816371,0.489988,3.59849,5.501235,85.042734,4.390311,5.894771,7.222786
10.1002-14651858.CD014953.pub2,Background\nCataract surgery is the most commo...,Here is a Plain Language Summary of the provid...,Can virtual reality training for cataract surg...,0.816787,0.370172,6.376858,8.975465,69.731731,7.627051,8.780746,10.220897


In [None]:
# Persist every text and score column to disk; the DataFrame index is written
# under the "filename" header.
csv_filepath = 'simplified_texts_with_scores.csv'
df.to_csv(path_or_buf=csv_filepath, index_label='filename')

print(f"DataFrame saved to {csv_filepath}")

DataFrame saved to simplified_texts_with_scores.csv


In [None]:
# Report the mean of every evaluation metric, labelled with its column name so
# the numbers can be read without referring back to the code.
summary_columns = [
    'bertscore',
    'alignscore',
    'flesch_reading_ease',
    'flesch_kincaid_grade',
    'coleman_liau_index',
    'gunning_fog_index',
    'smog_index',
    'dale_chall_score',
]

for column_name in summary_columns:
    # skipna=False: a missing score still surfaces as NaN, as the previous
    # sum()/len() arithmetic did.
    print(f"{column_name}: {df[column_name].mean(skipna=False)}")


0.8330190253257751
0.5887109000484149
78.7326589603404
4.6687603115898355
7.271027679487249
6.01728990972142
7.634476782059343
8.69822300650926
