In [1]:
import pandas as pd
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Load the Pegasus tokenizer and seq2seq model for the "google/pegasus-xsum" checkpoint.
# `tokenizer` and `model` are module-level objects that summarize_text() relies on.
# On a fresh runtime these calls download the checkpoint files (see the progress
# bars in this cell's output; pytorch_model.bin is about 2.28 GB).
# NOTE(review): the load warns that the encoder/decoder `embed_positions.weight`
# tensors were "newly initialized" — presumably harmless for Pegasus, whose position
# embeddings are not learned; confirm before trusting the generated summaries.
model_name = "google/pegasus-xsum"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.52M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

In [5]:
!pip install transformers rouge-score pandas


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=5b4410370c33aef142fcadcb4fbdff2c92fbd15414ac258dec662acd99b09bd0
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [6]:
from rouge_score import rouge_scorer

In [7]:
# Score a generated text against a reference text with ROUGE-1, ROUGE-2 and ROUGE-L
def calculate_rouge(reference, generated):
    """Compute ROUGE scores of `generated` measured against `reference`.

    Args:
        reference: The target text the generated text is compared to.
        generated: The text to evaluate (e.g. a model summary).

    Returns:
        Whatever ``RougeScorer.score`` returns for the metrics 'rouge1',
        'rouge2' and 'rougeL' (stemming enabled): a dict keyed by metric name
        whose values carry precision, recall and fmeasure.
    """
    metric_names = ['rouge1', 'rouge2', 'rougeL']
    return rouge_scorer.RougeScorer(metric_names, use_stemmer=True).score(reference, generated)

In [8]:
# Summarize clinical text using the module-level Pegasus `tokenizer` and `model`
def summarize_text(text, max_length=60, num_beams=5):
    """Generate an abstractive summary of `text` with Pegasus.

    Args:
        text: The text to summarize. Input longer than the tokenizer's maximum
            length is truncated.
        max_length: Upper bound on the length of the generated sequence, in
            tokens (default 60, the value previously hard-coded).
        num_beams: Beam width used for beam search (default 5, the value
            previously hard-coded).

    Returns:
        The decoded summary string with special tokens removed, or an empty
        string when `text` is empty or contains only whitespace.
    """
    # Nothing to summarize: avoid asking the model to invent a summary from
    # an empty prompt.
    if isinstance(text, str) and not text.strip():
        return ""

    tokens = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
    # Pass the attention mask explicitly so any padded positions are ignored
    # by the encoder instead of relying on generate() to infer the mask.
    summary_ids = model.generate(
        input_ids=tokens["input_ids"],
        attention_mask=tokens["attention_mask"],
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )
    # Only the first sequence in the batch is decoded and returned.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Render one dataset row as a short plain-English clinical description
def get_clinical_text(row):
    """Build the clinical text that is fed to the summarizer from one row.

    Args:
        row: A mapping-like object (e.g. a pandas Series) indexable by the keys
            "Age", "Gender", "Condition", "Treatment", "Outcome",
            "Hospital Stay (Days)" and "Comorbidities".

    Returns:
        A single string of six sentences describing the patient, in the fixed
        order age/gender, condition, treatment, outcome, hospital stay,
        comorbidities.
    """
    field_names = (
        "Age",
        "Gender",
        "Condition",
        "Treatment",
        "Outcome",
        "Hospital Stay (Days)",
        "Comorbidities",
    )
    # Look every field up first, in order, before any text is assembled.
    age, gender, condition, treatment, outcome, stay_days, comorbidities = (
        row[name] for name in field_names
    )

    sentences = [
        f"The patient is a {age}-year-old {gender}.",
        f"Condition: {condition}.",
        f"Treatment: {treatment}.",
        f"Outcome: {outcome}.",
        f"Hospital stay: {stay_days} days.",
        f"Comorbidities: {comorbidities}.",
    ]
    return " ".join(sentences)

In [9]:
# Load the clinical dataset into a DataFrame.
# The path is relative to the notebook's working directory; replace it with the
# actual location of the CSV if it lives elsewhere.
# get_clinical_text() reads the columns "Age", "Gender", "Condition", "Treatment",
# "Outcome", "Hospital Stay (Days)" and "Comorbidities" from rows of this frame.
file_path = 'updated_clinical_medical_dataset.csv'  # Replace with the actual file path
clinical_df = pd.read_csv(file_path)

In [10]:
# Interactive driver: the user either types clinical text or picks a dataset row,
# and the notebook prints the Pegasus summary plus its ROUGE scores.
def _summarize_and_report(text):
    """Summarize `text`, print the summary and its ROUGE scores, and return both.

    The input text itself is used as the ROUGE reference, so the scores measure
    lexical overlap between the summary and its source, not agreement with a
    human-written reference summary.
    """
    summary = summarize_text(text)
    print(f"\nSummarized Text: {summary}\n")
    rouge = calculate_rouge(text, summary)
    print(f"\nROUGE Scores: {rouge}\n")
    return summary, rouge


while True:
    print("Would you like to input clinical details manually or use data from the dataset?")
    print("1. Enter manually\n2. Use dataset row (enter row number)\nType 'exit' to stop:")
    # Strip surrounding whitespace so inputs such as " 1" or "exit " are accepted.
    user_input = input().strip().lower()

    if user_input == 'exit':
        break
    elif user_input == '1':
        # Manually enter clinical details
        print("Enter clinical details (Age, Gender, Condition, Treatment, Outcome, etc.):")
        manual_text = input().strip()
        if not manual_text:
            # Do not ask the model to summarize an empty prompt.
            print("No clinical details entered. Please try again.")
            continue
        summarized_output, scores = _summarize_and_report(manual_text)
    elif user_input == '2':
        # Use a specific row from the dataset
        print(f"Enter a row number between 0 and {len(clinical_df)-1}:")
        # Only the integer parse is guarded: a ValueError raised later by the
        # tokenizer or model must not be misreported as a bad row number.
        try:
            row_number = int(input())
        except ValueError:
            print("Invalid input. Please enter a valid row number.")
            continue

        if 0 <= row_number < len(clinical_df):
            # Generate clinical text for the chosen row
            clinical_text = get_clinical_text(clinical_df.iloc[row_number])
            print(f"\nOriginal Clinical Text: {clinical_text}")
            summarized_output, scores = _summarize_and_report(clinical_text)
        else:
            print(f"Invalid row number. Please choose a number between 0 and {len(clinical_df)-1}.")
    else:
        print("Invalid choice. Please enter '1', '2', or 'exit'.")

Would you like to input clinical details manually or use data from the dataset?
1. Enter manually
2. Use dataset row (enter row number)
Type 'exit' to stop:
1
Enter clinical details (Age, Gender, Condition, Treatment, Outcome, etc.):
Patient is a 58-year-old male with a history of hypertension and Type 2 diabetes. He was admitted for chest pain and diagnosed with coronary artery disease. The patient underwent coronary artery bypass surgery and was treated with medication for blood pressure control. His hospital stay lasted 12 days, and he was discharged in stable condition.

Summarized Text: The case of a patient who underwent a successful heart bypass operation has been reported in the journal Thorax.


ROUGE Scores: {'rouge1': Score(precision=0.42105263157894735, recall=0.14035087719298245, fmeasure=0.21052631578947367), 'rouge2': Score(precision=0.0, recall=0.0, fmeasure=0.0), 'rougeL': Score(precision=0.2631578947368421, recall=0.08771929824561403, fmeasure=0.13157894736842105)}

W