In [None]:
# Install necessary packages
!pip install transformers datasets nltk rouge_score bert_score moverscore pyemd pytorch_pretrained_bert evaluate

In [None]:
!pip install --upgrade numpy==1.26.0

In [None]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from transformers import BartTokenizer, BartForConditionalGeneration, Trainer, TrainingArguments
from datasets import Dataset
from rouge_score import rouge_scorer
import evaluate

In [None]:
# Load the original train and test dataset
# Both CSV files are read from the mounted Google Drive folder.
train_csv_path = '/content/drive/MyDrive/mimic_train.csv'
test_csv_path = '/content/drive/MyDrive/mimic_test.csv'

train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

In [None]:
# Select first 3000 rows from the train dataset and 500 rows from the test dataset
# .copy() detaches each subset from the full frame it was taken from.
train_data = train_data.head(3000).copy()
test_data = test_data.head(500).copy()

In [None]:
# Preprocessing function
def preprocess(df):
    """Return a new frame without the ``subject_id`` and ``study_id`` columns.

    Args:
        df: DataFrame containing at least the two identifier columns.

    Returns:
        A new DataFrame with the identifier columns removed; ``df`` itself
        is not modified.
    """
    identifier_columns = ['subject_id', 'study_id']
    trimmed = df.drop(columns=identifier_columns)
    return trimmed

In [None]:
# Apply preprocessing to train and test datasets
# Both frames go through the same preprocess() call, train first, then test.
train_data, test_data = [preprocess(frame) for frame in (train_data, test_data)]

In [None]:
# Split train dataset into train and validation
# 20% of the rows are held out for validation; random_state=42 fixes the shuffle.
split_frames = train_test_split(train_data, test_size=0.2, random_state=42)
train_df, val_df = split_frames

In [None]:
# Convert to Hugging Face Dataset format
# Each pandas frame is wrapped in a datasets.Dataset, in train / validation / test order.
train_dataset, val_dataset, test_dataset = [
    Dataset.from_pandas(frame) for frame in (train_df, val_df, test_data)
]

In [None]:
# Tokenizer and model initialization
# The tokenizer and the seq2seq model are loaded from the same pretrained checkpoint.
checkpoint_name = 'facebook/bart-base'
tokenizer = BartTokenizer.from_pretrained(checkpoint_name)
model = BartForConditionalGeneration.from_pretrained(checkpoint_name)

In [None]:
# Tokenization function
def tokenize(batch):
    """Tokenize a batch of findings (inputs) and impressions (labels).

    Args:
        batch: Mapping with a 'finding' and an 'impression' entry, each a
            list of strings (the form ``Dataset.map(..., batched=True)`` passes).

    Returns:
        The tokenizer output for the findings (truncated/padded to 512
        tokens) with an added 'labels' entry holding the impression token
        ids (truncated/padded to 150 tokens). Padding positions in the
        labels are set to -100 so the training loss ignores them instead of
        rewarding the model for predicting padding.
    """
    # Plain Python lists are returned: Dataset.map stores the values itself,
    # so no tensor conversion is needed here.
    inputs = tokenizer(batch['finding'], max_length=512, truncation=True, padding='max_length')
    targets = tokenizer(batch['impression'], max_length=150, truncation=True, padding='max_length')

    pad_id = tokenizer.pad_token_id
    inputs['labels'] = [
        [-100 if token_id == pad_id else token_id for token_id in label_ids]
        for label_ids in targets['input_ids']
    ]
    return inputs

In [None]:
# Tokenize datasets
def _tokenize_split(split):
    """Run tokenize() over one split in batches, dropping all of its original columns."""
    return split.map(tokenize, batched=True, remove_columns=split.column_names)


train_dataset = _tokenize_split(train_dataset)
val_dataset = _tokenize_split(val_dataset)
test_dataset = _tokenize_split(test_dataset)

Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
# Training arguments
# The settings are collected in a dict first so they can be inspected or tweaked
# before the TrainingArguments object is built.
training_config = dict(
    output_dir='./results',
    # NOTE(review): newer transformers releases name this option `eval_strategy`;
    # confirm against the installed version.
    evaluation_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    save_total_limit=1,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
)
training_args = TrainingArguments(**training_config)



In [None]:
# Trainer initialization
# Wires the model, the training configuration, and the tokenized train/validation
# splits into a single Trainer object.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
)

  trainer = Trainer(


In [None]:
# Train the model
# Runs the training loop of the Trainer built in the previous cell. As the last
# expression in the cell, the value returned by train() is displayed as the cell output.
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33maninbhtry-01[0m ([33maninbhtry-01-jadavpur-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,0.3492,0.267159
2,0.2397,0.244002
3,0.2068,0.243622
4,0.1719,0.24119
5,0.1899,0.239028




TrainOutput(global_step=1500, training_loss=0.49634736180305483, metrics={'train_runtime': 1137.7647, 'train_samples_per_second': 10.547, 'train_steps_per_second': 1.318, 'total_flos': 3658418749440000.0, 'train_loss': 0.49634736180305483, 'epoch': 5.0})

In [None]:
# Summarization of the findings from the test dataset
def generate_summary(finding_text):
    """Generate a summary for one findings string with the fine-tuned model.

    Args:
        finding_text: The findings text to summarize.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # Truncate to the same 512-token limit used for the inputs at training time and
    # move the encoded tensors to the device the model lives on.
    encoded = tokenizer(
        finding_text, return_tensors="pt", max_length=512, truncation=True
    ).to(model.device)
    generated_ids = model.generate(
        **encoded,  # passes both input_ids and attention_mask
        max_length=150, num_beams=4, length_penalty=1.5
    )
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)


# Make sure dropout is disabled so generation is deterministic regardless of the
# mode the model was left in by earlier cells.
model.eval()

test_data_with_summary = test_data.copy()
test_data_with_summary['summary'] = test_data_with_summary['finding'].apply(generate_summary)

In [None]:
# Convert to CSV
# The findings, impressions and generated summaries are written without the index column.
file_path = "/content/drive/MyDrive/ME THESIS/Predicted Summaries/Bart.csv"
test_data_with_summary.to_csv(path_or_buf=file_path, index=False)

In [None]:
# Load the CSV file containing the model generated summaries
# The path is spelled out in this cell, so the cell does not depend on `file_path`
# from the export cell.
summaries_csv = "/content/drive/MyDrive/ME THESIS/Predicted Summaries/Bart.csv"
test_data_with_summary = pd.read_csv(summaries_csv)

In [None]:
# Extract ground truth (impression) and generated summaries
# An empty field in the CSV is read back as NaN; map it to "" first so it is not
# converted to (and scored as) the literal word "nan".
references = test_data_with_summary["impression"].fillna("").astype(str).tolist()
predictions = test_data_with_summary["summary"].fillna("").astype(str).tolist()

In [None]:
# Initialize ROUGE scorer
# Scores ROUGE-1, ROUGE-2 and ROUGE-L, with stemming enabled.
rouge_variants = ['rouge1', 'rouge2', 'rougeL']
rouge = rouge_scorer.RougeScorer(rouge_variants, use_stemmer=True)

In [None]:
# Lists that hold the per-example ROUGE-1, ROUGE-2 and ROUGE-L F-measures
# (nothing is computed in this cell; the lists start out empty)
rouge1_scores, rouge2_scores, rougeL_scores = [], [], []

In [None]:
from nltk.translate.meteor_score import meteor_score

In [None]:
# Download the WordNet corpus through NLTK (presumably needed for METEOR — confirm).
# The return value of download() is the cell's displayed output.
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Compute ROUGE scores for every (reference, prediction) pair.
# The three lists are rebuilt from scratch here, so re-running this cell cannot
# append duplicate entries to lists left over from a previous run.
rouge_results = [rouge.score(ref, pred) for ref, pred in zip(references, predictions)]
rouge1_scores = [result['rouge1'].fmeasure for result in rouge_results]
rouge2_scores = [result['rouge2'].fmeasure for result in rouge_results]
rougeL_scores = [result['rougeL'].fmeasure for result in rouge_results]

In [None]:
from bert_score import score

In [None]:
# Compute BERTScore
# Predictions are passed first and references second; the result is unpacked
# into the three values P, R and F1.
bertscore_outputs = score(predictions, references, lang="en", verbose=False)
P, R, F1 = bertscore_outputs

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Calculate averages
def _average(values):
    """Return the arithmetic mean of a list of numbers."""
    return sum(values) / len(values)


average_scores = {
    "ROUGE-1": _average(rouge1_scores),
    "ROUGE-2": _average(rouge2_scores),
    "ROUGE-L": _average(rougeL_scores),
    # F1 is reduced with its own mean() and converted to a Python number with item().
    "BERTScore-F1": F1.mean().item(),
}

In [None]:
# Print the results
print("Average Scores:")
for metric, value in average_scores.items():
    # The loop variable is deliberately not called `score`: that name is the function
    # imported from bert_score, and overwriting it would break re-running the
    # BERTScore cell.
    print(f"{metric}: {value:.4f}")

Average Scores:
ROUGE-1: 0.3733
ROUGE-2: 0.2215
ROUGE-L: 0.3352
BERTScore-F1: 0.8888


In [None]:
# Load metrics
# Loads the "meteor" metric through the `evaluate` library.
meteor = evaluate.load("meteor")

Downloading builder script:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


In [None]:
# Calculate METEOR
# The result is stored as `meteor_result` so that the `meteor_score` function
# imported from nltk.translate.meteor_score is not overwritten.
meteor_result = meteor.compute(predictions=predictions, references=references)

# Print METEOR score
print("METEOR:", meteor_result)

METEOR: {'meteor': 0.3445187207010541}
