In [None]:
# Install libraries

!pip install transformers sentencepiece torch numpy pandas nltk bert_score

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import T5Tokenizer, T5ForConditionalGeneration, TrainingArguments, DataCollatorForSeq2Seq, PegasusForConditionalGeneration, PegasusTokenizer
import os
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
from bert_score import score
import nltk



In [None]:
# add paths
# test_input : folder that holds the raw conversation CSV files (passed to create_dataset as its input path)
# output_file: path of the processed CSV that create_dataset writes and that is read back as the test dataframe
test_input = "/path/to/input/folder/containing/csv"
output_file = "path/for/processed/ouput/csv"

Preprocessing of the CSV files to create the dataset

In [None]:
def assign_counseling_component(sub_topic):
  """Map a "Sub topic" annotation to its counseling-component code.

  Returns "SH" (Symptom & History) for "symp/reasoning", "RT" (Reflecting)
  for "routine", "DF" (Discussion Filler) for "inactive", and "PD"
  (Patient Discovery) for every other value. Matching is exact.
  """
  known_components = (
    ("symp/reasoning", "SH"),
    ("routine", "RT"),
    ("inactive", "DF"),
  )
  for label, component in known_components:
    if sub_topic == label:
      return component
  # Anything unrecognised falls back to Patient Discovery
  return "PD"


def create_dataset(input_path, output_path):
  """Build a summarization dataset from a folder of annotated conversation CSVs.

  Every ``.csv`` file in ``input_path`` is read as one conversation. The file
  must provide "Utterance" and "Sub topic" columns; its last three rows are
  treated as a footer whose first column labels the summary / primary topic /
  secondary topic and whose second column holds the value. One output row is
  produced per file with the columns ``Index`` (running counter), ``Input``
  ("Summarize: " followed by all utterances joined with spaces) and ``Output``
  (the summary text, or None when no summary row was found). The combined
  table is written to ``output_path`` as CSV.

  Args:
    input_path: Directory containing the conversation CSV files.
    output_path: Destination path of the combined CSV file.

  Raises:
    KeyError: If a file lacks the "Utterance" or "Sub topic" column.
  """
  final_data = []

  # os.listdir() returns entries in arbitrary order; sort so that the row
  # order and the "Index" column are reproducible between runs.
  for filename in sorted(os.listdir(input_path)):
    if not filename.endswith(".csv"):
      continue
    df = pd.read_csv(os.path.join(input_path, filename))

    # extracting targeted set, primary and secondary topic from the footer.
    # Look at no more rows than the file has, so a short file cannot raise
    # IndexError.
    summary_text, primary_topic, secondary_topic = None, None, None
    for i in range(1, min(3, len(df)) + 1):
      label = str(df.iloc[-i, 0]).lower()
      if "summary" in label:
        summary_text = df.iloc[-i, 1]
      elif "primary topic" in label:
        primary_topic = df.iloc[-i, 1]
      elif "secondary topic" in label:
        secondary_topic = df.iloc[-i, 1]

    # Remove last three rows (the footer). .copy() gives an independent frame
    # so adding a column below does not trigger SettingWithCopyWarning.
    df_cleaned = df.iloc[:-3].reset_index(drop=True)[["Utterance", "Sub topic"]].copy()

    # NOTE: the component codes are computed per utterance but are not part of
    # the CSV written below.
    df_cleaned["Counseling_Component"] = df_cleaned["Sub topic"].apply(assign_counseling_component)

    # Combine utterances into a single conversation text. fillna() has to run
    # before astype(str): once NaN is cast to the string "nan" it is no longer
    # recognised as missing and would end up inside the conversation.
    utterances = df_cleaned["Utterance"].fillna("").astype(str)
    full_conversation = " ".join(utterances.tolist())

    final_data.append({
      "Index": len(final_data),
      "Input": f"Summarize: {full_conversation}",
      "Output": summary_text,
    })

  # Explicit columns keep the header present even when no CSV file was found,
  # so the output can still be read back with pd.read_csv.
  df_final = pd.DataFrame(final_data, columns=["Index", "Input", "Output"])
  df_final.to_csv(output_path, index=False)
  print(f"Processed and saved all data to: {output_path}")

In [None]:
# Build the processed dataset CSV from the raw conversation files
input_folder = test_input
create_dataset(input_folder, output_file)

In [None]:
# creating test dataframe: blank out missing cells and make sure both text
# columns hold plain strings
test_df = (
    pd.read_csv(output_file)
    .fillna("")
    .astype({"Input": str, "Output": str})
)

In [None]:
# Select the CUDA device when torch reports one as available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Fetch the NLTK tokenizer data (nltk.word_tokenize is used by the scoring cells)
for _resource in ('punkt_tab', "punkt"):
    nltk.download(_resource)

T5-small

In [None]:
# Work on a copy so the columns added in this section (generated summaries,
# scores) do not leak into test_df or into the other model sections.
df = test_df.copy()

In [None]:
# Loading the model
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# Run on the GPU when one is available (see `device`)
model = model.to(device)


# Generating summaries using t5-small model
def generate_summary(text):
    """Return the t5-small summary of ``text``.

    Args:
        text: Conversation text; a leading "summarize:" prefix (any casing) is
            accepted and not duplicated.

    Returns:
        The decoded summary string (beam search, at most 100 generated tokens).
    """
    # The "Input" column already starts with "Summarize: ". Strip it so the
    # task prefix appears once instead of using up part of the 512-token budget.
    prefix = "summarize:"
    if text[:len(prefix)].lower() == prefix:
        text = text[len(prefix):].lstrip()
    input_text = "summarize: " + text
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
    # Inputs must live on the same device as the model weights
    inputs = inputs.to(model.device)
    with torch.no_grad():
        summary_ids = model.generate(**inputs, max_length=100, num_beams=5, length_penalty=2.0)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

df["Generated_Summary"] = df["Input"].apply(generate_summary)


In [None]:
# computing the BLEU and BERT scores

def compute_sentence_bleu(reference, hypothesis):
    """Return the smoothed BLEU score of one hypothesis against one reference.

    Both strings are tokenized by splitting on whitespace. The 1- to 4-gram
    precisions are weighted equally and NLTK smoothing method 4 is applied.
    """
    uniform_weights = (0.25,) * 4
    return sentence_bleu(
        [reference.split()],
        hypothesis.split(),
        weights=uniform_weights,
        smoothing_function=SmoothingFunction().method4,
    )

# Sentence-level BLEU for every row
df["BLEU_Score"] = df.apply(
    lambda record: compute_sentence_bleu(record["Output"], record["Generated_Summary"]),
    axis=1,
)

# Corpus-level BLEU on whitespace tokens; each hypothesis has a single reference
references = [[target.split()] for target in df["Output"].tolist()]
hypotheses = [generated.split() for generated in df["Generated_Summary"].tolist()]

final_bleu = corpus_bleu(
    references,
    hypotheses,
    weights=(0.25, 0.25, 0.25, 0.25),
    smoothing_function=SmoothingFunction().method4,
)

# BERTScore (candidates first, references second), rescaled with the baseline
candidate_texts = df["Generated_Summary"].tolist()
reference_texts = df["Output"].tolist()
P, R, F1 = score(candidate_texts, reference_texts, lang="en", rescale_with_baseline=True)

df["BERTScore_F1"] = F1.tolist()

final_bert = F1.mean().item()

print(f"Final Corpus BLEU Score: {final_bleu:.4f}")
print(f"Final BERTScore F1: {final_bert:.4f}")

T5-large

In [None]:
# Work on a copy so the columns added in this section (generated summaries,
# scores) do not leak into test_df or into the other model sections.
df = test_df.copy()

In [None]:
# Loading the model
model_name = "t5-large"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# Run on the GPU when one is available (see `device`)
model = model.to(device)

# Generating summaries for each data point
def generate_summary(text):
    """Return the t5-large summary of ``text``.

    Args:
        text: Conversation text; a leading "summarize:" prefix (any casing) is
            accepted and not duplicated.

    Returns:
        The decoded summary string (beam search, at most 150 generated tokens).
    """
    # The "Input" column already starts with "Summarize: ". Strip it so the
    # task prefix appears once, in the same lowercase form the t5-small cell uses.
    prefix = "summarize:"
    if text[:len(prefix)].lower() == prefix:
        text = text[len(prefix):].lstrip()
    input_text = "summarize: " + text
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
    # Inputs must live on the same device as the model weights
    inputs = inputs.to(model.device)
    # Inference only: skip gradient tracking to save memory
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=150, num_beams=5, length_penalty=1.0, early_stopping=True)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

df["Generated_Summary"] = df["Input"].apply(generate_summary)

In [None]:
# computing the BLEU and BERT scores
def compute_sentence_bleu(reference, hypothesis):
    """Score a single hypothesis against a single reference with smoothed BLEU.

    Tokens are obtained with ``str.split()``; the four n-gram orders carry
    equal weight and NLTK smoothing method 4 is used.
    """
    ref_tokens = reference.split()
    hyp_tokens = hypothesis.split()
    return sentence_bleu(
        [ref_tokens],
        hyp_tokens,
        weights=(0.25, 0.25, 0.25, 0.25),
        smoothing_function=SmoothingFunction().method4,
    )

# Sentence-level BLEU for every row
df["BLEU_Score"] = df.apply(
    lambda record: compute_sentence_bleu(record["Output"], record["Generated_Summary"]),
    axis=1,
)

# Corpus-level BLEU on whitespace tokens; each hypothesis has a single reference
references = [[target.split()] for target in df["Output"].tolist()]
hypotheses = [generated.split() for generated in df["Generated_Summary"].tolist()]

final_bleu = corpus_bleu(
    references,
    hypotheses,
    weights=(0.25, 0.25, 0.25, 0.25),
    smoothing_function=SmoothingFunction().method4,
)

# BERTScore (candidates first, references second), rescaled with the baseline
candidate_texts = df["Generated_Summary"].tolist()
reference_texts = df["Output"].tolist()
P, R, F1 = score(candidate_texts, reference_texts, lang="en", rescale_with_baseline=True)

df["BERTScore_F1"] = F1.tolist()

final_bert = F1.mean().item()

print(f"Final Corpus BLEU Score: {final_bleu:.4f}")
print(f"Final BERTScore F1: {final_bert:.4f}")

Scaling of BERTScore and BLEU scores: https://github.com/Tiiiger/bert_score/blob/master/journal/rescale_baseline.md

In [None]:
# computing the BLEU and BERT scores and scaling them 

def compute_sentence_bleu(reference, hypothesis):
    """Return the smoothed sentence BLEU of ``hypothesis`` against ``reference``.

    Both strings are tokenized with ``nltk.word_tokenize``; NLTK's default
    n-gram weights and smoothing method 4 are used.
    """
    ref_tokens = nltk.word_tokenize(reference)
    hyp_tokens = nltk.word_tokenize(hypothesis)
    return sentence_bleu(
        [ref_tokens],
        hyp_tokens,
        smoothing_function=SmoothingFunction().method4,
    )

# BLEU scores (one per row)
df["BLEU_Score"] = df.apply(lambda row: compute_sentence_bleu(row["Output"], row["Generated_Summary"]), axis=1)

# Min-max scale the per-row BLEU scores to [0, 1]. When every score is equal
# (e.g. a single row) the range is zero; use 0.0 instead of dividing by zero,
# which would fill the column with NaN.
bleu_min = df["BLEU_Score"].min()
bleu_range = df["BLEU_Score"].max() - bleu_min
df["BLEU_Score"] = (df["BLEU_Score"] - bleu_min) / bleu_range if bleu_range > 0 else 0.0

references = [[nltk.word_tokenize(ref)] for ref in df["Output"].tolist()]
hypotheses = [nltk.word_tokenize(hyp) for hyp in df["Generated_Summary"].tolist()]
# Corpus BLEU is reported as returned: scaling it with min=0 and max=1 leaves
# the value unchanged.
final_bleu = corpus_bleu(references, hypotheses, smoothing_function=SmoothingFunction().method4)

# BERT Score
P, R, F1 = score(df["Generated_Summary"].astype(str).tolist(), df["Output"].astype(str).tolist(), lang="en", rescale_with_baseline=True)

# Convert the tensor to a NumPy array explicitly instead of relying on pandas
# to coerce a torch tensor, then min-max scale with the same zero-range guard.
f1_values = F1.detach().cpu().numpy()
f1_min = f1_values.min()
f1_range = f1_values.max() - f1_min
df["BERTScore_F1"] = (f1_values - f1_min) / f1_range if f1_range > 0 else 0.0
final_bert = float(df["BERTScore_F1"].mean())

print(f"Final Corpus BLEU Score (Scaled): {final_bleu:.4f}")
print(f"Final BERTScore F1 (Scaled): {final_bert:.4f}")


Pegasus

In [None]:
# Work on a copy so the columns added in this section (generated summaries,
# scores) do not leak into test_df or into the other model sections.
df = test_df.copy()

In [None]:
# Loading pegasus-large model
model_name = "google/pegasus-large"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

# Move the model unconditionally. `device` is a torch.device object, so
# comparing it with the string "cuda" is not a usable guard, and the inputs
# below are always sent to the model's device. On a CPU-only machine this is
# a no-op.
model = model.to(device)

def generate_summary(text):
    """Return the pegasus-large summary of ``text``.

    The text is passed to the tokenizer as given (truncated to 512 tokens).

    Returns:
        The decoded summary string (beam search, at most 100 generated tokens).
    """
    inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt", max_length=512)
    # Inputs must live on the same device as the model weights
    inputs = inputs.to(model.device)
    with torch.no_grad():
        summary_ids = model.generate(**inputs, max_length=100, num_beams=5, length_penalty=2.0)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

df["Generated_Summary"] = df["Input"].apply(generate_summary)

In [None]:
# computing BLEU and BERT scores
def compute_sentence_bleu(reference, hypothesis):
    """Smoothed sentence-level BLEU for one reference/hypothesis pair.

    ``nltk.word_tokenize`` produces the tokens; scoring uses NLTK's default
    n-gram weights together with smoothing method 4.
    """
    tokenized_reference = nltk.word_tokenize(reference)
    tokenized_hypothesis = nltk.word_tokenize(hypothesis)
    smoother = SmoothingFunction().method4
    return sentence_bleu([tokenized_reference], tokenized_hypothesis, smoothing_function=smoother)

# Compute BLEU scores (one per row)
df["BLEU_Score"] = df.apply(lambda row: compute_sentence_bleu(row["Output"], row["Generated_Summary"]), axis=1)

# Min-max scale the per-row BLEU scores to [0, 1]. When every score is equal
# (e.g. a single row) the range is zero; use 0.0 instead of dividing by zero,
# which would fill the column with NaN.
bleu_min = df["BLEU_Score"].min()
bleu_range = df["BLEU_Score"].max() - bleu_min
df["BLEU_Score"] = (df["BLEU_Score"] - bleu_min) / bleu_range if bleu_range > 0 else 0.0

# Compute Corpus BLEU
references = [[nltk.word_tokenize(ref)] for ref in df["Output"].tolist()]
hypotheses = [nltk.word_tokenize(hyp) for hyp in df["Generated_Summary"].tolist()]
# Corpus BLEU is reported as returned: scaling it with min=0 and max=1 leaves
# the value unchanged.
final_bleu = corpus_bleu(references, hypotheses, smoothing_function=SmoothingFunction().method4)

# Compute BERTScore
P, R, F1 = score(df["Generated_Summary"].astype(str).tolist(), df["Output"].astype(str).tolist(), lang="en", rescale_with_baseline=True)

# Convert the tensor to a NumPy array explicitly instead of relying on pandas
# to coerce a torch tensor, then min-max scale with the same zero-range guard.
f1_values = F1.detach().cpu().numpy()
f1_min = f1_values.min()
f1_range = f1_values.max() - f1_min
df["BERTScore_F1"] = (f1_values - f1_min) / f1_range if f1_range > 0 else 0.0
final_bert = float(df["BERTScore_F1"].mean())

print(f"Final Corpus BLEU Score (Scaled): {final_bleu:.4f}")
print(f"Final BERTScore F1 (Scaled): {final_bert:.4f}")