## <center>Deep Learning and Neural Networks: Final Project</center>
### This notebook covers testing and inference on the test set for the following models:
- The fine-tuned models: <b>Pegasus-Large, BART-Base, T5-Base</b> [fine-tuning is shown in a separate notebook].
- Zero-shot predictions using <b>GPT-3 (text-davinci-003)</b> API (provided by OpenAI)
- Zero-shot predictions using <b>GPT-4 (ChatGPT)</b>

## Installing necessary modules

In [None]:
# Install the PyTorch stack pinned to the 1.13.1 / CUDA 11.6 builds, using the PyTorch wheel index as an extra index
!pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U
# Installing the BLURR library and Bert-Score package (quiet mode; versions are not pinned)

!pip install ohmeow-blurr -q
!pip install bert-score -q
# Additional unpinned packages: wandb (imported further down via fastai.callback.wandb) and sacrebleu
!pip install wandb
!pip install sacrebleu

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/, https://download.pytorch.org/whl/cu116
Collecting torch==1.13.1+cu116
  Downloading https://download.pytorch.org/whl/cu116/torch-1.13.1%2Bcu116-cp39-cp39-linux_x86_64.whl (1977.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 GB[0m [31m887.7 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.14.1+cu116
  Downloading https://download.pytorch.org/whl/cu116/torchvision-0.14.1%2Bcu116-cp39-cp39-linux_x86_64.whl (24.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.2/24.2 MB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchaudio==0.13.1
  Downloading https://download.pytorch.org/whl/cu116/torchaudio-0.13.1%2Bcu116-cp39-cp39-linux_x86_64.whl (4.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m100.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchtext==0.14.1
  Download

## Importing the required modules

In [None]:
import pandas as pd
from fastai.text.all import *
from transformers import *
from blurr.text.data.all import *
from blurr.text.modeling.all import *
from fastai.callback.wandb import *

  squad_metric = load_metric("squad")


Downloading builder script:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

## Importing the Test Set from PubMed-Summarization Dataset

In [None]:
# Fetch the first 100 records of the PubMed summarization test split from Hugging Face
from datasets import load_dataset

dataset = load_dataset('ccdv/pubmed-summarization', split="test[:100]")
test_data = pd.DataFrame(dataset)

# Preprocess: strip newline characters from every article, then keep only the first 20 rows
test_data['article'] = [article.replace('\n', '') for article in test_data['article']]
test_data = test_data.iloc[:20]
test_data.shape

Downloading builder script:   0%|          | 0.00/5.13k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/2.66k [00:00<?, ?B/s]



Downloading and preparing dataset pubmed-summarization/section to /root/.cache/huggingface/datasets/ccdv___pubmed-summarization/section/1.0.0/f765ec606c790e8c5694b226814a13f1974ba4ea98280989edaffb152ded5e2b...


Downloading data:   0%|          | 0.00/779M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/43.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/43.8M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Dataset pubmed-summarization downloaded and prepared to /root/.cache/huggingface/datasets/ccdv___pubmed-summarization/section/1.0.0/f765ec606c790e8c5694b226814a13f1974ba4ea98280989edaffb152ded5e2b. Subsequent calls will reuse this data.


(20, 2)

## OpenAI API calls for GPT-3

In [3]:
# Install the OpenAI Python client (unpinned; the run captured below resolved to openai 0.27.4)

!pip install openai

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openai
  Downloading openai-0.27.4-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.3/70.3 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiohttp
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting aiosignal>=1.1.2
  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)
Collecting yarl<2.0,>=1.0
  Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting frozenlist>=1.1.1
  Downloading frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014

In [1]:
# Import the warnings package to ignore the deprecation warnings.
# NOTE: no category is passed, so this filter hides every warning for the rest of the session,
# not only deprecation ones.

import warnings
warnings.filterwarnings("ignore")

In [3]:
import openai
import time


# Prefer a key supplied through the OPENAI_API_KEY environment variable so the
# secret never has to be pasted into (and saved with) the notebook. The original
# placeholder stays as the fallback, so the cell behaves as before when the
# variable is not set.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY", "PUT YOUR API KEY HERE")


# Function to fetch a summary using OpenAI's API
def get_GPT_summary(text, summary_wc, model, toks):
    """Request a summary of `text` from the OpenAI Completion endpoint.

    Args:
        text: Article text appended to the prompt.
        summary_wc: Desired summary length in words. It is rounded down to a
            multiple of 10 and capped at 250 before being placed in the prompt,
            which asks for that many words up to 50 more.
        model: Model name passed to `openai.Completion.create`.
        toks: Value passed as `max_tokens` for the completion.

    Returns:
        A tuple `(summary_text, elapsed_seconds, total_tokens)` on success, or
        `None` when the request (or reading its response) raised an exception.
    """
    # Prompt prefix, this is how the prompt starts
    prompt_prefix = "Professionally summarize this medical article like a doctor with about"
    summary_wc = min(summary_wc - (summary_wc % 10), 250)

    try:
        start_time = time.time()
        response = openai.Completion.create(
            model=model,
            prompt=f"{prompt_prefix} {summary_wc} to {summary_wc+50} words :\n{text}",
            temperature=0.4,
            max_tokens=toks,
        )
        end_time = time.time()
        return response['choices'][0]['text'], round(end_time - start_time, 6), response['usage']['total_tokens']
    except Exception as err:
        # Best-effort by design: a failed request must not abort the caller's loop,
        # so the failure is reported and signalled with None. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt / SystemExit propagate,
        # so a long run can still be interrupted.
        print(f"get_GPT_summary failed ({type(err).__name__}): {err}")
        return None
   


# Initializing the parameters for making API call
model = 'text-davinci-003'
summary_word_count = 1000  # NOTE(review): not referenced anywhere in this cell
tokens = 256
result_columns = ['index', 'article', 'GPT_summary', 'reference_summary']

# Rows are collected in a plain list and turned into a DataFrame once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame row by row copies it on every iteration.
gpt_records = []

for index, row in test_data.iterrows():

    # The requested length is the word count of the reference abstract
    output = get_GPT_summary(row.article, len(row.abstract.split()), model, tokens)

    # A falsy result means the API call did not produce a summary; skip that article
    if not output:
        continue

    outcome, inf_time, tokens_used = output
    # Record all data for this article
    gpt_records.append({
        'index': index,
        'article': row.article,
        'GPT_summary': outcome,
        'reference_summary': row.abstract,
    })

# Passing the columns explicitly keeps the expected schema even when no call succeeded
results_df = pd.DataFrame(gpt_records, columns=result_columns)


In [None]:
# Results obtained from the GPT-3 API calls
results_df

Unnamed: 0,index,article,GPT_summary,reference_summary
0,0,"anxiety affects quality of life in those living with parkinson 's disease ( pd ) more so than overall cognitive status , motor deficits , apathy , and depression [ 13 ] . although anxiety and depression are often related and coexist in pd patients , recent research suggests that anxiety rather than depression is the most prominent and prevalent mood disorder in pd [ 5 , 6 ] . yet , our current understanding of anxiety and its impact on cognition in pd , as well as its neural basis and best treatment practices , remains meager and lags far behind that of depression . overall , neuropsych...","\n\nAnxiety affects the quality of life of those living with Parkinson's Disease (PD) more than overall cognitive status, motor deficits, apathy, and depression. Although anxiety and depression are often related and coexist in PD patients, research suggests that anxiety is the most prominent and prevalent mood disorder. Current understanding of anxiety and its impact on cognition in PD, as well as its neural basis and best treatment practices, is limited. Neuropsychiatric symptoms in PD have been associated with poorer cognitive performance. A study of 27 PD patients showed a negative rel...","research on the implications of anxiety in parkinson 's disease ( pd ) has been neglected despite its prevalence in nearly 50% of patients and its negative impact on quality of life . \n previous reports have noted that neuropsychiatric symptoms impair cognitive performance in pd patients ; however , to date , no study has directly compared pd patients with and without anxiety to examine the impact of anxiety on cognitive impairments in pd . \n this study compared cognitive performance across 50 pd participants with and without anxiety ( 17 pda+ ; 33 pda ) , who underwent neurological and ..."
1,1,"small non - coding rnas are transcribed into mrna but remain untranslated in eukaryotic cells . they include sirna ( small interfering rna ) , mirna ( microrna ) , pirna ( piwi - interacting rna ) and snorna ( small nucleolar rna ) . mirnas are a class of multifunctional singled - stranded small rna which are ~20 nt in length and regulate the stability or translational efficiency of targeted messenger rna depending on the base - pairing complementarity between the mirna and its target mrna [ 1 , 2 ] . although over 1,000 mirna sequences have been identified from the tissues or cells of ...","\n\nSmall non-coding RNAs, including siRNA, miRNA, piRNA, and snoRNA, are transcribed but remain untranslated in eukaryotic cells. miRNAs are a class of multifunctional single-stranded small RNAs that regulate the stability and translational efficiency of targeted mRNAs. Aberrant expression of miRNAs has been observed in breast cancer and is associated with tumorigenesis, metastasis, and apoptosis. miRNAs regulate the cell cycle, cell growth, epithelial-to-mesenchymal transition, and cancer stem cell characteristics. miRNAs may also serve as novel diagnostic and prognostic biomarkers for b...","small non - coding rnas include sirna , mirna , pirna and snorna . \n the involvement of mirnas in the regulation of mammary gland tumorigenesis has been widely studied while the role for other small non - coding rnas remains unclear . here \n we summarize the involvement of mirna in breast cancer onset and progression through regulating the cell cycle and cellular proliferation . \n the regulation of breast cancer stem cells and tumor regeneration by mirna is reviewed . \n in addition , the emerging evidence demonstrating the involvement of pirna and snorna in breast cancer is briefly des..."
2,3,"congenital adrenal hyperplasia ( cah ) refers to a group of autosomal recessive disorders caused by an enzyme deficiency which leads to defects in biosynthesis of steroid precursors . depending on the severity and degree of 21 hydroxylase deficiency , the clinical spectrum may vary from mild form of non classical cah to classic cah . however , the non classical cah variant is more common with a prevalence rate of 1 in 1000 . it also helps in maintaining normal levels of precursors by suppressing adreno cortico trophic hormone ( acth ) . during childhood , the management is largely focu...","\n\nCongenital Adrenal Hyperplasia (CAH) is a group of autosomal recessive disorders caused by an enzyme deficiency, resulting in defects in steroid precursor biosynthesis. Non-classical CAH is the most common variant, with a prevalence rate of 1 in 1000. Treatment focuses on achieving normal growth and attaining appropriate final adult height. This study examined 29 patients diagnosed with CAH at a tertiary center in Southern India. Most patients were female (76%), with 11 adults and 18 children. Among the males, one child was identified at birth and five infants presented with symptoms o...","congenital adrenal hyperplasia is a group of autosomal recessive disorders caused by enzyme deficiency which leads to defects in biosynthesis of steroid precursors . \n most common is 21 hydroxylase deficiency . \n clinical spectrum varies from non - classical cah to classic cah , and it may be simple virilising form or salt - wastinfg type . \n 29 patients were included in our study from january 2012 to october 2012 . \n 76% were females . \n male babies typically presented with adrenal crisis between 3rd to 6th week of life . around 20% of females \n were identified and appropriately tre..."
3,6,"the family is the cornerstone of human social support network and its presence is essential in everyone s life . changes inevitably occur in families with illness and hospitalization of a family member . in other words , among the sources of stress for families are accidents leading to hospitalization particularly intensive care unit ( icu ) . statistics show that 8% of hospital beds in the united states are occupied by the intensive care units . stress in the family while the patient is in the icu can disrupt the harmony power of the family members and finally , it may causes disturbanc...","\n\nFamilies are an essential part of social support networks, and changes occur when a family member is hospitalized, particularly in an intensive care unit (ICU). The sources of stress for families in the ICU include the patient’s fear of death, financial problems, lack of awareness about the environment, and their satisfaction level. To address these needs, five sections have been identified: assurance, staying close to the patient, receiving information, feeling comfortable, and receiving support. Nurses are in a good position to assess family needs and plan for meeting them with appro...","background : since the family is a social system , the impairment in each of its component members may disrupt the entire family system . \n one of the stress sources for families is accidents leading to hospitalization particularly in the intensive care unit ( icu ) . in many cases , \n the families needs in patient care are not met that cause dissatisfaction . since the nurses spend a lot of time with patients and their families , they are in a good position to assess their needs and perform appropriate interventions . \n therefore , this study was conducted to determine the effectivenes..."
4,7,"development of human societies and industrialization as well as changes in stress sources has changed disease pattern in civilized societies . as a result , the disease pattern has changed from traditional diseases such as infectious diseases and malnutrition to diseases such as heart disease , diabetes , accidents and so forth . among them coronary artery disease is the most cardiovascular disorder as a health problem in developing and developed countries . in fact , this is not only a chronic disease associated with high mortality , but it causes limitations in life and disability in a...","\n\nIndustrialization and changes in stress sources have led to a shift in disease patterns in modern societies, from traditional diseases such as infectious diseases and malnutrition to diseases such as heart disease, diabetes, and accidents. Coronary artery disease is the most common cardiovascular disorder, causing high mortality and disability in a large part of the productive forces of the country. It is associated with reduced production and increased medical costs. Anxiety and depression are two of the most important psychological complications experienced by patients with cardiovas...","background and objective : anxiety and depression are among the psychological disorders in heart \n surgeries . establishing a simple communication is essential to reduce anxiety and depression . \n hence , the objective of the present studywas to examine the impact of peplau therapeutic communication model on anxiety and depression in patients , who were candidate for coronary artery bypass in al - zahra heart hospital , shiraz during 2012 - 2013 . \n methods : this is a clinical trial in which 74 patients were randomly divided into intervention and control groups , each consisted of ..."
5,10,"male macroprolactinomas ( mprl ) are usually revealed by headaches , visual troubles and gonadal insufficiency . suppurative meningitis ( sm ) , a life - threatening condition , is scarcely observed in subjects with macro tumors secreting prolactin ( prl ) and in other pituitary tumors ( pt ) . however , in some very rare cases it can be a primary presentation or appear after radiotherapy or medical treatment used for tumors destroying the sellar floor and/or the skull base . this destruction leads to cerebral spinal fluid ( csf ) leak , which can act as an entry portal for organisms pr...","\n\nMale macroprolactinomas (mprl) are usually revealed by headaches, visual troubles, and gonadal insufficiency. Suppurative meningitis (SM), a life-threatening condition, is rarely observed in subjects with macro tumors secreting prolactin (PRL) and other pituitary tumors (PT). However, in very rare cases, it can be a primary presentation or appear after radiotherapy or medical treatment used for tumors destroying the sellar floor and/or the skull base. This destruction leads to cerebral spinal fluid (CSF) leak, which can act as an entry portal for organisms predisposing to meningitis. I...","background : suppurative meningitis ( sm ) or bacterial meningitis is a life - threatening condition , which is exceptionally due to pituitary tumors ( pt ) . our aim was to analyze its frequency among male macroprolactinomas ( mprl ) deemed to be aggressive , to report the cases we observed in our practice and describe the circumstances under which sm appeared.materials and methods : we retrospectively analyzed 82 male mprl in order to look for a history of well proved sm and the circumstances under which sm appeared . \n we also took into account the possibility of sm relapsing.results:f..."
6,11,many studies have not been reported in literature for lumbar discectomy by destandau endospine system . we report a series of 300 patients operated for lumbar dissectomy by destandau endospine system . a total of 300 patients suffering from lumbar disc herniations were operated between january 2002 and december 2008 . technique comprised localization of symptomatic level followed by insertion of an endospine system devise through a 15 mm skin and fascial incision . endoscopic discectomy is then carried out by conventional micro disc surgery instruments by minimal invasive route . the ...,"\n\nThis study reports the results of a series of 300 patients who underwent lumbar discectomy by the Destandau Endospine System between January 2002 and December 2008. The results were evaluated using modified MacNab's criteria, with 90% of patients having excellent to good, 8% having fair, and 2% having poor results. The complications observed were discitis and dural tear in five patients each, and nerve root injury in two patients. 90% of patients were able to return to light and sedentary work with an average delay of 3 weeks and normal physical activities after 2 months.\n\nThe advan...",background : posterior endoscopic discectomy is an established method for treatment of lumbar disc herniation . \n many studies have not been reported in literature for lumbar discectomy by destandau endospine system . \n we report a series of 300 patients operated for lumbar dissectomy by destandau endospine system.materials and methods : a total of 300 patients suffering from lumbar disc herniations were operated between january 2002 and december 2008 . \n all patients were operated as day care procedure . \n technique comprised localization of symptomatic level followed by insertion of ...
7,15,"the femoral head often leads to healing complications , while the more predictable prosthetic replacements are associated with poorer function and significant complications . the treatment of these fractures depends on the age of the patient , fracture displacement , bone quality , timing of surgery and activity level of the patient . displaced fractures in healthy , active patients are best treated by reduction and internal fixation . there are however , complications unique to femoral neck fractures which are almost impossible to predict . non - union and avascular necrosis non - uni...","\n\nA 38-year old male presented with a displaced fracture of the femoral neck. Treatment of these fractures depends on patient age, fracture displacement, bone quality, timing of surgery and activity level. Reduction and internal fixation is the preferred treatment for healthy, active patients. However, non-union and avascular necrosis can occur and may require revision internal fixation, osteotomy, or prosthetic replacement. In this case, the patient was treated with 3 cannulated screws and followed up for 32 months, at which point full union was achieved. This case demonstrates that goo...",fracture of the femoral neck continues to be a vexing clinical and therapeutic challenge for the orthopedic surgeon . \n the fracture has a propensity for non - union and avascular necrosis . \n it is a challenge for the orthopedic surgeon to decide when to intervene in a case with non - union where the implant continues to be in place . \n we present a case with persistent clinical and radiological non - union signs where the fracture eventually united after 32 months . \n the case bolsters the view that a continued conservative regime might entail good results in such situations .
8,16,"granuloma faciale ( gf ) is an uncommon , benign , inflammatory skin disorder of unknown etiology . it is characterized by single or multiple , grey - brown or violaceous nodules or plaques primarily occurring on the face and occasionally at extra - facial sites . the disease is notoriously resistant to therapies and often tends to relapse when treatment is discontinued . we present a patient with multiple lesions of gf and its response to topical tacrolimus . a 35-year - old female presented to our department with a 5 year history of single , asymptomatic , grey - brown pigmented , no...","\n\nGranuloma faciale (GF) is an uncommon, benign, inflammatory skin disorder characterized by single or multiple, grey-brown or violaceous nodules or plaques primarily occurring on the face. Treatment with topical tacrolimus has been reported to be successful in several cases. A 35-year-old female with multiple lesions of GF showed 40-50% improvement after 3 months of tacrolimus ointment application. It is suggested that tacrolimus may be a well-tolerated and efficacious therapy for GF.",granuloma faciale ( gf ) is a chronic condition characterized by red - brown plaques with follicular accentuation present usually on the face . \n we present a case of 35-year - old female with 5 year history of plaques over cheek and extra facial sites consistent with gf and its response to topical tacrolimus . \n this case supports previous reports of successful treatment of gf with topical tacrolimus .
9,17,"chronic myelomonocytic leukaemia ( cmmol ) is a rare disease with an incidence of 0.370.72 per 100 000 population . the median age at diagnosis varies between 65 years and 75 years [ 14 ] . the radical treatment is allogeneic stem cell transplantation , although this treatment is unsuitable for most patients because of their advanced age [ 6 , 7 ] . the median survival of patients with cmmol is 1520 months [ 5 , 6 , 8 ] . several case reports have indicated that excess surgical stress causes an acute blastic crisis , which contributes to a poor outcome [ 9 , 10 ] . a 68-year - old man ...","\n\nChronic myelomonocytic leukaemia (CMMoL) is a rare disease with an incidence of 0.37-0.72 per 100,000 population. The median age at diagnosis is between 65 and 75 years. Allogeneic stem cell transplantation is the radical treatment, but is not suitable for most patients due to advanced age. Median survival of CMMoL patients is 15-20 months. A 68-year-old man with CMMoL was suffering from reduced appetite, weight loss and abdominal pain. He was prescribed oral hydroxyurea and later admitted to hospital for anaemia and decreased platelet numbers. He was then diagnosed with right mandibul...","we report a case of chronic myelomonocytic leukaemia ( cmmol ) in a 68-year - old man who developed osteomyelitis of the mandible . at the initial visit \n , he reported uncontrolled gingival bleeding , despite self - administered haemostasis . \n he complained of severe pain in the socket , despite potent opioid analgesia . \n after consultation with the internal medicine specialists , we undertook a surgical anti - inflammatory approach that included sequestrectomy with massive blood transfusion . \n his physical condition was ameliorated after the surgical procedure , and he was dischar..."


**Generated Summary**

In [None]:
# GPT summary stored under row label 0, fetched with a single label-based lookup
results_df.loc[0, "GPT_summary"]

" \n\nAnxiety affects the quality of life of those living with Parkinson's Disease (PD) more than overall cognitive status, motor deficits, apathy, and depression. Although anxiety and depression are often related and coexist in PD patients, research suggests that anxiety is the most prominent and prevalent mood disorder. Current understanding of anxiety and its impact on cognition in PD, as well as its neural basis and best treatment practices, is limited. Neuropsychiatric symptoms in PD have been associated with poorer cognitive performance. A study of 27 PD patients showed a negative relationship between anxiety and cognitive performance. Two further studies examined the influence of symptom laterality on anxiety and cognition, but their findings were inconsistent. This study compared the cognition of PD patients with and without anxiety, excluding depression. Results showed that PD patients with anxiety had worse performance on attentional set-shifting, working memory, and verbal m

**Ground Truth Summary**

In [None]:
# Reference abstract stored under row label 0, fetched with a single label-based lookup
results_df.loc[0, "reference_summary"]

"research on the implications of anxiety in parkinson 's disease ( pd ) has been neglected despite its prevalence in nearly 50% of patients and its negative impact on quality of life . \n previous reports have noted that neuropsychiatric symptoms impair cognitive performance in pd patients ; however , to date , no study has directly compared pd patients with and without anxiety to examine the impact of anxiety on cognitive impairments in pd . \n this study compared cognitive performance across 50 pd participants with and without anxiety ( 17 pda+ ; 33 pda ) , who underwent neurological and neuropsychological assessment . \n group performance was compared across the following cognitive domains : simple attention / visuomotor processing speed , executive function ( e.g. , set - shifting ) , working memory , language , and memory / new verbal learning . \n results showed that pda+ performed significantly worse on the digit span forward and backward test and part b of the trail making task

## Testing the Fine-Tuned Models

In [None]:
# Mount Google Drive at /content/drive so that the exported .pkl model files can be loaded in Colab
from google.colab import drive
drive.mount('/content/drive')

### Evaluation

<b>Loading BART-BASE Model</b>

In [None]:
# Restore the exported BART-Base learner from the Google Drive folder.
# NOTE(review): the export is a .pkl file, presumably unpickled on load — only load files from a trusted source.
bart_base_model = load_learner(fname="/content/drive/MyDrive/Summarization_Models/bart_base_summary_export.pkl")

<b>Loading T5-BASE Model</b>

In [None]:
# Restore the exported T5-Base learner from the Google Drive folder.
# NOTE(review): the export is a .pkl file, presumably unpickled on load — only load files from a trusted source.
t5_base_model = load_learner(fname="/content/drive/MyDrive/Summarization_Models/t5_base_summary_export.pkl")

<b>Loading Pegasus-Large Model</b>

In [None]:
# Restore the exported Pegasus-Large learner from the Google Drive folder.
# NOTE(review): the export is a .pkl file, presumably unpickled on load — only load files from a trusted source.
pegasus_large_model = load_learner(fname="/content/drive/MyDrive/Summarization_Models/pegasus_summary_export.pkl")

<b>Generating Summaries</b>

In [None]:
# Columns of the dataframe that stores the summaries for all the models
summary_columns = ["article", "Bart_Gen_Summary", "T5_Gen_Summary", "Pegasus_Gen_Summary", "reference_summary"]

# Rows are collected in a plain list and turned into a DataFrame once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame row by row copies it on every iteration.
generated_records = []

# Iterating over the articles to be summarized (the rows present in results_df)
for _, row in results_df.iterrows():

    # Inferencing using BART
    bart_res = bart_base_model.blurr_summarize(row.article)

    # Inferencing using T5
    t5_res = t5_base_model.blurr_summarize(row.article)

    # Inferencing using Pegasus
    pegasus_res = pegasus_large_model.blurr_summarize(row.article)

    # Keep the 'summary_texts' entry of the first result from each model
    generated_records.append({
        "article": row.article,
        "Bart_Gen_Summary": bart_res[0]['summary_texts'],
        "T5_Gen_Summary": t5_res[0]['summary_texts'],
        "Pegasus_Gen_Summary": pegasus_res[0]['summary_texts'],
        "reference_summary": row.reference_summary,
    })

# Passing the columns explicitly keeps the expected schema even when there were no rows
results_BartT5_Pegasus_df = pd.DataFrame(generated_records, columns=summary_columns)

In [None]:
# Checking out results from the fine-tuned models
results_BartT5_Pegasus_df

Unnamed: 0,article,Bart_Gen_Summary,T5_Gen_Summary,Pegasus_Gen_Summary,reference_summary
0,"anxiety affects quality of life in those living with parkinson 's disease ( pd ) more so than overall cognitive status , motor deficits , apathy , and depression [ 13 ] . although anxiety and depression are often related and coexist in pd patients , recent research suggests that anxiety rather than depression is the most prominent and prevalent mood disorder in pd [ 5 , 6 ] . yet , our current understanding of anxiety and its impact on cognition in pd , as well as its neural basis and best treatment practices , remains meager and lags far behind that of depression . overall , neuropsych...","background : anxiety affects quality of life in those living with parkinson 's disease ( pd ) more so than overall cognitive status , motor deficits , apathy , and depression . \n however , few studies have specifically investigated the relationship between anxiety and cognition in pd and more specifically how anxiety might influence particular domains of cognition such as attention and memory and executive functioning.materials and methods : seventeen patients with anxiety and thirty - three pd patients without anxiety were included in this study.results : anxiety was significantly assoc...","background : recent studies have demonstrated that anxiety affects cognition in patients with parkinson 's disease ( pd ) more than overall cognitive status , motor deficits , and depression . however , the correlation between anxiety and cognition has not been clearly defined.objectives:the aim of this study was to investigate the relationship between anxiety - induced cognitive impairment ( cvs. ) and cognitive function.patients and methods : a total of the repeatable battery for the assessment of neuropsychological status index ( mmse ) versus rpd patients ( n = 27 ) were analyzed . the...","anxiety affects quality of life in those living with parkinson 's disease ( pd ) more so than overall cognitive status , motor deficits , apathy , and depression . 
although anxiety and depression are often related and coexist in pd patients , recent research suggests that anxiety rather than depression is the most prominent and prevalent mood disorder in pd . anxiety has been shown to negatively affect cognition in pd . however , our understanding of anxiety and its impact on cognition in pd , as well as its neural basis and best treatment practices , remains meager and lags far behind tha...","research on the implications of anxiety in parkinson 's disease ( pd ) has been neglected despite its prevalence in nearly 50% of patients and its negative impact on quality of life . \n previous reports have noted that neuropsychiatric symptoms impair cognitive performance in pd patients ; however , to date , no study has directly compared pd patients with and without anxiety to examine the impact of anxiety on cognitive impairments in pd . \n this study compared cognitive performance across 50 pd participants with and without anxiety ( 17 pda+ ; 33 pda ) , who underwent neurological and ..."
1,"small non - coding rnas are transcribed into mrna but remain untranslated in eukaryotic cells . they include sirna ( small interfering rna ) , mirna ( microrna ) , pirna ( piwi - interacting rna ) and snorna ( small nucleolar rna ) . mirnas are a class of multifunctional singled - stranded small rna which are ~20 nt in length and regulate the stability or translational efficiency of targeted messenger rna depending on the base - pairing complementarity between the mirna and its target mrna [ 1 , 2 ] . although over 1,000 mirna sequences have been identified from the tissues or cells of ...","backgroundsmall non - coding rnas are transcribed into mrna but remain untranslated in eukaryotic cells . \n they include sirna ( small interfering rna ) , mirna ( microrna ) , pirna ( piwi - interacting rnas ) and snorna ( small nucleolar rna ). \n mirnas are a class of multifunctional singled - stranded small rna which are ~20 nt in length and regulate the stability or translational efficiency of targeted messenger rna depending on the base - pairing complementarity between the mirna and its target m","[ purpose ] the purpose of this study was to investigate the functionalities of mirnas in mammary gland tumorigenesis and to determine the optimal therapeutic strategy for patients with metastatic breast cancer . [ subjects and methods ] over 1,000 small non - coding rna sequences were identified from the tissues or cells of human origin and other species . the mirnal family encodes a subset of mrnal genomes that are transcribed and remain untranslated in eukaryotic cells . a total of 106 micrornals were identified in this study . we aimed to characterize a novel therapeutic approach using...","small non - coding rnas ( mirnas ) regulate gene expression and tumorigenesis in a broad range of biological processes including timing of development , cell cycle progression , stem cell self - renewal , differentiation , cancer cell proliferation , metastasis and apoptosis . 
mirnas regulate a broad range of biological processes including timing of development , cell cycle progression , stem cell self - renewal , differentiation , cancer cell proliferation , metastasis and apoptosis . recent evidence demonstrates the involvement of mirna in mammary gland tumorigenesis , functioning either...","small non - coding rnas include sirna , mirna , pirna and snorna . \n the involvement of mirnas in the regulation of mammary gland tumorigenesis has been widely studied while the role for other small non - coding rnas remains unclear . here \n we summarize the involvement of mirna in breast cancer onset and progression through regulating the cell cycle and cellular proliferation . \n the regulation of breast cancer stem cells and tumor regeneration by mirna is reviewed . \n in addition , the emerging evidence demonstrating the involvement of pirna and snorna in breast cancer is briefly des..."
2,"congenital adrenal hyperplasia ( cah ) refers to a group of autosomal recessive disorders caused by an enzyme deficiency which leads to defects in biosynthesis of steroid precursors . depending on the severity and degree of 21 hydroxylase deficiency , the clinical spectrum may vary from mild form of non classical cah to classic cah . however , the non classical cah variant is more common with a prevalence rate of 1 in 1000 . it also helps in maintaining normal levels of precursors by suppressing adreno cortico trophic hormone ( acth ) . during childhood , the management is largely focu...","background : congenital adrenal hyperplasia ( cah ) refers to a group of autosomal recessive disorders caused by an enzyme deficiency which leads to defects in biosynthesis of steroid precursors . \n depending on the severity and degree of 21 hydroxylase deficiency , the clinical spectrum may vary from mild form of non classical cah to classic cah.methods:a total of 29 patients were included in the study of which 22 were females ( 76% ) , 7 were males ( 24% ) and 7 were children ( 22% ) . in the pre pubertal period , 5 patients presented","background : congenital adrenal hyperplasia ( cah ) refers to a group of autosomal recessive disorders characterized by an enzyme deficiency leading to defects in biosynthesis of steroid precursors . in this study , 29 patients were diagnosed with cah and seen in the outpatient clinic between january 2012 and october 2012 . all patients underwent biochemical testing for 17 hydroxy progesterone ( 17 ohp ) levels for assessment of adequacy of therapy and bmi was calculated for all patients.results:the mean age of the patients was 58.6 years and the mean sds of 11 patients were > 18 years . a...","background : congenital adrenal hyperplasia ( cah ) refers to a group of autosomal recessive disorders caused by an enzyme deficiency which leads to defects in biosynthesis of steroid precursors . 
the clinical spectrum may vary from mild form of non classical cah to classic cah . however , the non classical cah variant is more common with a prevalence rate of 1 in 1000 . therefore , the management is largely focused on achieving normal growth and attaining appropriate final adult height . during childhood , the management is largely focused on achieving normal growth and attaining appropri...","congenital adrenal hyperplasia is a group of autosomal recessive disorders caused by enzyme deficiency which leads to defects in biosynthesis of steroid precursors . \n most common is 21 hydroxylase deficiency . \n clinical spectrum varies from non - classical cah to classic cah , and it may be simple virilising form or salt - wastinfg type . \n 29 patients were included in our study from january 2012 to october 2012 . \n 76% were females . \n male babies typically presented with adrenal crisis between 3rd to 6th week of life . around 20% of females \n were identified and appropriately tre..."
3,"the family is the cornerstone of human social support network and its presence is essential in everyone s life . changes inevitably occur in families with illness and hospitalization of a family member . in other words , among the sources of stress for families are accidents leading to hospitalization particularly intensive care unit ( icu ) . statistics show that 8% of hospital beds in the united states are occupied by the intensive care units . stress in the family while the patient is in the icu can disrupt the harmony power of the family members and finally , it may causes disturbanc...","background : the family is the cornerstone of human social support network and its presence is essential in everyone s life . changes inevitably occur in families with illness and hospitalization of a family member . in other words , among the sources of stress for families are accidents leading to hospitalization particularly intensive care unit ( icu ) . \n the purpose of the study was to determine the effectiveness of nursing interventions based on family needs on family satisfaction level of hospitalized patients in the neurosurgery intensive care units of al - zahra hospital in 2010....","background : the purpose of this study was to evaluate the satisfaction of the family members of hospitalized patients in intensive care unit ( icu ) . the families were divided into two groups : receiving assurance , staying close to the patient , receiving information , feeling comfortable and receiving support . in addition , the families need to be met and their expectations should be met.materials and methods : in this study , a sample of the families of the patients was collected and the families s satisfaction level was measured.results:the mean age of the patient was 58.6 years and...","background : family is the cornerstone of human social support network and its presence in everyone s life . 
changes inevitably occur in families with illness and hospitalization of a family member . in intensive care unit ( icu ) , the family needs of hospitalized patients are summarized in five sections . however , in many cases , the patient s family needs and their expectations in the icu will not be fulfilled which will cause dissatisfaction . since , how families deal with mental stress is the most important components of comprehensive care , it should be known that attention to the ...","background : since the family is a social system , the impairment in each of its component members may disrupt the entire family system . \n one of the stress sources for families is accidents leading to hospitalization particularly in the intensive care unit ( icu ) . in many cases , \n the families needs in patient care are not met that cause dissatisfaction . since the nurses spend a lot of time with patients and their families , they are in a good position to assess their needs and perform appropriate interventions . \n therefore , this study was conducted to determine the effectivenes..."
4,"development of human societies and industrialization as well as changes in stress sources has changed disease pattern in civilized societies . as a result , the disease pattern has changed from traditional diseases such as infectious diseases and malnutrition to diseases such as heart disease , diabetes , accidents and so forth . among them coronary artery disease is the most cardiovascular disorder as a health problem in developing and developed countries . in fact , this is not only a chronic disease associated with high mortality , but it causes limitations in life and disability in a...","background : the development of human societies and industrialization as well as changes in stress sources has changed disease pattern in civilized societies . as a result , \n cardiovascular diseases are among the most widespread chronic diseases in most countries . according to forecasts , the mortality rate in eastern mediterranean countries including iran will be 30 - 35% [ 6 , 7 ] . despite the emphasis on prevention and development of newtreatments , surgery is the only choice for many patients with cardiovascular disease . \n therefore , one of the surgical procedures for the treat...","background : coronary artery disease is among the most prevalent chronic diseases in developed and developed countries . on the other hand , anxiety is integral part of all psychological diseases which are associated with decreased quality of life , decreased medical care and functional impairment in daily activities . therefore , surgery is an important event in the life of patients with cardiovascular diseases.objectives:the purpose of this study is to evaluate the clinical and economic outcome of patients after coronary bypass surgery.patients and methods : a cross - sectional study was...","background : coronary artery disease is among the most common cardiovascular diseases in the world . 
according to nemati and colleagues , in fact , anxiety is a common psychological response of cardiovascular patients which is associated with reduced quality of life and psychological morbidity . on the other hand , depression is associated with increased risk of mortality , disability , increased medical care and functional impairment in daily activities . it is associated with increased production and medical costs . therefore , effective communication between medical team and patient is ...","background and objective : anxiety and depression are among the psychological disorders in heart \n surgeries . establishing a simple communication is essential to reduce anxiety and depression . \n hence , the objective of the present studywas to examine the impact of peplau therapeutic communication model on anxiety and depression in patients , who were candidate for coronary artery bypass in al - zahra heart hospital , shiraz during 2012 - 2013 . \n methods : this is a clinical trial in which 74 patients were randomly divided into intervention and control groups , each consisted of ..."
5,"male macroprolactinomas ( mprl ) are usually revealed by headaches , visual troubles and gonadal insufficiency . suppurative meningitis ( sm ) , a life - threatening condition , is scarcely observed in subjects with macro tumors secreting prolactin ( prl ) and in other pituitary tumors ( pt ) . however , in some very rare cases it can be a primary presentation or appear after radiotherapy or medical treatment used for tumors destroying the sellar floor and/or the skull base . this destruction leads to cerebral spinal fluid ( csf ) leak , which can act as an entry portal for organisms pr...","background : suppurative meningitis ( sm ) is a life - threatening condition . \n the aim of this study was to analyze sm frequency among male mprl deemed to be very invasive tumors secreting prolactin ( prl ) and in other pituitary tumors ( pt ) . methods : in some very rare cases it can be a primary presentation or appear after radiotherapy or medical treatment used for tumors destroying the sellar floor and/or the skull base.results:this destruction leads to cerebral spinal fluid ( csf ) leak , which can act as an entry portal for organisms predisposing to","suppurative meningitis ( sm ) , a life - threatening condition , is rare in male macroprolactinomas ( mprl ) and pituitary tumors ( pts ). in this retrospective study , we analyzed 82 subjects with mpl deemed to be very invasive tumors . radiological assessment was based on cerebral computed tomography scan and/or magnetic resonance imaging ( mri).resultswe found a significant increase in serum levels of testosterone , growth hormone , insulin steroid and csf1 . in addition , there was an increase in plasma levels of glucose , which could be a risky condition . we also found that a high fr...","male macroprolactinomas ( mprl ) are usually revealed by headaches , visual troubles and gonadal deficit . 
suppurative meningitis ( sm ) , a life - threatening condition , is scarcely observed in subjects with macro tumors secreting prolactin ( prl ) and in other pituitary tumors ( pt ) . however , in some very rare cases it can be a primary presentation or appear after radiotherapy or medical treatment used for tumors destroying the sellar floor and/or the skull base . this destruction leads to cerebral spinal fluid ( csf ) leak , which can act as an entry portal for organisms predisposin...","background : suppurative meningitis ( sm ) or bacterial meningitis is a life - threatening condition , which is exceptionally due to pituitary tumors ( pt ) . our aim was to analyze its frequency among male macroprolactinomas ( mprl ) deemed to be aggressive , to report the cases we observed in our practice and describe the circumstances under which sm appeared.materials and methods : we retrospectively analyzed 82 male mprl in order to look for a history of well proved sm and the circumstances under which sm appeared . \n we also took into account the possibility of sm relapsing.results:f..."
6,many studies have not been reported in literature for lumbar discectomy by destandau endospine system . we report a series of 300 patients operated for lumbar dissectomy by destandau endospine system . a total of 300 patients suffering from lumbar disc herniations were operated between january 2002 and december 2008 . technique comprised localization of symptomatic level followed by insertion of an endospine system devise through a 15 mm skin and fascial incision . endoscopic discectomy is then carried out by conventional micro disc surgery instruments by minimal invasive route . the ...,objective : the aim of this study was to present results in 300 patients operated for lumbar disc herniation by destandau endospine system.materials and methods : a total of 300 patients suffering from different type and level of lumbarr disc herniations were operated between january 2002 and december 2008 . \n technique comprised localization of symptomatic level followed by insertion of an endospines system devise through a 15 mm skin and fascial incision.results:endoscopic discectomy is then carried out by conventional micro disc surgery instruments by minimal invasive route.conclusion...,"objectivewe report a series of 300 patients operated for lumbar dissectomy by destandau endospine system . a total of 475 patients suffering from different type and level of disc herniation were operated between january 2002 and december 2008 . the results were evaluated by macnab 's criteria after a minimum followup of 12 months and maximum up to 24 months.resultsthe results were excellent to good , 8% had fair , and 2% had poor results . discectomy was performed in 15 mm skin and fascial incisions and the nerve root injury in 2 patients was absent . in total , 300 patients were able to r...",background : many studies have not been reported in literature for lumbar discectomy by destandau endospine system . 
we report a series of 300 patients operated for lumbar dissectomy by destandau endospine system . technique comprised localization of symptomatic level followed by insertion of an endospine system devise through a 15 mm skin and fascial incision . results were evaluated by macnab 's criteria after a minimum followup of 12 months and maximum up to 24 months.aim:the aim of this study was to present results in 300 patients operated by edoscopic discectomy and to discuss technic...,background : posterior endoscopic discectomy is an established method for treatment of lumbar disc herniation . \n many studies have not been reported in literature for lumbar discectomy by destandau endospine system . \n we report a series of 300 patients operated for lumbar dissectomy by destandau endospine system.materials and methods : a total of 300 patients suffering from lumbar disc herniations were operated between january 2002 and december 2008 . \n all patients were operated as day care procedure . \n technique comprised localization of symptomatic level followed by insertion of ...
7,"the femoral head often leads to healing complications , while the more predictable prosthetic replacements are associated with poorer function and significant complications . the treatment of these fractures depends on the age of the patient , fracture displacement , bone quality , timing of surgery and activity level of the patient . displaced fractures in healthy , active patients are best treated by reduction and internal fixation . there are however , complications unique to femoral neck fractures which are almost impossible to predict . non - union and avascular necrosis non - uni...","a displaced fracture of the neck of the femoral head often leads to healing complications , while the more predictable prosthetic replacements are associated with poorer function and significant complications . \n the treatment of these fractures depends on the age of the patient , fracture displacement , bone quality , timing of surgery and activity level . in healthy , active patients are best treated by reduction and internal fixation . in the young active patients \n there are however , complications unique to femoral neck fractures which are almost impossible to predict . non - union...","purpose . to report a case of non - union of femoral neck fracture with cannulated screws in an inverted triangle . methods . a 38-year - old male businessman reported to the out door department of our hospital with a history of a fall from height . radiologically , the fracture was graded as garden type 4 with garden alignment index as a yardstick of acceptable reduction . on operative day the patient was operated within 24 h and the radiographs started showing signs of union . at this point twenty - six months into the post operative period the patient continued to complain of mild groin...","the management of failed internal fixation is based on the careful consideration of various factors . 
in young patients revision internal fixation with cancellous or muscle pedicle bone grafting ( vascularised bone graft ) or an osteotomy results in useful outcome . we report a case of non - union of the fracture of the neck of the femur , who refused additional procedures after his non - union had been established . a 38-year old male businessman reported to the out door department of our hospital with a history of a fall from height . clinical and radiological examination revealed a disp...",fracture of the femoral neck continues to be a vexing clinical and therapeutic challenge for the orthopedic surgeon . \n the fracture has a propensity for non - union and avascular necrosis . \n it is a challenge for the orthopedic surgeon to decide when to intervene in a case with non - union where the implant continues to be in place . \n we present a case with persistent clinical and radiological non - union signs where the fracture eventually united after 32 months . \n the case bolsters the view that a continued conservative regime might entail good results in such situations .
8,"granuloma faciale ( gf ) is an uncommon , benign , inflammatory skin disorder of unknown etiology . it is characterized by single or multiple , grey - brown or violaceous nodules or plaques primarily occurring on the face and occasionally at extra - facial sites . the disease is notoriously resistant to therapies and often tends to relapse when treatment is discontinued . we present a patient with multiple lesions of gf and its response to topical tacrolimus . a 35-year - old female presented to our department with a 5 year history of single , asymptomatic , grey - brown pigmented , no...","granuloma faciale ( gf ) is an uncommon , benign , inflammatory skin disorder of unknown etiology . \n it is characterized by single or multiple , grey - brown or violaceous nodules or plaques primarily occurring on the face and occasionally at extra - facial sites . EntityItem the disease is notoriously resistant to therapies and often tends to relapse when treatment is discontinued . assetsadobe we present a patient with multiple lesions of gf and its response to topical tacrolimus .  a 35-year - old female presented to our department with a 5 year history of single , asymptomatic , grey","granuloma faciale ( gf ) is an uncommon , benign , inflammatory skin disorder characterized by single or multiple , grey - brown or violaceous plaques or nodules primarily occurring on the face and occasionally at extra - facial sites . the disease is notoriously resistant to therapies and often tends to relapse when treatment is discontinued . we describe a 35-year - old female who presented to our department with a 5 year history of single asymptomatic , brown pigmented , non - tender plaques over the left cheek , left forehead , both arms and upper back . cutaneous biopsy ( 4 mm ) from ...","granuloma faciale ( gf ) is an uncommon , benign , inflammatory skin disorder of unknown etiology . 
it is characterized by single or multiple , grey - brown or violaceous nodules or plaques primarily occurring on the face and occasionally at extra - facial sites . the disease is notoriously resistant to therapies and often tends to relapse when treatment is discontinued . we present a patient with multiple lesions of gf and its response to topical tacrolimus . a 35-year - old female presented to our department with a 5 year history of single , asymptomatic , grey - brown pigmented , over t...",granuloma faciale ( gf ) is a chronic condition characterized by red - brown plaques with follicular accentuation present usually on the face . \n we present a case of 35-year - old female with 5 year history of plaques over cheek and extra facial sites consistent with gf and its response to topical tacrolimus . \n this case supports previous reports of successful treatment of gf with topical tacrolimus .
9,"chronic myelomonocytic leukaemia ( cmmol ) is a rare disease with an incidence of 0.370.72 per 100 000 population . the median age at diagnosis varies between 65 years and 75 years [ 14 ] . the radical treatment is allogeneic stem cell transplantation , although this treatment is unsuitable for most patients because of their advanced age [ 6 , 7 ] . the median survival of patients with cmmol is 1520 months [ 5 , 6 , 8 ] . several case reports have indicated that excess surgical stress causes an acute blastic crisis , which contributes to a poor outcome [ 9 , 10 ] . a 68-year - old man ...","chronic myelomonocytic leukaemia ( cmmol ) is a rare disease with an incidence of 0.370.72 per 100 000 population . the median age at diagnosis varies between 65 years and 75 years [ 14 ] . \n the radical treatment is allogeneic stem cell transplantation , although this treatment is unsuitable for most patients because of their advanced age [ 6 , 7 ] .","chronic myelomonocytic leukaemia ( cmmol ) is a rare disease with an incidence of 0.370.72 per 100 000 population . the median survival of patients with cmmol is 1520 months . a 68-year - old man visited the emergency department of asahi general hospital for the evaluation of left lower quadrant abdominal pain . on 12 may 2012 , the right molar teeth ( 47 and 48 ) showed grade iii mobility , and their gingivae were painful with continuous bleeding . we used a surgical dressing pack ( coe - pak ) to suture 30 silk threads above it . after the initial visit the patient was referred to the de...","chronic myelomonocytic leukaemia ( cmmol ) is a rare disease with an incidence of 0.370.72 per 100 000 population . the radical treatment is allogeneic stem cell transplantation , although this treatment is unsuitable for most patients because of their advanced age . the survival of patients with cmmol is 1520 months . 
several case reports have indicated that excess surgical stress causes an acute blastic crisis , which contributes to a poor outcome . we present a 68-year - old man with cmmol who was admitted to the emergency department of asahi general hospital for an acute blastic crisis...","we report a case of chronic myelomonocytic leukaemia ( cmmol ) in a 68-year - old man who developed osteomyelitis of the mandible . at the initial visit \n , he reported uncontrolled gingival bleeding , despite self - administered haemostasis . \n he complained of severe pain in the socket , despite potent opioid analgesia . \n after consultation with the internal medicine specialists , we undertook a surgical anti - inflammatory approach that included sequestrectomy with massive blood transfusion . \n his physical condition was ameliorated after the surgical procedure , and he was dischar..."


### Calculation of Metrics

In [None]:
# Install the packages used to compute the evaluation metrics:
# nltk (tokenisation, BLEU, METEOR), rouge-score (ROUGE), bert-score (BERTScore), sacrebleu (SacreBLEU).
# bert-score and sacrebleu are also installed by the notebook's first setup cell, so pip only re-checks them here.
!pip install nltk rouge-score bert-score sacrebleu

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import nltk

# 'punkt' provides the tokenizer models behind nltk.word_tokenize.
nltk.download('punkt')
# 'wordnet' is the corpus METEOR uses for synonym matching; fetching it here keeps the
# metric cells runnable on a fresh kernel (nltk skips the download if it is already present).
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import nltk
from rouge_score import rouge_scorer
import bert_score
import sacrebleu

# Scorer objects are expensive to build (BERTScore loads a full BERT model), so they are
# created once on first use and then shared by every evaluate_summary call.
_SCORER_CACHE = {}


def _get_rouge_scorer():
    """Return the shared ROUGE-1/2/L scorer (stemming enabled), creating it on first use."""
    if 'rouge' not in _SCORER_CACHE:
        _SCORER_CACHE['rouge'] = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    return _SCORER_CACHE['rouge']


def _get_bert_scorer():
    """Return the shared bert-base-uncased BERTScorer, loading the model on first use."""
    if 'bert' not in _SCORER_CACHE:
        _SCORER_CACHE['bert'] = bert_score.BERTScorer(lang='en', model_type='bert-base-uncased')
    return _SCORER_CACHE['bert']


# A function which calculates all the metrics
def evaluate_summary(predicted_summary, ground_truth_summary):
    """Score one generated summary against its reference summary.

    Args:
        predicted_summary: The generated summary text (str).
        ground_truth_summary: The reference summary text (str).

    Returns:
        dict with the keys 'rouge-1', 'rouge-2', 'rouge-l' (ROUGE F-measures),
        'bertscore-precision', 'bertscore-recall', 'bertscore-f1',
        'bleu' (NLTK sentence-level BLEU), 'sacrebleu' (SacreBLEU score, which
        SacreBLEU reports on a 0-100 scale) and 'meteor'.
    """
    # ROUGE scores (RougeScorer.score takes the reference first, then the prediction)
    scores = _get_rouge_scorer().score(ground_truth_summary, predicted_summary)
    rouge_1 = scores['rouge1'].fmeasure
    rouge_2 = scores['rouge2'].fmeasure
    rouge_l = scores['rougeL'].fmeasure

    # BERTScore: the cached scorer avoids reloading the BERT weights for every pair
    bert_precision, bert_recall, bert_f1 = _get_bert_scorer().score([predicted_summary], [ground_truth_summary])

    # BLEU score on lower-cased word tokens
    predicted_tokens = nltk.word_tokenize(predicted_summary.lower())
    ground_truth_tokens = nltk.word_tokenize(ground_truth_summary.lower())
    bleu_score = nltk.translate.bleu_score.sentence_bleu([ground_truth_tokens], predicted_tokens)

    # SacreBLEU works on the raw strings and applies its own tokenisation
    sacrebleu_score = sacrebleu.corpus_bleu([predicted_summary], [[ground_truth_summary]])

    # METEOR reuses the token lists built for BLEU
    meteor_score = nltk.translate.meteor_score.meteor_score([ground_truth_tokens], predicted_tokens)

    return {
        'rouge-1': rouge_1,
        'rouge-2': rouge_2,
        'rouge-l': rouge_l,
        # each BERTScore result is a one-element tensor; .item() extracts the float
        'bertscore-precision': bert_precision.item(),
        'bertscore-recall': bert_recall.item(),
        'bertscore-f1': bert_f1.item(),
        'bleu': bleu_score,
        'sacrebleu': sacrebleu_score.score,
        'meteor': meteor_score
    }

In [None]:
import numpy as np

# A function which calls the evaluate_summary function for a list of summaries and ground truths
def get_eval_metrics(candidate_sum, reference_sum, evaluate_fn=None):
  """Average the per-pair evaluation metrics over a collection of summaries.

  Args:
    candidate_sum: Iterable of generated summaries (list, pandas Series, ...).
    reference_sum: Iterable of reference summaries, in the same order as
      candidate_sum. It must contain at least as many items as candidate_sum;
      any extra trailing references are ignored.
    evaluate_fn: Optional callable (prediction, reference) -> dict holding the
      nine metric keys listed below. Defaults to evaluate_summary.

  Returns:
    dict mapping each metric name to its mean over all pairs. Every value is
    NaN when candidate_sum is empty.

  Raises:
    ValueError: if there are fewer references than candidates.
  """
  if evaluate_fn is None:
    evaluate_fn = evaluate_summary

  # Materialise the values so pairs are matched by position. Indexing a pandas
  # Series with [i] is label-based and breaks (or mis-pairs rows) whenever the
  # index is not 0..n-1, e.g. after filtering or a train/test split.
  candidates = list(candidate_sum)
  references = list(reference_sum)
  if len(references) < len(candidates):
    raise ValueError(
        f"Got {len(candidates)} candidate summaries but only {len(references)} reference summaries"
    )

  # zip stops after the last candidate, so surplus references are not scored
  per_pair = [evaluate_fn(candidate, reference) for candidate, reference in zip(candidates, references)]

  metric_names = (
      'rouge-1', 'rouge-2', 'rouge-l',
      'bertscore-precision', 'bertscore-recall', 'bertscore-f1',
      'bleu', 'sacrebleu', 'meteor',
  )
  if not per_pair:
    # Nothing to average: report NaN explicitly instead of triggering numpy's empty-mean warning
    return {metric: np.nan for metric in metric_names}

  return {metric: np.mean([pair[metric] for pair in per_pair]) for metric in metric_names}

<b>For BART</b>

In [None]:
# Score the BART-generated summaries against the reference summaries
ground_truth_summary = results_BartT5_Pegasus_df["reference_summary"]
predicted_summary = results_BartT5_Pegasus_df["Bart_Gen_Summary"]
bart_scores = get_eval_metrics(
    candidate_sum=predicted_summary,
    reference_sum=ground_truth_summary,
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [None]:
# Averaged evaluation metrics for BART
bart_scores

{'rouge-1': 0.37360333333395135,
 'rouge-2': 0.16042688578105405,
 'rouge-l': 0.2571635897245544,
 'bertscore-precision': 0.6521112106063149,
 'bertscore-recall': 0.6046236672184684,
 'bertscore-f1': 0.6253015724095431,
 'bleu': 0.07677426683661569,
 'sacrebleu': 8.704352468679211,
 'meteor': 0.23721571628062071}

<b>For T5</b>

In [None]:
# Score the T5-generated summaries against the reference summaries
ground_truth_summary = results_BartT5_Pegasus_df["reference_summary"]
predicted_summary = results_BartT5_Pegasus_df["T5_Gen_Summary"]
t5_scores = get_eval_metrics(
    candidate_sum=predicted_summary,
    reference_sum=ground_truth_summary,
)

In [None]:
# Averaged evaluation metrics for T5
t5_scores

{'rouge-1': 0.4008208242251887,
 'rouge-2': 0.13504248666439614,
 'rouge-l': 0.24264236246232793,
 'bertscore-precision': 0.6313560225746848,
 'bertscore-recall': 0.6240223808722063,
 'bertscore-f1': 0.625616582957181,
 'bleu': 0.08507551910837848,
 'sacrebleu': 9.098023358134133,
 'meteor': 0.2684689117250793}

<b>For Pegasus</b>

In [None]:
# Score the Pegasus-generated summaries against the reference summaries
ground_truth_summary = results_BartT5_Pegasus_df["reference_summary"]
predicted_summary = results_BartT5_Pegasus_df["Pegasus_Gen_Summary"]
pegasus_scores = get_eval_metrics(
    candidate_sum=predicted_summary,
    reference_sum=ground_truth_summary,
)

In [None]:
# Averaged evaluation metrics for Pegasus
pegasus_scores

{'rouge-1': 0.4402336430643057,
 'rouge-2': 0.17422600135745264,
 'rouge-l': 0.25060231114087017,
 'bertscore-precision': 0.6377523649822582,
 'bertscore-recall': 0.661047258160331,
 'bertscore-f1': 0.6467721895738081,
 'bleu': 0.11860406769339003,
 'sacrebleu': 12.78788529124314,
 'meteor': 0.34520792629647584}

<b>For GPT3</b>

In [None]:
# Score the GPT-3 zero-shot summaries against the reference summaries stored in results_df
ground_truth_summary = results_df["reference_summary"]
predicted_summary = results_df["GPT_summary"]
gpt_scores = get_eval_metrics(
    candidate_sum=predicted_summary,
    reference_sum=ground_truth_summary,
)

In [None]:
# Averaged evaluation metrics for GPT-3
gpt_scores

{'rouge-1': 0.45729488745243857,
 'rouge-2': 0.17829806155658995,
 'rouge-l': 0.2783718961710552,
 'bertscore-precision': 0.6565241271799261,
 'bertscore-recall': 0.6616951173002069,
 'bertscore-f1': 0.6585256078026511,
 'bleu': 0.1175864113250646,
 'sacrebleu': 8.416554426874193,
 'meteor': 0.31643575605767976}

<b>For GPT4</b>

In [None]:
# Load the GPT-4 responses from CSV; the "GPT4_Summary" column is scored in the next cell
gpt4_df = pd.read_csv("GPT4_responses.csv")

In [None]:
# Score the GPT-4 summaries against the reference summaries.
# NOTE(review): predictions come from gpt4_df while references come from results_df; this
# assumes both frames list the same articles row-for-row in the same order - confirm.
ground_truth_summary = results_df["reference_summary"]
predicted_summary = gpt4_df["GPT4_Summary"]
gpt4_scores = get_eval_metrics(
    candidate_sum=predicted_summary,
    reference_sum=ground_truth_summary,
)

In [None]:
# Averaged evaluation metrics for GPT-4
gpt4_scores

{'rouge-1': 0.49864013597092177,
 'rouge-2': 0.21032328131161238,
 'rouge-l': 0.31881340658874885,
 'bertscore-precision': 0.6682558276436545,
 'bertscore-recall': 0.6995635032653809,
 'bertscore-f1': 0.6832759326154535,
 'bleu': 0.12367896937325663,
 'sacrebleu': 10.5312713027105,
 'meteor': 0.3715520311666836}

## Creating Final DataFrame for Results

In [None]:
# Add the GPT-3 summaries as a new column next to the BART/T5/Pegasus summaries.
# pandas matches rows by index label when a Series is assigned as a column.
# NOTE(review): assumes results_df and results_BartT5_Pegasus_df share the same index - confirm.
results_BartT5_Pegasus_df["GPT_Gen_Summary"] = results_df["GPT_summary"]

In [None]:
# Save the combined per-article results to CSV (the DataFrame index is written as the first, unnamed column)
results_BartT5_Pegasus_df.to_csv("LLMs_testResults.csv")

In [None]:
# Turn each model's averaged-metric dict into a one-row DataFrame ...
bart_res_df, t5_res_df, pegasus_res_df, gpt3_5_res_df, gpt4_res_df = (
    pd.DataFrame(model_scores, index=[0])
    for model_scores in (bart_scores, t5_scores, pegasus_scores, gpt_scores, gpt4_scores)
)

# ... then stack the rows in a fixed order: BART, T5, Pegasus, GPT-3, GPT-4
resDf = pd.concat(
    [bart_res_df, t5_res_df, pegasus_res_df, gpt3_5_res_df, gpt4_res_df],
    ignore_index=True,
)

In [None]:
# Label each row with its model; the order must match the order in which the per-model
# frames were concatenated into resDf (BART, T5, Pegasus, GPT-3, GPT-4).
# NOTE(review): the table output saved in this notebook shows "GPT3.5" for the fourth row
# while this list says "GPT3" - confirm which label is intended.
name = ['BART', 'T5', "Pegasus", "GPT3", "GPT4"]
resDf['Model-Name'] = name

In [None]:
# Move 'Model-Name' to the front; the remaining columns keep their existing order
resDf2 = resDf[['Model-Name', *(column for column in resDf.columns if column != 'Model-Name')]]

In [None]:
# Display the final comparison table (one row per model)
resDf2

Unnamed: 0,Model-Name,rouge-1,rouge-2,rouge-l,bertscore-precision,bertscore-recall,bertscore-f1,bleu,sacrebleu,meteor
0,BART,0.373603,0.160427,0.257164,0.652111,0.604624,0.625302,0.076774,8.704352,0.237216
1,T5,0.400821,0.135042,0.242642,0.631356,0.624022,0.625617,0.085076,9.098023,0.268469
2,Pegasus,0.440234,0.174226,0.250602,0.637752,0.661047,0.646772,0.118604,12.787885,0.345208
3,GPT3.5,0.457295,0.178298,0.278372,0.656524,0.661695,0.658526,0.117586,8.416554,0.316436
4,GPT4,0.49864,0.210323,0.318813,0.668256,0.699564,0.683276,0.123679,10.531271,0.371552


In [None]:
# Save the comparison table to CSV (the DataFrame index is written as the first, unnamed column)
resDf2.to_csv('Final_Results.csv')