In [None]:
!pip install happytransformer
!pip install sentencepiece

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
from happytransformer import HappyTextToText, TTSettings
import pandas as pd
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [None]:
# --- Device selection -------------------------------------------------------
# The Pegasus models loaded below are placed on the GPU when torch can see one.
if torch.cuda.is_available():
  torch_device = 'cuda'
else:
  torch_device = 'cpu'

# --- Happy Transformer text-to-text wrappers --------------------------------
happy_t5 = HappyTextToText("T5", "t5-base")
happy_db = HappyTextToText("DISTILBART", "sshleifer/distilbart-cnn-12-6")
happy_peg = HappyTextToText("Pegasus", "google/pegasus-big_patent")

# Sampling settings used for the second DistilBART generation of each text.
happy_db_temp = TTSettings(do_sample=True, top_k=0, top_p=0.93, temperature=0.8)


# --- Pegasus checkpoints loaded directly through transformers ----------------
def _load_pegasus(checkpoint):
  """Return (tokenizer, model) for `checkpoint`, with the model moved to `torch_device`."""
  tokenizer = PegasusTokenizer.from_pretrained(checkpoint)
  model = PegasusForConditionalGeneration.from_pretrained(checkpoint).to(torch_device)
  return tokenizer, model


model_name_paraphrase = 'tuner007/pegasus_paraphrase'
tokenizer_paraphrase, model_paraphrase = _load_pegasus(model_name_paraphrase)

model_name_summarizer = 'tuner007/pegasus_summarizer'
tokenizer_summarizer, model_summarizer = _load_pegasus(model_name_summarizer)

# pegasus_paraphrase function
def get_paraphrase(input_text,temperature):
  """Generate a paraphrase of `input_text` with the Pegasus paraphrase model.

  Args:
    input_text: Text to paraphrase. It is tokenized as a one-item batch and
      truncated to at most 60 tokens.
    temperature: Sampling temperature forwarded to `generate`.

  Returns:
    The result of `batch_decode` on the generated ids, with special tokens
    skipped. One sequence is requested per input.
  """
  encoded = tokenizer_paraphrase(
      [input_text],
      truncation=True,
      padding='longest',
      max_length=60,
      return_tensors="pt",
  )
  encoded = encoded.to(torch_device)

  # Beam search combined with sampling; one sequence is returned per input.
  generation_options = dict(
      do_sample=True,
      max_length=60,
      num_beams=10,
      num_return_sequences=1,
      temperature=temperature,
  )
  generated_ids = model_paraphrase.generate(**encoded, **generation_options)

  return tokenizer_paraphrase.batch_decode(generated_ids, skip_special_tokens=True)

# pegasus_summarizer function
def get_summarizer(input_text, temperature):
  """Generate a summary of `input_text` with the Pegasus summarizer model.

  Args:
    input_text: Text to summarize, passed to the tokenizer as given and
      truncated to at most 1024 tokens.
    temperature: Sampling temperature forwarded to `generate`.

  Returns:
    The result of `batch_decode` on the generated ids, with special tokens
    skipped. One sequence of at most 128 tokens is requested per input.
  """
  model_inputs = tokenizer_summarizer(
      input_text,
      truncation=True,
      padding='longest',
      max_length=1024,
      return_tensors="pt",
  ).to(torch_device)

  # Beam search combined with sampling; one sequence is returned per input.
  summary_ids = model_summarizer.generate(
      **model_inputs,
      do_sample=True,
      max_length=128,
      num_beams=5,
      num_return_sequences=1,
      temperature=temperature,
  )

  return tokenizer_summarizer.batch_decode(summary_ids, skip_special_tokens=True)


In [None]:
# Load the evaluation table from the working directory.
data = pd.read_csv("test_data_4000.csv")

# Disabled step kept from the original notebook:
# data.drop(columns=['id'], inplace=True)

# The 'context' column holds the texts that are fed to every model.
context = data['context']

In [None]:
# Run every model / setting over each context and collect the generations.
# Each list receives exactly one entry per element of `context`, in order,
# so the lists line up row-for-row with `data`.
outputs_t5 = []
outputs_db = []
outputs_dbpar = []
outputs_peg_paraphrase = []
outputs_peg_summarizer = []
outputs_peg_paraphrase_t = []
outputs_peg_summarizer_t = []
outputs_peg_google = []

# "summarize: " is a T5 task prefix. The DistilBART and Pegasus checkpoints
# have no such convention, so they receive the raw text; prepending the prefix
# would only insert the literal word into their input.
t5_prefix = "summarize: "

for i, cont in enumerate(context, start=1):
  # T5 using Happy Transformer (the only model that needs the task prefix)
  outputs_t5.append(happy_t5.generate_text(t5_prefix + cont).text)

  # DISTILBART using Happy Transformer: default settings, then sampling settings
  outputs_db.append(happy_db.generate_text(cont).text)
  outputs_dbpar.append(happy_db.generate_text(cont, args=happy_db_temp).text)

  # pegasus_paraphrase from tuner007 at temperature 1.5 and 0.8
  outputs_peg_paraphrase.append(get_paraphrase(cont,1.5)[0])
  outputs_peg_paraphrase_t.append(get_paraphrase(cont,0.8)[0])

  # pegasus_summarizer from tuner007 at temperature 1.5 and 0.8
  outputs_peg_summarizer.append(get_summarizer(cont,1.5)[0])
  outputs_peg_summarizer_t.append(get_summarizer(cont,0.8)[0])

  # pegasus-big_patent from google
  outputs_peg_google.append(happy_peg.generate_text(cont).text)

  # Progress indicator: 1-based count of contexts processed so far.
  print(i)

In [None]:
# Attach each model's generations to the table as a new column, then save it.
# The mapping's insertion order determines the order in which columns are added.
generated_columns = {
  'Happy-T5': outputs_t5,
  'DistilBART': outputs_db,
  'DistilBART_Params': outputs_dbpar,
  'Pegasus_Paraphrase_1.5': outputs_peg_paraphrase,
  'Pegasus_Paraphrase_0.8': outputs_peg_paraphrase_t,
  'Pegasus_summarizer_1.5': outputs_peg_summarizer,
  'Pegasus_summarizer_0.8': outputs_peg_summarizer_t,
  'Pegasus_BigPatent': outputs_peg_google,
}
for column_name, column_values in generated_columns.items():
  data[column_name] = column_values

# Write the augmented table without the DataFrame index.
data.to_csv("output.csv", index=False)