Note: This paraphraser is trained to paraphrase short English sentences and works best for those inputs.

In [None]:
%pip install transformers

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Single source of truth for the checkpoint so the model and its tokenizer
# can never drift apart.
MODEL_NAME = "ramsrigouthamg/t5-large-paraphraser-diverse-high-quality"

# Load the tokenizer and the seq2seq model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

In [5]:
import torch
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device ", device)

# Move the model's weights onto the selected device.
model = model.to(device)

device  cuda


In [None]:
# Sample news sentence. Unlike the `text` values built in the cells below, it
# carries no "paraphrase: " prefix or " </s>" suffix, and when the notebook is
# run top to bottom those cells reassign `text` before it is tokenized.
text = "Four private astronauts launched to orbit by Elon Musk’s SpaceX returned to Earth Saturday evening, splashing down into the ocean off the east coast of Florida after a three-day mission."

In [None]:
# Paraphrase of the astronaut sentence, kept for reference — presumably model
# output from an earlier run (TODO confirm). No visible cell reads `output`.
output = "After a three-day mission, four private astronauts sent by Elon Musk's SpaceX returned to Earth on Saturday evening, splashing down into the ocean off the east coast of Florida."

In [None]:
# Short single-sentence example.
context = "Once, a group of frogs was roaming around the forest in search of water."
# Model input: task prefix + sentence + explicit end-of-sequence marker.
text = f"paraphrase: {context} </s>"

In [None]:
# Longer, two-sentence example. Running this cell replaces the `context` and
# `text` set by the frog example above.
# NOTE(review): the outputs recorded further down show the frog sentence, so
# this cell was apparently not the last input cell executed — confirm which
# example is intended before a Restart & Run All.
context = "Gradient descent is an optimization algorithm which is commonly-used to train machine learning models and neural networks. Training data helps these models learn over time, and the cost function within gradient descent specifically acts as a barometer, gauging its accuracy with each iteration of parameter updates."
# Model input: task prefix + passage + explicit end-of-sequence marker.
text = f"paraphrase: {context} </s>"

In [6]:
# Beam search
# At every decoding step the model keeps the `num_beams` highest-scoring
# partial sequences (beams) and extends each of them by one token. When
# decoding finishes, the `num_return_sequences` best hypotheses are returned.

# `truncation=True` is what makes `max_length` apply to the input; without it
# the tokenizer does not cut inputs longer than 128 tokens.
encoding = tokenizer(
    text, max_length=128, truncation=True, return_tensors="pt")
input_ids = encoding["input_ids"].to(device)
attention_mask = encoding["attention_mask"].to(device)

# Put the model in evaluation mode before generating.
model.eval()

beam_outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_length=128,          # upper bound on the generated sequence length
    early_stopping=True,
    num_beams=15,
    num_return_sequences=3,  # must not exceed num_beams
)

print("\n\n")
print("Original: ", context)

# Decode each returned hypothesis back to text, dropping special tokens.
for beam_output in beam_outputs:
    sent = tokenizer.decode(
        beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    print(sent)






Original:  Once, a group of frogs was roaming around the forest in search of water.
paraphrasedoutput: A herd of frogs was wandering around the woods in search of water.
paraphrasedoutput: A herd of frogs was wandering around the woods in search of water once more.
paraphrasedoutput: A gang of frogs was wandering around the woods in search of water once more.


**Diverse** here means that pairs of sentences are selected such that there is a significant difference in word order or, at the very least, the paraphrased output differs from the input by multiple word changes.

In [7]:
# Diverse beam search
# The `num_beams` beams are split into `num_beam_groups` groups. Each group
# runs beam search, and `diversity_penalty` is subtracted from the score of a
# token that another group has already picked at the same step, which pushes
# the groups towards different paraphrases. No sampling is involved.
# NOTE(review): newer transformers releases may have moved this decoding mode
# out of the core `generate` — confirm against the installed version.

# `truncation=True` is what makes `max_length` apply to the input; without it
# the tokenizer does not cut inputs longer than 128 tokens.
encoding = tokenizer(
    text, max_length=128, truncation=True, return_tensors="pt")
input_ids = encoding["input_ids"].to(device)
attention_mask = encoding["attention_mask"].to(device)

# Put the model in evaluation mode before generating.
model.eval()

diverse_beam_outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_length=128,          # upper bound on the generated sequence length
    early_stopping=True,
    num_beams=5,
    num_beam_groups=5,       # one beam per group
    num_return_sequences=5,  # must not exceed num_beams
    diversity_penalty=0.70,
)

print("\n\n")
print("Original: ", context)

# Decode each returned hypothesis back to text, dropping special tokens.
for beam_output in diverse_beam_outputs:
    sent = tokenizer.decode(
        beam_output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    print(sent)






Original:  Once, a group of frogs was roaming around the forest in search of water.
paraphrasedoutput: A herd of frogs was wandering around the woods in search of water.
paraphrasedoutput: A herd of frogs was wandering around the woods in search of water.
paraphrasedoutput: A gang of frogs was wandering around the forest in search of water at one time.
paraphrasedoutput: A herd of frogs was swaning around the woods in search of water.
paraphrasedoutput: A gang of frogs was roaming about the woods in search of water once more.
