# Setup

# Imports & Common Setup

In [None]:
import torch
from transformers import AutoTokenizer,AutoModelForSeq2SeqLM

# Load BART

In [None]:
# Keep the checkpoint id in a single name so the tokenizer and the model
# cannot drift apart (mirrors how the T5 cell is written).
# NOTE(review): "facebook/bart-large-cnn" appears to be a summarization
# checkpoint rather than a paraphrase model — confirm it is the intended choice.
bart_model_name = "facebook/bart-large-cnn"

bart_tokenizer = AutoTokenizer.from_pretrained(bart_model_name)
bart_model = AutoModelForSeq2SeqLM.from_pretrained(bart_model_name)

Please make sure the generation config includes `forced_bos_token_id=0`. 


Loading weights:   0%|          | 0/511 [00:00<?, ?it/s]

# BART Paraphrasing Function

In [None]:
def bart_paraphrase(text, max_length=128, num_beams=5):
    """Generate text from ``text`` with the loaded BART model using beam search.

    Args:
        text: Input string handed to ``bart_tokenizer``.
        max_length: Upper bound on the generated sequence length, forwarded
            to ``bart_model.generate``.
        num_beams: Beam width forwarded to ``bart_model.generate``.

    Returns:
        The first generated sequence, decoded to a string with special
        tokens stripped.
    """
    # Bound the encoder input explicitly, as the T5 helper does, instead of
    # relying on whatever limit the tokenizer config happens to carry.
    # NOTE(review): 1024 is the documented position limit for BART — confirm
    # against the checkpoint's config.
    inputs = bart_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    )

    # Inference only: make the no-gradient intent explicit, matching t5_paraphrase.
    with torch.no_grad():
        outputs = bart_model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
        )

    # Only the first returned sequence is decoded.
    return bart_tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test BART

In [None]:
# Smoke test: run the BART helper on one short sentence and print the result.
input_text = "The model achieved strong performance on previously unseen data."

# `bart_output` stays in the notebook namespace; the final comparison cell prints it again.
bart_output = bart_paraphrase(input_text)
print("BART Output:", bart_output)


BART Output: The model achieved strong performance on previously unseen data. The model was based on data that had never been seen before. The data was taken from a database of more than 1.5 billion data points. The results were based on a set of data points that had not been seen previously.


# Load T5

In [None]:
# Checkpoint id shared by the tokenizer and the model loaded below.
t5_model_name = "t5-small"

# Both objects are created via from_pretrained from the same id.
# NOTE(review): the recorded test output further down shows this checkpoint
# echoing the prompt back instead of rewording it — confirm that a plain
# (not paraphrase-tuned) T5 checkpoint is what is wanted here.
t5_tokenizer = AutoTokenizer.from_pretrained(t5_model_name)
t5_model = AutoModelForSeq2SeqLM.from_pretrained(t5_model_name)


Loading weights:   0%|          | 0/131 [00:00<?, ?it/s]

# T5 Paraphrasing Function

In [None]:
def t5_paraphrase(
    text,
    max_length=128,
    num_beams=5,
    prefix="paraphrase the following sentence in different words: ",
):
    """Generate text from ``text`` with the loaded T5 model using beam search.

    Args:
        text: Sentence to feed to the model.
        max_length: Upper bound on the generated sequence length, forwarded
            to ``t5_model.generate``.
        num_beams: Beam width forwarded to ``t5_model.generate``.
        prefix: Instruction string prepended to ``text`` before tokenization.
            The default reproduces the original hard-coded prompt; it is a
            parameter so other prompts can be tried without editing this
            function (in the recorded run the default prompt was echoed back).

    Returns:
        The first generated sequence, decoded to a string with special
        tokens stripped.
    """
    prompt = prefix + text

    # Encoder input is truncated to at most 512 tokens.
    inputs = t5_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    # Inference only, so no gradients are needed.
    with torch.no_grad():
        outputs = t5_model.generate(
            **inputs,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
        )

    # Only the first returned sequence is decoded.
    return t5_tokenizer.decode(outputs[0], skip_special_tokens=True)


# Test T5

In [None]:
# Smoke test for the T5 helper.
# NOTE: this rebinds `input_text`, which the BART test cell also assigned;
# `bart_output` was generated from that earlier sentence, not from this one.
input_text = "Because T5 is a general text-to-text model. Without task-specific fine-tuning or strong prompting, it may default to Boolean-style outputs learned during pretraining."
t5_output = t5_paraphrase(input_text)
print("T5 Output:", t5_output)


T5 Output: Paraphrase the following sentence in different words: Because T5 is a general text-to-text model. Without task-specific fine-tuning or strong prompting, it may default to Boolean-style outputs learned during pretraining.


In [None]:
# `bart_output` was produced from the earlier BART test sentence, not from the
# current `input_text`, so printing it next to "Original" would compare outputs
# for two different inputs. Re-run BART on the same sentence that T5 received.
bart_same_input_output = bart_paraphrase(input_text)

print("Original :", input_text)
print("T5       :", t5_output)
print("BART     :", bart_same_input_output)


Original : Because T5 is a general text-to-text model. Without task-specific fine-tuning or strong prompting, it may default to Boolean-style outputs learned during pretraining.
T5       : Paraphrase the following sentence in different words: Because T5 is a general text-to-text model. Without task-specific fine-tuning or strong prompting, it may default to Boolean-style outputs learned during pretraining.
BART     : The model achieved strong performance on previously unseen data. The model was based on data that had never been seen before. The data was taken from a database of more than 1.5 billion data points. The results were based on a set of data points that had not been seen previously.
