In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Single source of truth for the checkpoint so the tokenizer and the model
# are always loaded from the same repository.
MODEL_NAME = "humarin/chatgpt_paraphraser_on_T5_base"

# Use the GPU when PyTorch reports one as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the seq2seq weights, then move them onto the selected device.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.to(device)

def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=1,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128
):
    """Generate paraphrases of ``question`` with the module-level model.

    The text is prefixed with ``paraphrase: ``, tokenized with the
    module-level ``tokenizer`` (truncated to ``max_length`` tokens), moved to
    ``device`` and passed to ``model.generate``. The generated ids are decoded
    back to text with special tokens removed.

    Args:
        question: Text to paraphrase; it is interpolated into the prompt.
        num_beams: Forwarded to ``model.generate``.
        num_beam_groups: Forwarded to ``model.generate``.
        num_return_sequences: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.
        diversity_penalty: Forwarded to ``model.generate``.
        no_repeat_ngram_size: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
            NOTE(review): sampling is not enabled in this call, so this value
            may have no effect -- confirm against the transformers docs.
        max_length: Used twice: as the truncation limit for the tokenized
            prompt and as ``max_length`` for generation.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences
        (a list of decoded strings).
    """
    prompt = f'paraphrase: {question}'

    # Inputs longer than max_length tokens are silently truncated here.
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    )
    prompt_ids = encoded.input_ids.to(device)

    generation_options = dict(
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams,
        num_beam_groups=num_beam_groups,
        max_length=max_length,
        diversity_penalty=diversity_penalty,
    )
    generated_ids = model.generate(prompt_ids, **generation_options)

    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)


In [3]:
# Sample multi-paragraph text used as paraphrasing input in this notebook.
# The literal keeps its leading/trailing spaces, blank lines and ** markers.
paragraph = """  Hugging Face is an innovative AI company that has become a leading platform for natural language processing (NLP) and machine learning tools. Founded in 2016, it started as a chatbot app but soon pivoted to focus on developing open-source models and libraries for NLP tasks. Hugging Face is best known for its **Transformers library**, which provides pre-trained models for tasks like text classification, translation, summarization, and question-answering. These models, especially BERT, GPT, and T5, have become industry standards for NLP applications.

The platform's mission is to democratize AI by making state-of-the-art models accessible to researchers, developers, and organizations. Hugging Face provides tools that simplify the development, training, and deployment of machine learning models. Its **Model Hub** allows users to easily share and access thousands of pre-trained models in various domains, from NLP to vision and audio tasks.

Hugging Face also supports **AutoML**, enabling users to fine-tune models on their own data without deep expertise in machine learning. With a vibrant open-source community, Hugging Face is at the forefront of AI innovation, providing powerful, user-friendly tools that fuel advancements in machine learning and AI research across industries. """

# Paraphrase the whole text in a single call with the default settings.
# NOTE(review): paraphrase() truncates its prompt to max_length (128 by
# default) tokens, so a passage this long is presumably cut off before the
# model sees all of it -- confirm by checking the tokenized length.
paraphrase(paragraph)


['Founded in 2016, Hugging Face is an innovative AI company that has become a leading player in natural language processing (NLP) and machine learning tools. Its main focus was on developing open-source models and libraries for NLP tasks, with primarily non-MIT proprietary models available such as the Transformers library. These models have established standards that are used extensively in industry applications.']

In [4]:
import nltk
# nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize

In [9]:
# Split the paragraph into individual sentences with NLTK.
sentences = sent_tokenize(paragraph)

# Paraphrase each sentence on its own (2 beams in 2 groups) and keep only the
# first candidate returned for it.
paraphrased_sentences = [
    paraphrase(sentence, num_beams=2, num_beam_groups=2, num_return_sequences=1)[0]
    for sentence in sentences
]

# Reassemble the rewritten sentences into one space-separated string.
paraphrased_paragraph = " ".join(paraphrased_sentences)

print("Original Paragraph:\n", paragraph)
print("\nParaphrased Paragraph: \n", paraphrased_paragraph)

# Displayed as the cell result: input and output sentence counts.
len(sentences), len(paraphrased_sentences)

Original Paragraph:
   Hugging Face is an innovative AI company that has become a leading platform for natural language processing (NLP) and machine learning tools. Founded in 2016, it started as a chatbot app but soon pivoted to focus on developing open-source models and libraries for NLP tasks. Hugging Face is best known for its **Transformers library**, which provides pre-trained models for tasks like text classification, translation, summarization, and question-answering. These models, especially BERT, GPT, and T5, have become industry standards for NLP applications.

The platform's mission is to democratize AI by making state-of-the-art models accessible to researchers, developers, and organizations. Hugging Face provides tools that simplify the development, training, and deployment of machine learning models. Its **Model Hub** allows users to easily share and access thousands of pre-trained models in various domains, from NLP to vision and audio tasks.

Hugging Face also supports

(9, 9)