In [48]:
!pip install torch
!pip install transformers



In [64]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import textwrap

# Configuration: checkpoint identifier and the device inference runs on.
MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Instantiate the tokenizer and the seq2seq model, then move the model to DEVICE.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.to(DEVICE)


@torch.no_grad()
def paraphrase_partial_range(
    paragraph: str,
    start_index: int,
    end_index: int,
    max_input_length: int = 512,
    max_output_length: int = 400,
    top_k: int = 120,
    top_p: float = 0.95,
) -> str:
    """
    Paraphrase a word range of a paragraph and splice it back between the other words.

    The paragraph is split on whitespace and re-joined with single spaces, so the
    words outside the range keep their content and order, but the original line
    breaks and repeated spaces are not preserved.

    Sampling is enabled (``do_sample=True``), so repeated calls with the same
    arguments can return different text.

    Args:
        paragraph (str): Input paragraph.
        start_index (int): Start word index (inclusive).
        end_index (int): End word index (exclusive).
        max_input_length (int): Max token length for the encoder input; the
            prompt is truncated by the tokenizer beyond this length.
        max_output_length (int): ``max_length`` passed to ``model.generate``.
        top_k (int): Top-k sampling for diversity.
        top_p (float): Top-p (nucleus) sampling for diversity.

    Returns:
        str: The paragraph with the selected word range replaced by its paraphrase.

    Raises:
        ValueError: If ``start_index`` is negative, ``end_index`` exceeds the
            number of words, or the range is empty (``start_index >= end_index``).
    """
    # str.split() with no separator already ignores leading/trailing whitespace.
    words = paragraph.split()

    if start_index < 0 or end_index > len(words) or start_index >= end_index:
        raise ValueError("Invalid start or end index for paraphrasing range.")

    # Split paragraph into three parts
    before = " ".join(words[:start_index])
    to_paraphrase = " ".join(words[start_index:end_index])
    after = " ".join(words[end_index:])

    # NOTE(review): the prompt appends "</s>" explicitly; T5 tokenizers usually
    # add the EOS token themselves -- confirm whether the literal is still needed.
    prompt = f"paraphrase: {to_paraphrase} </s>"

    # Only one sequence is encoded, so no padding is requested: padding to
    # max_input_length would make the encoder process that many tokens on every
    # call without adding information (padded positions are masked out anyway).
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_length,
    ).to(DEVICE)

    # Generate paraphrased text
    output = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_output_length,
        num_return_sequences=1,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
    )

    paraphrased_part = tokenizer.decode(
        output[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    ).strip()

    # Re-assemble, skipping empty pieces so that an empty prefix, suffix or
    # model output never introduces a doubled or dangling space.
    pieces = [piece for piece in (before, paraphrased_part, after) if piece]
    return " ".join(pieces).strip()


if __name__ == "__main__":
    # Demo input, kept verbatim (leading space and indented continuation lines
    # included) because it is the text handed to the model.
    input_paragraph = """ The Supply Chain Environmental Transformation plan 2024-2027 contains the mandatory chemical management requirements for the period 2024-2027.
         Those requirements will be added to the current methane chlorophloro carbon minimum requirements covered by the Green to Wear standard:
         (https://www.inditex.com/itxcomweb/api/media/0176de52-5436-46dc-9490-c91351b71cdd/GTW%202.1%20English%202023.pdf?t=1741164770911).
         According to the on-site diagnosis, the level of compliance with the new chemical requirements 2024-2027 are given as follows:"""

    # Word range to rewrite: start is inclusive, end is exclusive.
    start, end = 0, 40

    result = paraphrase_partial_range(
        paragraph=input_paragraph,
        start_index=start,
        end_index=end,
    )

    # Show the untouched input first, then the wrapped result.
    print("\n🔹 Original Paragraph:\n", input_paragraph.strip())
    print(
        f"\n✅ Paraphrased from word {start} to {end}:\n",
        textwrap.fill(result.strip(), width=150),
    )



🔹 Original Paragraph:
 The Supply Chain Environmental Transformation plan 2024-2027 contains the mandatory chemical management requirements for the period 2024-2027.
         Those requirements will be added to the current methane chlorophloro carbon minimum requirements covered by the Green to Wear standard:
         (https://www.inditex.com/itxcomweb/api/media/0176de52-5436-46dc-9490-c91351b71cdd/GTW%202.1%20English%202023.pdf?t=1741164770911).
         According to the on-site diagnosis, the level of compliance with the new chemical requirements 2024-2027 are given as follows:

✅ Paraphrased from word 0 to 40:
 The 2024-2027 Supply Chain Environmental Transformation Plan contains the mandatory chemical management requirements for the 2024-2027 period, adding
to the current methane chlorophlorocarbon minimum requirements from the Green to Wear standard
(https://www.inditex.com/itxcomweb/api/media/0176de52-5436-46dc-9490-c91351b71cdd/GTW%202.1%20English%202023.pdf?t=1741164770911). t

In [70]:
!pip install nltk
nltk.download('punkt_tab')



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [72]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import textwrap
import nltk
from nltk.tokenize import sent_tokenize

# Download the sentence-tokenizer data used by sent_tokenize if not already
# available. Both resource names are requested so this cell is self-contained:
# "punkt" is the legacy package, while "punkt_tab" is the one this notebook
# also had to fetch separately for sent_tokenize to work.
nltk.download("punkt")
nltk.download("punkt_tab")

# Settings: which checkpoint to load and where to run it.
MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Build the tokenizer and model from the checkpoint; the model is placed on DEVICE.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.to(DEVICE)


@torch.no_grad()
def paraphrase_chunk(
    text: str,
    max_input_length: int = 512,
    max_output_length: int = 512,
    top_k: int = 120,
    top_p: float = 0.95,
) -> str:
    """
    Paraphrase a single text chunk.

    Sampling is enabled (``do_sample=True``), so repeated calls with the same
    text can return different results.

    Args:
        text (str): Text to paraphrase; surrounding whitespace is ignored.
        max_input_length (int): Max token length for the encoder input; the
            prompt is truncated by the tokenizer beyond this length.
        max_output_length (int): ``max_length`` passed to ``model.generate``.
        top_k (int): Top-k sampling for diversity.
        top_p (float): Top-p (nucleus) sampling for diversity.

    Returns:
        str: The decoded paraphrase, or ``""`` when ``text`` is empty or
        whitespace-only (the model is not called in that case).
    """
    cleaned = text.strip()
    if not cleaned:
        # Nothing to paraphrase; avoid sampling text from an empty prompt.
        return ""

    # NOTE(review): the prompt appends "</s>" explicitly; T5 tokenizers usually
    # add the EOS token themselves -- confirm whether the literal is still needed.
    prompt = f"paraphrase: {cleaned} </s>"

    # Only one sequence is encoded, so no padding is requested: padding to
    # max_input_length would make the encoder process that many tokens on every
    # call without adding information (padded positions are masked out anyway).
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_length,
    ).to(DEVICE)

    output = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_output_length,
        num_return_sequences=1,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)


def paraphrase_full_text(paragraph: str) -> str:
    """
    Paraphrase a paragraph one sentence at a time.

    The paragraph is split with ``sent_tokenize`` and each sentence is passed to
    ``paraphrase_chunk``. If paraphrasing a sentence raises, the error is
    printed and the original sentence is kept in its place. The resulting
    sentences are joined with single spaces.
    """
    rewritten = []

    for sentence in sent_tokenize(paragraph):
        try:
            candidate = paraphrase_chunk(sentence)
        except Exception as err:
            # Best-effort: report the failure and keep the original sentence.
            print(f"❌ Error paraphrasing: {sentence[:60]}... → {err}")
            candidate = sentence
        rewritten.append(candidate)

    joined = " ".join(rewritten)
    return joined.strip()


if __name__ == "__main__":
    # Demo input, kept verbatim (indented continuation lines included) because
    # it is the text handed to the sentence splitter and the model.
    input_paragraph = """The Supply Chain Environmental Transformation plan 2024-2027 contains the mandatory chemical management requirements for the period 2024-2027.
        Those requirements will be added to the current methane chlorophloro carbon minimum requirements covered () by the Green to Wear standard:
        (https://www.inditex.com/itxcomweb/api/media/0176de52-5436-46dc-9490-c91351b71cdd/GTW%202.1%20English%202023.pdf?t=1741164770911).
        According to the on-site diagnosis, the level of compliance with the new chemical requirements 2024-2027 are given as follows:"""

    result = paraphrase_full_text(paragraph=input_paragraph)

    # Each heading is followed by a blank line and then the text wrapped at 150 columns.
    print(
        "\n🔹 Original Paragraph:\n",
        textwrap.fill(input_paragraph.strip(), width=150),
        sep="\n",
    )
    print(
        "\n✅ Paraphrased Full Paragraph:\n",
        textwrap.fill(result.strip(), width=150),
        sep="\n",
    )


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



🔹 Original Paragraph:

The Supply Chain Environmental Transformation plan 2024-2027 contains the mandatory chemical management requirements for the period 2024-2027.
Those requirements will be added to the current methane chlorophloro carbon minimum requirements covered by the Green to Wear standard:
(https://www.inditex.com/itxcomweb/api/media/0176de52-5436-46dc-9490-c91351b71cdd/GTW%202.1%20English%202023.pdf?t=1741164770911).         According
to the on-site diagnosis, the level of compliance with the new chemical requirements 2024-2027 are given as follows:

✅ Paraphrased Full Paragraph:

The 2024-2027 Supply Chain Environmental Transformation Plan contains mandatory chemical management requirements for the period 2024-2027. Those
requirements will be added to the current minimum chlorophloro carbon requirements that are covered in the Green to Wear Standard:
(https://www.inditex.com/itxcomweb/api/media/0176de52-5436-46dc-9490-c91351b71cdd/GTW%202.1%20English%202023.pdf?t=17411647