<a href="https://colab.research.google.com/github/marb543/CART498-GenAI/blob/main/A2/assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

# Load the GPT-2 tokenizer and language-model head once, at module level.
# Both objects are resolved by checkpoint name ("gpt2") and are used as
# globals by apply_p_plus_n: `tokenizer` to encode/decode text and `model`
# to produce next-token logits.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")


def apply_p_plus_n(text, n):
    """
    Apply the P+N technique: replace the last word of each line with the
    token that GPT-2 ranks nth most probable.

    For every non-blank line, the line (with trailing ``.,;!?`` stripped from
    its last word) is encoded with the module-level ``tokenizer`` and passed
    to the module-level ``model``. The distribution at the final position is
    ranked, and the decoded nth entry takes the place of the last word. The
    punctuation stripped from the original last word is not carried over, and
    runs of whitespace inside a line collapse to single spaces.

    Args:
        text: Input text whose lines are separated by newline characters.
        n: 1-based rank of the replacement token (1 = most probable). When n
            is larger than the vocabulary, the least probable token is used.

    Returns:
        The transformed text, with exactly as many lines as the input; blank
        or whitespace-only lines are kept as empty lines.

    Raises:
        ValueError: If n is smaller than 1.
    """
    # Rank 0 or a negative rank would otherwise index from the tail of the
    # ranking and silently return an arbitrary token.
    if n < 1:
        raise ValueError(f"n must be a positive rank (>= 1), got {n!r}")

    modified_lines = []
    for line in text.split("\n"):
        words = line.split()
        if not words:
            # Keep blank lines (e.g. stanza breaks) so the output stays
            # aligned line-for-line with the input.
            modified_lines.append("")
            continue

        *context_words, last_word = words
        last_word = last_word.rstrip(".,;!?")
        context = " ".join(context_words)

        # NOTE(review): the prompt includes the last word itself, so the
        # ranking below is for the token that would FOLLOW the last word,
        # not for the last-word slot. Kept as-is; confirm this is intended.
        input_ids = tokenizer.encode(context + " " + last_word, return_tensors="pt")

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = model(input_ids)

        # Logits at the final position of the only sequence in the batch.
        logits = outputs.logits[0, -1]
        probabilities = torch.softmax(logits, dim=-1)

        # topk returns entries in descending order by default, so the last of
        # the top-k is the kth most probable token. Clamping k to the
        # vocabulary size makes an oversized n fall back to the least
        # probable token instead of raising inside topk.
        k = min(n, probabilities.size(-1))
        top_indices = torch.topk(probabilities, k=k).indices
        new_word = tokenizer.decode(top_indices[-1]).strip()

        # Replace the last word and reconstruct the line
        modified_lines.append(" ".join(context_words + [new_word]))

    return "\n".join(modified_lines)

# Source poem: "The Snow Man" by Wallace Stevens, one verse per line
input_text = """One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is."""

# Run the transformation twice: once at rank 7 (P+7), once at rank 21 (P+21)
processed_text_p7 = apply_p_plus_n(input_text, n=7)
processed_text_p21 = apply_p_plus_n(input_text, n=21)

# Show the original followed by both variants, each under its own heading.
# A single print with a newline separator emits the same text as printing
# every heading and body on its own.
print(
    "=== Original Text ===",
    input_text,
    "\n=== Processed Text (P+7) ===",
    processed_text_p7,
    "\n=== Processed Text (P+21) ===",
    processed_text_p21,
    sep="\n",
)


=== Original Text ===
One must have a mind of winter
To regard the frost and the boughs
Of the pine-trees crusted with snow;
And have been cold a long time
To behold the junipers shagged with ice,
The spruces rough in the distant glitter
Of the January sun; and not to think
Of any misery in the sound of the wind,
In the sound of a few leaves,
Which is the sound of the land
Full of the same wind
That is blowing in the same bare place
For the listener, who listens in the snow,
And, nothing himself, beholds
Nothing that is not there and the nothing that is.

=== Processed Text (P+7) ===
One must have a mind of that
To regard the frost and the and
Of the pine-trees crusted with that
And have been cold a long in
To behold the junipers shagged with as
The spruces rough in the distant -
Of the January sun; and not to I
Of any misery in the sound of the that
In the sound of a few coming
Which is the sound of the in
Full of the same as
That is blowing in the same bare ,"
For the listener, who l