In [26]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Name of the pretrained checkpoint; the tokenizer and the model must come from
# the same checkpoint so that token ids line up with the model's vocabulary.
MODEL_NAME = "gpt2"

tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

# Put the model in evaluation mode. eval() returns the model, so as the last
# expression of the cell it also displays the module tree.
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [27]:
# Read the source poem into a list with one entry per line (line terminators
# removed by splitlines()). The encoding is given explicitly so the text is
# decoded the same way regardless of the platform's default encoding.
with open("/Snowman.txt", "r", encoding="utf-8") as f:
    poem_lines = f.read().splitlines()

In [28]:
def get_p_plus_x(prompt, x=7):
    """Return the text of the x-th most probable next token after ``prompt``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        prompt: Text the model should continue. If empty, the tokenizer's
            beginning-of-sequence token is used as the context instead, since
            an empty token sequence has no last position to read logits from.
        x: 1-indexed rank of the token to return (1 = most probable).

    Returns:
        The decoded string for the selected token, exactly as the tokenizer
        decodes it (it may include leading whitespace).

    Raises:
        ValueError: If ``x`` is smaller than 1 or larger than the number of
            entries in the model's next-token distribution.
    """
    # Without this check a non-positive x would be turned into a negative
    # index and silently select a token of the wrong rank.
    if x < 1:
        raise ValueError(f"x must be a positive rank (1 = most probable), got {x}")

    inputs = tokenizer(prompt or tokenizer.bos_token, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Logits at the last position of the first (only) sequence, i.e. the
    # scores for the token that would follow the prompt.
    logits = outputs.logits[0, -1, :]
    probs = torch.softmax(logits, dim=0)

    if x > probs.numel():
        raise ValueError(
            f"x={x} exceeds the number of candidate tokens ({probs.numel()})"
        )

    # Request exactly x candidates (sorted by descending probability) so that
    # any valid rank works, not only ranks within a fixed top-50 window.
    _, top_indices = torch.topk(probs, k=x)

    # The last of the x best candidates is the x-th most probable token.
    token_id = top_indices[-1].item()
    return tokenizer.decode([token_id])

In [29]:
def process_poem(poem_lines, x=7):
    """Replace the last word of every non-blank line with a model-chosen token.

    For each line, the words before the last one are joined with single spaces
    and passed to ``get_p_plus_x``; the returned token takes the place of the
    last word.

    Args:
        poem_lines: Iterable of poem lines (strings without line terminators).
        x: Rank forwarded to ``get_p_plus_x`` (1 = most probable token).

    Returns:
        A new list with one entry per input line. Blank or whitespace-only
        lines are passed through unchanged. Other lines are rebuilt from their
        words, so leading/trailing whitespace and repeated inner spaces are
        not preserved.
    """
    new_lines = []
    for line in poem_lines:
        # split() with no arguments ignores surrounding whitespace, so an
        # empty result means the line is blank.
        words = line.split()
        if not words:
            new_lines.append(line)
            continue

        base = " ".join(words[:-1])

        # The decoded token may carry its own surrounding whitespace (GPT-2
        # tokens commonly start with a space); strip it so that joining does
        # not produce a double space.
        replacement = get_p_plus_x(base, x=x).strip()

        # Skip empty parts so that a one-word line (empty base) or a
        # whitespace-only token does not leave a stray leading/trailing space.
        new_lines.append(" ".join(part for part in (base, replacement) if part))
    return new_lines

In [51]:
# Write one rewritten poem per chosen rank. The rank appears in the file name
# (SnowMan_P+<rank>.txt). The encoding is explicit because generated tokens
# are not guaranteed to be ASCII.
for rank in (26, 23):
    with open(f"SnowMan_P+{rank}.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(process_poem(poem_lines, x=rank)))

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
# NOTE(review): this cell has no execution count and none of the cells above
# read from or write to /content/drive — confirm whether it is still needed,
# and if so move it (and its import) to the top of the notebook.
from google.colab import drive
drive.mount('/content/drive')