In [43]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# Fetch the tokenizer and the causal language-model weights for the chosen checkpoint.
print("Loading AI model...")
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# nn.Module.to() moves the parameters in place and returns the same module,
# so rebinding `model` keeps the name pointing at the relocated model.
model = model.to(device)
print(f"Model moved to {device}")

Loading AI model...


Loading weights:   0%|          | 0/148 [00:00<?, ?it/s]

GPT2LMHeadModel LOAD REPORT from: gpt2
Key                  | Status     |  | 
---------------------+------------+--+-
h.{0...11}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Model moved to cuda


In [44]:
# Vocabulary scan: print every token whose decoded text contains "|", followed
# by its id. This is how the id of the "<|endoftext|>" special token can be
# located without hard-coding it.
# The scan starts at id 0 so that no vocabulary entry is skipped, and each
# token is decoded only once per iteration.
for token_id in range(tokenizer.vocab_size):
    token_text = tokenizer.decode(token_id)
    if "|" in token_text:
        print(token_text)
        print(token_id)


|
91
 |
930
 ||
8614
)|
14726
||
15886
-|
22831
||||
42210
 |--
44233
 [|
49074
<|endoftext|>
50256


In [45]:
# Poem generator: starting from a one-line prompt, sample the remaining lines
# token by token with temperature, top-k filtering and a distance-decayed
# repetition penalty. Relies on `tokenizer`, `model` and `device` from the
# model-loading cell.
# NOTE: no random seed is set in this cell, so each run samples a different
# poem; call torch.manual_seed(...) beforehand if reproducible output is needed.

text = "A simple 4 line poem with 5-7 words per line: Roses are red, "
print(f"Starting poem:\n{text}")

current_text = text
target_lines = 4
current_lines = 1
temp = 0.7  # Sampling temperature; logits are divided by this before softmax
topk = 50   # Only the `topk` most likely tokens are eligible for sampling

# Repetition penalty parameters
repetition_penalty_factor = 1.2       # Base penalty strength (1.0 means no penalty, >1.0 penalizes)
repetition_penalty_decay_rate = 0.9   # How fast the penalty decays with distance (0 < decay_rate < 1)
max_repetition_penalty_distance = 20  # Only penalize tokens within this many steps back in the context

# Safety net: stop a line after this many sampled tokens even if the word
# target was not reached, so the inner loop can never spin forever when the
# model keeps emitting tokens without letters.
max_tokens_per_line = 40

# Token ids that must never be sampled inside a line: the end-of-text marker
# and the newline tokens (a sampled newline would split one poem line in two).
# Computed once here because the ids do not change between sampling steps.
banned_token_ids = sorted(
    {tokenizer.eos_token_id, *tokenizer.encode("\n"), *tokenizer.encode("\n\n")}
)


def _count_words(decoded_text):
    """Return the number of whitespace-separated chunks containing a letter.

    Counting on decoded text (instead of per token) means a word that the
    tokenizer split into several sub-word pieces is still counted once.
    """
    return sum(
        1 for chunk in decoded_text.split() if any(ch.isalpha() for ch in chunk)
    )


def _apply_repetition_penalty(logits, context_ids, factor, decay_rate, max_distance):
    """Dampen, in place, the logits of tokens that occurred recently.

    Args:
        logits: 1-D tensor of next-token logits (modified in place and returned).
        context_ids: list of token ids generated so far, oldest first.
        factor: penalty strength for the most recent token.
        decay_rate: multiplicative decay of the penalty excess per step back.
        max_distance: how many of the most recent tokens are considered.

    A token at distance d (0 = most recent) gets the penalty
    1 + (factor - 1) * decay_rate**d. Negative logits are multiplied by it and
    non-negative logits divided by it, so the sign never flips; a token that
    occurs several times in the window therefore receives the product of its
    individual penalties, which is what applying them one after another gives.
    """
    combined_penalty = {}
    recent_first = context_ids[::-1][:max(max_distance, 0)]
    for distance, token_id in enumerate(recent_first):
        penalty = 1.0 + (factor - 1.0) * (decay_rate ** distance)
        combined_penalty[token_id] = combined_penalty.get(token_id, 1.0) * penalty

    if not combined_penalty:
        return logits

    # One vectorised update instead of one device round-trip per context token.
    ids = torch.tensor(list(combined_penalty.keys()), dtype=torch.long, device=logits.device)
    strengths = torch.tensor(
        list(combined_penalty.values()), dtype=logits.dtype, device=logits.device
    )
    selected = logits[ids]
    logits[ids] = torch.where(selected < 0, selected * strengths, selected / strengths)
    return logits


def _sample_top_k(logits, k):
    """Sample one token id from the k highest logits and return it as an int.

    Softmax over the k best logits equals a full softmax that is truncated to
    the top k and renormalised.
    """
    k = min(k, logits.size(-1))
    top_logits, top_indices = torch.topk(logits, k)
    top_probs = torch.softmax(top_logits, dim=-1)
    choice = torch.multinomial(top_probs, 1).item()
    return top_indices[choice].item()


while current_lines < target_lines:
    print(f"Line {current_lines + 1}:")

    # Add newline to current_text to provide context for the model
    current_text += "\n"

    # Encode current_text to get the base input_ids for generating this line
    input_ids_for_line_generation = tokenizer.encode(current_text, return_tensors="pt").to(device)

    # Draw the word budget for this line. torch.randint's upper bound is
    # exclusive, so this yields 5, 6 or 7 words, matching the prompt.
    words_in_line = 0
    target_words = torch.randint(5, 8, (1,)).item()

    # Token ids accepted for the current line so far
    current_line_segment_token_ids = []

    while len(current_line_segment_token_ids) < max_tokens_per_line:
        # Logits for the NEXT token; cloned so the in-place edits below do not
        # write into the model's output tensor.
        with torch.no_grad():
            next_token_logits = model(input_ids_for_line_generation).logits[0, -1, :].clone()

        # Repetition penalty first, then temperature, then hard bans
        _apply_repetition_penalty(
            next_token_logits,
            input_ids_for_line_generation[0].tolist(),
            repetition_penalty_factor,
            repetition_penalty_decay_rate,
            max_repetition_penalty_distance,
        )
        next_token_logits /= temp
        next_token_logits[banned_token_ids] = -1e10

        next_token_id = _sample_top_k(next_token_logits, topk)

        # Look-ahead stop: if accepting this token would begin word number
        # target_words + 1, the line is complete. The token is discarded, so
        # the line ends on a finished word rather than on a sub-word piece.
        candidate_token_ids = current_line_segment_token_ids + [next_token_id]
        candidate_word_count = _count_words(tokenizer.decode(candidate_token_ids))
        if candidate_word_count > target_words:
            break

        current_line_segment_token_ids = candidate_token_ids
        words_in_line = candidate_word_count

        # Extend the context with the accepted token for the next prediction step
        new_input_id_tensor = torch.tensor([[next_token_id]], device=device)
        input_ids_for_line_generation = torch.cat(
            (input_ids_for_line_generation, new_input_id_tensor), dim=1
        )

    # Decode the whole line at once so the tokenizer handles internal spacing
    new_line = tokenizer.decode(current_line_segment_token_ids, skip_special_tokens=True).strip()

    # Append the decoded line to the running poem text
    current_text += new_line

    print(f"Complete line: '{new_line}'\n")

    current_lines += 1

print("FINAL POEM:")
print(current_text.strip())

Starting poem:
A simple 4 line poem with 5-7 words per line: Roses are red, 
Line 2:
Complete line: 'Rings to follow on the ground'

Line 3:
Complete line: 'The night I made a line. A'

Line 4:
Complete line: 'Lighted tree, but no flowers'

FINAL POEM:
A simple 4 line poem with 5-7 words per line: Roses are red, 
Rings to follow on the ground
The night I made a line. A
Lighted tree, but no flowers
