<a href="https://colab.research.google.com/github/rogrady350/GitHubDemo/blob/main/textGenCoLabNotebookFix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch


In [None]:
!pip install "transformers>=4.24.0"

In [None]:
import torch

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Checkpoint name handed to from_pretrained() in the next cell.
# The commented-out line keeps the alternative "gpt2-xl" checkpoint at hand.
#model_name = "gpt2-xl"
model_name = "gpt2"

In [None]:
# Load the tokenizer and the causal language model for the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Move the model's weights to the selected device.
model = model.to(device)

In [None]:
# Display the device string selected earlier ("cuda" or "cpu").
device

In [None]:
# hide_output
import pandas as pd

input_txt = "Transformers are the"
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
iterations = []
n_steps = 8
choices_per_step = 5

# Manual greedy decoding: at every step, record the current input text and the
# `choices_per_step` most probable next tokens, then extend the input with the
# single most probable one.
with torch.no_grad():
    for _ in range(n_steps):
        # Text decoded from the ids that are fed to the model at this step.
        iteration = {"Input": tokenizer.decode(input_ids[0])}
        output = model(input_ids=input_ids)
        # Logits at the last position of the first sequence in the batch,
        # turned into a probability distribution with softmax.
        next_token_probs = torch.softmax(output.logits[0, -1, :], dim=-1)
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True)
        # Store the highest-probability tokens together with their percentage.
        for rank, token_id in enumerate(sorted_ids[:choices_per_step], start=1):
            token_prob = next_token_probs[token_id].cpu().numpy()
            iteration[f"Choice {rank}"] = (
                f"{tokenizer.decode(token_id)} ({100 * token_prob:.2f}%)"
            )
        # Append the top-ranked token id (reshaped to (1, 1)) to the input.
        input_ids = torch.cat([input_ids, sorted_ids[:1].unsqueeze(0)], dim=-1)
        iterations.append(iteration)

pd.DataFrame(iterations)

In [None]:
# Re-encode the original prompt: the step-by-step loop appended generated
# tokens to input_ids, so start again from the bare prompt.
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)



In [None]:
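#attention_mask = input_ids["attention_mask"]
# NOTE: input_ids is already the "input_ids" tensor taken from the tokenizer output,
# so it cannot be indexed with "attention_mask"; the mask would have to be read from
# the tokenizer result itself, e.g. tokenizer(input_txt, return_tensors="pt")["attention_mask"].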

In [None]:
# Let generate() do the decoding with sampling turned off, producing n_steps
# new tokens — the same number of steps as the manual loop.
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)

In [None]:
# Decode the first sequence of the generated ids back into text.
print(tokenizer.decode(output[0]))

In [None]:
# Upper bound on total sequence length (prompt + generated tokens) passed to
# generate() in this and the following cells.
max_length = 128
# Story-style prompt; line continuations keep it a single paragraph that ends
# in blank lines.
input_txt = """During a routine satellite scan of the Amazon rainforest, \
researchers at the International Space Station spotted a series of massive, \
glowing symbols in a remote clearing. The scientists, unsure of their origin, \
decided to investigate further. To their astonishment, they found that the \
symbols were ancient carvings that emitted a soft, pulsating light, seemingly \
responding to their presence.\n\n
"""

# Encode the prompt and place the ids on the same device as the model.
input_ids = tokenizer(input_txt, return_tensors="pt")["input_ids"].to(device)
# Greedy decoding: sampling is disabled.
output_greedy = model.generate(
    input_ids,
    do_sample=False,
    max_length=max_length,
)
print(tokenizer.decode(output_greedy[0]))


Beam Search

In [None]:
import numpy as np

In [None]:
import torch.nn.functional as F

In [None]:
def log_probs_from_logits(logits, labels):
    """Return the log-probability assigned to each label token.

    Args:
        logits: Tensor of unnormalised scores whose last dimension is the
            vocabulary, e.g. (batch, seq_len, vocab) or (seq_len, vocab).
        labels: Integer tensor of token ids with the shape of `logits` minus
            its last dimension.

    Returns:
        Tensor with the same shape as `labels`; each entry is the
        log-softmax score of the corresponding label.
    """
    logp = F.log_softmax(logits, dim=-1)
    # Gather along the vocabulary (last) axis so the function works for any
    # number of leading dimensions, not only 3-D batches.
    logp_label = torch.gather(logp, -1, labels.unsqueeze(-1)).squeeze(-1)
    return logp_label


In [None]:
def sequence_logprob(model, labels, input_len=0):
    """Sum the log-probabilities the model assigns to the tokens after the prompt.

    Args:
        model: Callable that, given `labels`, returns an object with a
            `.logits` attribute (assumed (batch, seq_len, vocab) — confirm
            against the model in use).
        labels: Token ids of the full sequence(s), prompt plus continuation,
            indexed as (batch, seq_len).
        input_len: Number of leading prompt tokens whose scores are left out
            of the sum.

    Returns:
        NumPy scalar holding the total log-probability, summed over every
        row of `labels`.
    """
    with torch.no_grad():
        output = model(labels)
        # The logits at position t are paired with the token at t + 1, so the
        # last logit and the first label are dropped to align the two.
        log_probs = log_probs_from_logits(
            output.logits[:, :-1, :], labels[:, 1:])
        # After that shift, log_probs[:, i] scores labels[:, i + 1]. The first
        # generated token labels[:, input_len] therefore sits at index
        # input_len - 1; slicing at input_len would silently skip it.
        first_scored = max(input_len - 1, 0)
        seq_log_prob = torch.sum(log_probs[:, first_scored:])
    return seq_log_prob.cpu().numpy()

In [None]:
# Score the greedy output, passing the prompt length (number of ids in the
# single prompt row) as input_len so prompt tokens are not counted.
logp = sequence_logprob(model, output_greedy, input_len=input_ids.size(1))
print(tokenizer.decode(output_greedy[0]))
print(f"\nlog-prob: {logp:.2f}")


In [None]:
# Beam search with five beams and sampling disabled, scored the same way as
# the greedy output so the two log-probabilities can be compared.
output_beam = model.generate(
    input_ids,
    num_beams=5,
    do_sample=False,
    max_length=max_length,
)
logp = sequence_logprob(model, output_beam, input_len=input_ids.size(1))
print(tokenizer.decode(output_beam[0]))
print(f"\nlog-prob: {logp:.2f}")


Sampling Methods

In [None]:


#hide_input

#id temperature
#alt Token probabilities as a function of temperature
#caption Distribution of randomly generated token probabilities for three selected temperatures
import matplotlib.pyplot as plt
import numpy as np

def softmax(logits, T=1):
    """Temperature-scaled softmax over all entries of `logits`.

    Args:
        logits: Array-like of scores; normalisation runs over the whole array.
        T: Temperature the scores are divided by before exponentiation.

    Returns:
        NumPy array of the same shape as `logits` whose entries sum to 1
        (an empty array for empty input).
    """
    scaled = np.asarray(logits) / T
    if scaled.size == 0:
        return scaled
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps exp() from overflowing to inf (and the ratio from becoming NaN)
    # for large scores or small temperatures.
    e_x = np.exp(scaled - scaled.max())
    return e_x / e_x.sum()

# Random positive scores stand in for token logits; sort them in descending
# order so each curve falls from the most to the least likely token.
logits = np.exp(np.random.random(1000))
sorted_logits = -np.sort(-logits)
x = np.arange(len(sorted_logits))

# One step curve per temperature.
for T in (0.5, 1.0, 2.0):
    probs = softmax(sorted_logits, T)
    plt.step(x, probs, label=f"T={T}")
plt.xlabel("Sorted token probabilities")
plt.ylabel("Probability")
plt.legend(loc="best")
plt.show()


In [None]:
# hide
# Seed PyTorch's global RNG so the sampling-based generations that follow start
# from a fixed state; the trailing semicolon suppresses the cell's output.
torch.manual_seed(42);

Try different temperatures, e.g. 2.9 and 0.5.

In [None]:
# Low temperature: conservative, less creative sampling that favours the more
# likely tokens, giving more predictable and coherent results.
output_temp = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.5,
    top_k=0,
    max_length=max_length,
)
print(tokenizer.decode(output_temp[0]))


In [None]:
# Very high temperature: very high creativity, much less predictable.
output_temp = model.generate(
    input_ids,
    do_sample=True,
    temperature=2.9,
    top_k=0,
    max_length=max_length,
)
print(tokenizer.decode(output_temp[0]))

# Observation: the results are rather incoherent.

In [None]:
# High temperature: high creativity, less predictable.
output_temp = model.generate(
    input_ids,
    do_sample=True,
    temperature=1.5,
    top_k=0,
    max_length=max_length,
)
print(tokenizer.decode(output_temp[0]))


In [None]:
# Balanced temperature just below 1: balanced, but slightly less creative.
output_temp = model.generate(
    input_ids,
    do_sample=True,
    temperature=0.9,
    top_k=0,
    max_length=max_length,
)
print(tokenizer.decode(output_temp[0]))

# Observation: the results are more coherent.

Contrastive Search


Other models and settings

In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

model_name = 'gpt2-large'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# The EOS token id doubles as the padding id for generation.
model = GPT2LMHeadModel.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
# Put the model on the device chosen at the top of the notebook, as the earlier
# cells do; otherwise this larger checkpoint would generate on the CPU even
# when a GPU is available.
model = model.to(device)
model.eval()

# prepare the prefix (ids must live on the same device as the model)
prefix_text = input_txt
input_ids = tokenizer(prefix_text, return_tensors='pt').input_ids.to(device)

# generate the result with contrastive search
# Settings tried in this cell:
# - higher penalty_alpha: stronger penalty for repetitive or over-confident
#   predictions; more unique phrasing, but may lead to unexpected language if
#   set too high
# - higher top_k: expands the pool of candidate tokens to the top 10; higher
#   values increase diversity, lower values make the response more deterministic
# - lower max_length: shorter, more concise responses
output = model.generate(input_ids, penalty_alpha=0.8, top_k=10, max_length=256)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(output[0], skip_special_tokens=True))
print("" + 100 * '-')



In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# large model, currently used
model_name = 'gpt2-large'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# The EOS token id doubles as the padding id for generation.
model = GPT2LMHeadModel.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
# Put the model on the device chosen at the top of the notebook, as the earlier
# cells do; otherwise generating up to 512 tokens would run on the CPU even
# when a GPU is available.
model = model.to(device)
model.eval()

# prepare the prefix (ids must live on the same device as the model)
prefix_text = input_txt
input_ids = tokenizer(prefix_text, return_tensors='pt').input_ids.to(device)

# generate the result with contrastive search
output = model.generate(input_ids, penalty_alpha=0.6, top_k=4, max_length=512)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(output[0], skip_special_tokens=True))
print("" + 100 * '-')



In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# smallest model
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# The EOS token id doubles as the padding id for generation.
model = GPT2LMHeadModel.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
# Put the model on the device chosen at the top of the notebook, as the earlier
# cells do, instead of leaving it on the CPU.
model = model.to(device)
model.eval()

# prepare the prefix (ids must live on the same device as the model)
prefix_text = input_txt
input_ids = tokenizer(prefix_text, return_tensors='pt').input_ids.to(device)

# generate the result with contrastive search
output = model.generate(input_ids, penalty_alpha=0.6, top_k=4, max_length=512)
print("Output:\n" + 100 * '-')
print(tokenizer.decode(output[0], skip_special_tokens=True))
print("" + 100 * '-')

