In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
%%HTML
<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM

import pandas as pd
import torch
import torch.nn.functional as F

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
model_checkpoint = 'gpt2-xl'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint).to(device)

In [5]:
# greedy search decoding
input_txt = 'Transformers are the'
input_ids = tokenizer(input_txt, return_tensors='pt')['input_ids'].to(device)
iterations = []
n_steps = 8
choices_per_step = 5

with torch.no_grad():
    for _ in range(n_steps):
        iteration = dict()
        iteration['input'] = tokenizer.decode(input_ids[0])

        output = model(input_ids=input_ids)
        next_token_logits = output.logits[0, -1, :]
        next_token_probs = torch.softmax(next_token_logits, dim=-1)
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True)

        for choice_idx in range(choices_per_step):
            token_id = sorted_ids[choice_idx]
            token_probs = next_token_probs[token_id]
            token_choice = f'{tokenizer.decode(token_id)} ({token_probs * 100:.2f}%)'
            iteration[f'Choice {choice_idx + 1}'] = token_choice
        input_ids = torch.cat([input_ids, sorted_ids[None, 0, None]], dim=-1)
        iterations.append(iteration)

pd.DataFrame(iterations)

Unnamed: 0,input,Choice 1,Choice 2,Choice 3,Choice 4,Choice 5
0,Transformers are the,most (8.53%),only (4.96%),best (4.65%),Transformers (4.37%),ultimate (2.16%)
1,Transformers are the most,popular (16.78%),powerful (5.37%),common (4.96%),famous (3.72%),successful (3.20%)
2,Transformers are the most popular,toy (10.63%),toys (7.23%),Transformers (6.60%),of (5.46%),and (3.76%)
3,Transformers are the most popular toy,line (34.38%),in (18.20%),of (11.71%),brand (6.10%),line (2.69%)
4,Transformers are the most popular toy line,in (46.28%),of (15.09%),", (4.94%)",on (4.40%),ever (2.72%)
5,Transformers are the most popular toy line in,the (65.99%),history (12.42%),America (6.91%),Japan (2.44%),North (1.40%)
6,Transformers are the most popular toy line in the,world (69.26%),United (4.55%),history (4.29%),US (4.23%),U (2.30%)
7,Transformers are the most popular toy line in ...,", (39.73%)",. (30.64%),and (9.87%),with (2.32%),today (1.74%)


In [6]:
# greedy search decoding using transformers inbuilt generate method
input_ids = tokenizer(input_txt, return_tensors='pt')['input_ids'].to(device)
output = model.generate(input_ids, max_new_tokens=n_steps, do_sample=False)
print(tokenizer.decode(output[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Transformers are the most popular toy line in the world,


In [7]:
def log_probs_from_logits(logits, generated_text_tokens):
    logp = F.log_softmax(logits, dim=-1)
    label_probs = torch.gather(logp, dim=2, index=generated_text_tokens)
    return label_probs.squeeze(-1)


def sequence_log_prob(model, generated_text_tokens, input_len):
    with torch.no_grad():
        output = model(generated_text_tokens)
        log_probs = log_probs_from_logits(output.logits[:, :-1, :], generated_text_tokens[:, 1:].unsqueeze(2))
        seq_log_prob = torch.sum(log_probs[:, input_len:])
    return seq_log_prob.cpu().numpy()

In [10]:
# decoding techniques
# comparing log probability of a sequence using
# greedy decoding and beam search decoding
max_length = 128
input_txt = """In a shocking finding, scientist discovered \
a herd of unicorns living in a remote, previously unexplored \
valley, in the Andes Mountains. Even more surprising to the \
researchers was the fact that the unicorns spoke perfect English.\n\n
"""
input_ids = tokenizer(input_txt, return_tensors='pt')['input_ids'].to(device)
output_greedy = model.generate(input_ids, max_length=max_length, do_sample=False)
logp = sequence_log_prob(model, output_greedy, len(input_ids[0]))
print(tokenizer.decode(output_greedy[0]))
print(f'log probability of sequence from greedy decoding: {logp:.2f}')

print('\n\n\n')

# generating text using beam search
output_beam = model.generate(input_ids, max_length=max_length, num_beams=5, do_sample=False)
logp = sequence_log_prob(model, output_beam, len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f'log probability of sequence from beam search: {logp:.2f}')

print('\n\n\n')

# both greedy decoding and beam search produces repetitve text
# we can use no_repeat_ngram_size in the generate function to avoid repetition.
output_beam = model.generate(input_ids, max_length=max_length, num_beams=5, do_sample=False, no_repeat_ngram_size=2)
logp = sequence_log_prob(model, output_beam, len(input_ids[0]))
print(tokenizer.decode(output_beam[0]))
print(f'sequence from beam search: {logp:.2f}')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The researchers, from the University of California, Davis, and the University of Colorado, Boulder, were conducting a study on the Andean cloud forest, which is home to the rare species of cloud forest trees.


The researchers were surprised to find that the unicorns were able to communicate with each other, and even with humans.


The researchers were surprised to find that the unicorns were able
log probability of sequence from greedy decoding: -87.43






The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The discovery of the unicorns was made by a team of scientists from the University of California, Santa Cruz, and the National Geographic Society.


The scientists were conducting a study of the Andes Mountains when they discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English
log probability of sequence from beam search: -55.23




In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The discovery was made by a team of scientists from the Uni

In [11]:
# when temp >> 1, the probability distribution is flattened.
output_temp = model.generate(input_ids, max_length=max_length, do_sample=True, temperature=2.0, top_k=0)
logp = sequence_log_prob(model, output_temp, len(input_ids[0]))
print(tokenizer.decode(output_temp[0]))
print(f'sequence from temperature parameter >> 1: {logp:.2f}')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


FOR Benny V Dorothy Tomorrow9851232006 Modes 3 118 SummaryGodyn Janssonlictjs apparently had belonged t Almaheloss atroc Craig novel dy Austria army unnamed warrior trappedAlthough stopping dracon 6917 warrantyMoreover theoryicycleaSAME nonno Dah Moonlight fallen petmallowweiddled Pir Sod Highland Sleep awfully DropsBluSqu disinformation labaaoccor/training crucifix 2019 duoenough plots delivered prenatEn Waldemo
sequence from beam search: -880.96


In [15]:
# when temp << 1, the probability distribution is spiky, meaning the probability distribution is spiky
# around the mean
output_temp = model.generate(input_ids, max_length=max_length, do_sample=True, temperature=0.5, top_k=0)
logp = sequence_log_prob(model, output_temp, len(input_ids[0]))
print(tokenizer.decode(output_temp[0]))
print(f'sequence from temperature parameter << 1: {logp:.2f}')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


The herd of unicorns is believed to have been living in the region for at least 10,000 years, and their existence has been documented by the ancient Inca civilization.

Researchers believe that the unicorns were domesticated by the Inca and brought to the valley by the Spanish conquistadors.

The unicorns were found in the Andes Mountains. They were thought to have
sequence from temperature parameter << 1: -98.18


In [16]:
output_topk = model.generate(input_ids, max_length=max_length, do_sample=True, top_k=50)
logp = sequence_log_prob(model, output_topk, len(input_ids[0]))
print(tokenizer.decode(output_topk[0]))
print(f'sequence from top k: {logp:.2f}')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


A group of scientists, including a renowned zoologist and a PhD candidate, have just completed a three-year search for unicorns following a report written by a German zoologist, Dr. Pauline Baum. The news of the unicorn led to a search through archives in the U.S., Mexico and India.


Unicorns were found living in the remote Andean region of Tres
sequence from top k: -183.01


In [18]:
output_topp = model.generate(input_ids, max_length=max_length, do_sample=True, top_p=0.90)
logp = sequence_log_prob(model, output_topp, len(input_ids[0]))
print(tokenizer.decode(output_topp[0]))
print(f'sequence from top k: {logp:.2f}')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a shocking finding, scientist discovered a herd of unicorns living in a remote, previously unexplored valley, in the Andes Mountains. Even more surprising to the researchers was the fact that the unicorns spoke perfect English.


After three months of study, the scientists concluded that the unicorn herd was made up of three animals with different physical appearance.

The discovery was made when the group of six researchers went out for a walk in a valley located in the northern region of Peru in 2012.

One of the scientists, Dr. Sergio Avila, the lead of the research, observed a flock of unicorns,
sequence from top k: -158.87


In [3]:
import torch
import time

def stress_test(duration_minutes=5, matrix_size=5000):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Running on: {device}")
    
    start_time = time.time()
    end_time = start_time + (duration_minutes * 60)
    
    iteration = 0
    while time.time() < end_time:
        # Create large random matrices
        a = torch.randn(matrix_size, matrix_size, device=device)
        b = torch.randn(matrix_size, matrix_size, device=device)
        
        # Matrix multiplication
        c = torch.matmul(a, b)
        
        if iteration % 10 == 0:
            # Print GPU stats every 10 iterations
            print(f"Iteration {iteration}")
            # Force CUDA synchronization
            torch.cuda.synchronize()
        
        iteration += 1
        
        # Clear memory
        del a, b, c
        torch.cuda.empty_cache()


stress_test(duration_minutes=5, matrix_size=5000)


Running on: cuda
Iteration 0
Iteration 10
Iteration 20
Iteration 30
Iteration 40
Iteration 50
Iteration 60
Iteration 70
Iteration 80
Iteration 90
Iteration 100
Iteration 110
Iteration 120
Iteration 130
Iteration 140
Iteration 150
Iteration 160
Iteration 170
Iteration 180
Iteration 190
Iteration 200
Iteration 210
Iteration 220
Iteration 230
Iteration 240
Iteration 250
Iteration 260
Iteration 270
Iteration 280
Iteration 290
Iteration 300
Iteration 310
Iteration 320
Iteration 330
Iteration 340
Iteration 350
Iteration 360
Iteration 370
Iteration 380
Iteration 390
Iteration 400
Iteration 410
Iteration 420
Iteration 430
Iteration 440
Iteration 450
Iteration 460
Iteration 470
Iteration 480
Iteration 490
Iteration 500
Iteration 510
Iteration 520
Iteration 530
Iteration 540
Iteration 550
Iteration 560
Iteration 570
Iteration 580
Iteration 590
Iteration 600
Iteration 610
Iteration 620
Iteration 630
Iteration 640
Iteration 650
Iteration 660
Iteration 670
Iteration 680
Iteration 690
Iteration 700


KeyboardInterrupt: 