In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m89.8 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m113.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.1 tokenizers-0.13.2 transformers-4.26.1


In [2]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch as t

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [5]:
model.to('cuda')

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dro

In [8]:
def greedy_sampling(logits):
  return logits.argmax()

def test_model(text = "Replace me by any text you'd like.", steps = 100, sampling = greedy_sampling):
    eos_token = "<|endoftext|>"
    prompt = text
    print("Starting prompt: " + prompt)

    for i in range(steps):
        encoded_input = tokenizer(prompt, return_tensors="pt").to(model.device)
        logits = model(**encoded_input).logits[0, -1]
        next_token = sampling(logits)
        next_string = tokenizer.decode(next_token)
        if next_string == eos_token:
            break
        prompt = prompt + next_string
    print("Current generation: " + prompt)

In [9]:
test_model("My name is Rohil.", 10)

Starting prompt: My name is Rohil.
Current generation: My name is Rohil. I am a student of the art of the art


In [10]:
def top_k_sampling(k):


      def top_sampling(logits):
          probs = t.nn.functional.softmax(logits)
          values, indices = t.topk(probs, k)
          index = values.multinomial(num_samples = 1, replacement = True)
          return indices[index]
      
      return top_sampling

In [11]:
test_model("My name is Rohil.", 10, top_k_sampling(10))

Starting prompt: My name is Rohil.
Current generation: My name is Rohil. I am a student at the university. I was


  probs = t.nn.functional.softmax(logits)


In [12]:
test_model("My name is Rohil.", 10, top_k_sampling(50))

Starting prompt: My name is Rohil.
Current generation: My name is Rohil. I am a young man living with a family in


  probs = t.nn.functional.softmax(logits)


In [13]:
test_model('Mary is the greatest. Or is she?', 100, top_k_sampling(50))

Starting prompt: Mary is the greatest. Or is she?


  probs = t.nn.functional.softmax(logits)


Current generation: Mary is the greatest. Or is she? I remember some time ago when she told me she had never met a person quite like that. She said she enjoyed being around people because she could pick them apart, and she didn't mind the others. But I just never got into talking to her. It got difficult for me to tell the real story. When I told her I had no idea what she wanted to ask or what she wanted to know about the business and everything that was there. That would have been the end of it. Because


In [20]:
def top_p_sampling(p):

    def top_sampling(logits):
          probs = t.nn.functional.softmax(logits)
          descending_probs, original_indices = t.sort(probs, descending = True)
          p_mask = t.cumsum(descending_probs, -1) <= p
          p_mask[p_mask.sum()] = True
                
          index = descending_probs[p_mask].multinomial(num_samples = 1, replacement = True)
          return original_indices[index]
      
    return top_sampling

In [23]:
test_model('Mary is the greatest. Or is she?', 20, top_p_sampling(0.7))

Starting prompt: Mary is the greatest. Or is she?


  probs = t.nn.functional.softmax(logits)


Current generation: Mary is the greatest. Or is she? She is called after them. This girl has no father. She is called after her. She is
