In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [2]:
# Hub checkpoint used throughout the notebook.
model_id = "distilbert/distilgpt2"

# distilgpt2 uses the GPT-2 architecture that ships with transformers, so no
# custom modelling code has to be fetched from the Hub. `trust_remote_code` is
# therefore left at its default (False) to avoid executing arbitrary repo code.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",   # let accelerate place the weights (GPU when available)
    torch_dtype="auto",  # use the dtype stored in the checkpoint
)

# Decoder-only models continue from the last position of each row, so batches
# are padded on the left to keep the real prompt tokens adjacent to the
# generated ones.
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left')
# GPT-2 defines no dedicated pad token; reuse EOS so that padding=True works.
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
# Display the module tree of the loaded model (sub-modules and their sizes).
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-5): 6 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [4]:
# Display the configuration object attached to the loaded model
# (architecture hyper-parameters, special token ids, generation defaults).
model.config

GPT2Config {
  "_name_or_path": "distilbert/distilgpt2",
  "_num_labels": 1,
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 6,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.44.2",
  "use_cache": true,
  "vocab_size": 50257
}

In [35]:
# Prompts of different lengths, so the batch exercises tokenizer padding.
batch_data = [
    "My name is",
    "I like to",
    "apples and bananas are",
    "This winter the snow",
]

In [6]:
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
device

'cuda'

In [36]:
# Tokenize every prompt in one call, pad to the longest prompt in the batch,
# return PyTorch tensors and move them to the selected device.
model_inputs = tokenizer(
    batch_data,
    padding=True,
    return_tensors="pt",
).to(device)
model_inputs

{'input_ids': tensor([[50256, 50256,  3666,  1438,   318],
        [50256, 50256,    40,   588,   284],
        [ 1324,   829,   290, 35484,   389],
        [50256,  1212,  7374,   262,  6729]], device='cuda:0'), 'attention_mask': tensor([[0, 0, 1, 1, 1],
        [0, 0, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [0, 1, 1, 1, 1]], device='cuda:0')}

In [37]:
# Generation below samples tokens (do_sample=True); seed the torch RNG so that
# Restart & Run All yields the same continuations on the same setup.
torch.manual_seed(0)

# Inference mode: disables dropout in the model.
model.eval()
with torch.no_grad():
  model_outputs = model.generate(
      **model_inputs,          # input_ids + attention_mask from the tokenizer
      max_new_tokens=10,       # tokens appended after each prompt
      do_sample=True,
      num_beams=4,             # combined with do_sample: beam-search sampling
      # Pass the pad id explicitly instead of relying on the implicit
      # fallback to eos_token_id, which emits a warning on every call.
      pad_token_id=tokenizer.pad_token_id,
  )

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [38]:
# Convert the generated token ids back to strings; special tokens (the EOS
# tokens used as left padding) are dropped from the decoded text.
generated_text = tokenizer.batch_decode(
    model_outputs,
    skip_special_tokens=True,
)
generated_text

['My name is from the name of a man who was born in',
 'I like to make a little bit of money, but I don',
 'apples and bananas are the most popular.\n\n\n\n\n\n',
 'This winter the snow is falling, and the snow is falling, and']