In [8]:
import tiktoken
import torch

from blocks import InputPreprocess

In [9]:
# Load tiktoken's encoding registered under the name 'gpt2'; this tokenizer
# object is handed to InputPreprocess in a later cell.
tokenizer = tiktoken.get_encoding('gpt2')
# Hyper-parameter dictionary for the GPT-2 model used in this notebook.
# The key names follow the layout of a GPT-2 `config.json`
# (model_type "gpt2", architecture "GPT2LMHeadModel").
# Key order is kept exactly as in the original definition.
GPT2_CONFIG = {
    # --- model identity / activation ---
    "activation_function": "gelu_new",
    "architectures": ["GPT2LMHeadModel"],
    "attn_pdrop": 0.1,
    "bos_token_id": 50256,
    "embd_pdrop": 0.1,
    "eos_token_id": 50256,  # same id as bos_token_id
    "initializer_range": 0.02,
    "layer_norm_epsilon": 1e-5,
    "model_type": "gpt2",
    # --- size settings ---
    "n_ctx": 1024,
    "n_embd": 768,  # matches the last dimension of `inputs` printed below
    "n_head": 12,
    "n_layer": 12,
    "n_positions": 1024,
    "resid_pdrop": 0.1,
    # --- summary-head settings ---
    "summary_activation": None,
    "summary_first_dropout": 0.1,
    "summary_proj_to_labels": True,
    "summary_type": "cls_index",
    "summary_use_proj": True,
    # --- per-task generation defaults ---
    "task_specific_params": {
        "text-generation": {"do_sample": True, "max_length": 50},
    },
    "vocab_size": 50257,
}

In [10]:
# Seed PyTorch's global RNG *before* constructing the preprocessor so that any
# randomly initialised weights inside InputPreprocess produce the same values
# on every Restart & Run All.
# NOTE(review): assumes InputPreprocess creates its weights with torch's default
# RNG at construction time — confirm in blocks.py.
SEED = 42
torch.manual_seed(SEED)

# Build the preprocessor from the tokenizer and GPT-2 config defined above.
ip = InputPreprocess(tokenizer, GPT2_CONFIG)

# Two prompts of different lengths, passed to the preprocessor as one batch.
txt_list = ["I am in Ha Noi", "Explain what's the transformers i 5 minutes"]

# Calling the preprocessor on the batch returns two tensors: the model inputs
# and the matching attention mask (their shapes are printed in the next cell).
inputs, attn_masked = ip(txt_list)

In [11]:
# Report the shape of the inputs and of the attention mask, one per line.
print(inputs.shape, attn_masked.shape, sep="\n")

torch.Size([2, 10, 768])
torch.Size([2, 1, 1, 10])


In [12]:
# Dump the preprocessed batch. The `grad_fn` shown at the end of the output
# indicates the tensor is the result of an addition and is attached to the
# autograd graph.
print(inputs)

tensor([[[-0.3052, -3.5234,  1.0432,  ..., -0.8650,  1.7708, -1.5948],
         [ 2.3044,  3.2293,  1.1646,  ...,  0.7300,  0.0557,  2.3054],
         [ 1.4038, -1.1272,  0.4386,  ...,  1.1607, -1.2069, -0.8812],
         ...,
         [-0.0702,  1.3136, -0.7022,  ..., -2.7585, -0.9347,  0.2780],
         [ 1.5782,  0.7862,  0.5654,  ..., -0.3721,  0.5352,  0.3973],
         [ 0.2597,  2.8730,  0.7492,  ..., -0.7969, -0.8722, -1.0621]],

        [[ 1.2830, -2.7224, -0.0158,  ..., -0.5556, -1.3101, -1.9773],
         [-1.6789,  1.3044,  0.0321,  ...,  0.1112,  3.3969,  2.1343],
         [-0.6332, -0.8554,  0.1782,  ...,  3.4361, -0.4573,  0.1535],
         ...,
         [-0.1346, -0.4382, -0.9837,  ..., -2.3259, -0.2421,  0.8486],
         [ 0.9757, -0.2826,  0.4836,  ...,  0.5737,  1.4028,  1.5871],
         [-0.5119,  1.0605,  1.3500,  ...,  0.2390, -0.9372, -2.9989]]],
       grad_fn=<AddBackward0>)


In [13]:
# Dump the boolean attention mask. In this run the first row ends in False
# entries while the second row is all True — presumably False marks padding
# added to the shorter prompt (confirm in InputPreprocess).
print(attn_masked)

tensor([[[[ True,  True,  True,  True,  True,  True, False, False, False, False]]],


        [[[ True,  True,  True,  True,  True,  True,  True,  True,  True,  True]]]])


In [14]:
# Show the mask with one of its two singleton axes removed so each row prints
# on a single line. `squeeze(1)` drops axis 1 (size 1 in the shape printed
# earlier) instead of hard-coding `reshape(2, 1, 10)`, so this cell keeps
# working when the number of prompts or the padded length changes.
print(attn_masked.squeeze(1))

tensor([[[ True,  True,  True,  True,  True,  True, False, False, False, False]],

        [[ True,  True,  True,  True,  True,  True,  True,  True,  True,  True]]])
