<a href="https://colab.research.google.com/github/misticorion/language-modelling/blob/main/LanguageModelling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#PYTORCH Models

REQUIREMENTS

In [1]:
!pip install pytorch-transformers
!pip install --upgrade urllib3==1.25.4

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## OpenAI GPT-2

Import Libraries

In [2]:
import torch
from pytorch_transformers import GPT2Tokenizer, GPT2LMHeadModel

Load pre-trained model tokenizer.

In [3]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

100%|██████████| 1042301/1042301 [00:00<00:00, 23578575.45B/s]
100%|██████████| 456318/456318 [00:00<00:00, 18761323.46B/s]


Encode a text input

In [4]:
text = "The weather is "
indexed_tokens = tokenizer.encode(text)
print(indexed_tokens)

[383, 6193, 318]


Convert indexed tokens in a PyTorch tensor

In [5]:
tokens_tensor = torch.tensor([indexed_tokens])
print(tokens_tensor)

tensor([[ 383, 6193,  318]])


Load pre-trained model (weights)

In [6]:
model = GPT2LMHeadModel.from_pretrained('gpt2')
print(model)

100%|██████████| 665/665 [00:00<00:00, 142985.19B/s]
100%|██████████| 548118077/548118077 [00:11<00:00, 49506972.41B/s]


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): Laye

Set the model in evaluation mode to deactivate the DropOut modules

In [7]:
model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): Laye

Predict all tokens

In [8]:
with torch.no_grad():
  outputs = model(tokens_tensor)
  print("---------------OUTPUT---------------------")
  print(outputs)
  predictions = outputs[0]
  print("---------------PREDICTIONS----------------")
  print(predictions)

---------------OUTPUT---------------------
(tensor([[[ -35.7776,  -34.8145,  -37.7907,  ...,  -39.8028,  -40.6779,
           -34.2818],
         [-101.0440, -101.9380, -108.4058,  ..., -110.8361, -108.2946,
          -103.3729],
         [-131.2088, -132.5228, -138.5790,  ..., -142.4155, -138.1684,
          -134.8358]]]), (tensor([[[[[-1.0894,  1.9722,  0.7476,  ..., -0.9693, -0.6116,  1.5209],
           [-1.3897,  2.2951,  1.4581,  ...,  0.0340, -2.0842,  3.3009],
           [-1.9516,  2.4045,  1.9535,  ..., -1.6259, -2.7266,  2.5379]],

          [[ 0.0944,  0.3881, -1.2054,  ..., -0.0815,  1.5339, -0.1081],
           [-0.0443,  0.0048, -0.8430,  ...,  0.7112,  4.6417,  2.3829],
           [ 0.6383, -1.3322, -0.1982,  ..., -3.9342,  3.4697,  0.9596]],

          [[-0.1414,  0.0414,  0.5972,  ..., -1.0880, -1.6608,  0.4301],
           [-0.1943,  1.1277,  0.8155,  ..., -3.2024, -0.9752,  1.7431],
           [ 0.5503,  0.1788,  0.1941,  ..., -2.7556,  0.5540,  1.9818]],

          

Get the predicted next sub-word

In [9]:
predicted_index = torch.argmax(predictions[0, -1, :]).item()
print(predicted_index)


922


In [10]:
predicted_text = tokenizer.decode(indexed_tokens + [predicted_index])
print(predicted_text)

 The weather is good
