# Loading models from **transformers** library

In [1]:
import os
import torch
import transformers
from transformers import BertTokenizer, BertForMaskedLM, GPT2Tokenizer, GPT2Config, GPT2Model
import pandas as pd
import numpy as np
# import surprisal
# from surprisal import AutoHuggingFaceModel


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_BERT(model_path):
    model = BertForMaskedLM.from_pretrained(model_path)
    model.eval()
    tokenizer = BertTokenizer.from_pretrained(model_path)
    model_info = {
        'model': model,
        'tokenizer': tokenizer
    }
    return model_info

In [3]:
def load_GPT2(model_path):
    model = AutoHuggingFaceModel.from_pretrained(model_path)
    model.eval()
    tokenizer = GPT2Tokenizer.from_pretrained(model_path)
    model_info = {
        'model': model,
        'tokenizer': tokenizer
    }
    return model_info

In [8]:
configuration = GPT2Config()
model = GPT2Model(configuration)
configuration = model.config
configuration

GPT2Config {
  "activation_function": "gelu_new",
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.38.1",
  "use_cache": true,
  "vocab_size": 50257
}

In [9]:
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

input_text = "They have always been"

input_ids = tokenizer.encode(input_text, return_tensors="pt")

with torch.no_grad():
    outputs = model(input_ids)

last_hidden_states = outputs.last_hidden_state
last_hidden_states

tensor([[[-1.9663,  0.7905, -0.1253,  ..., -0.9741, -1.5458,  1.7247],
         [-1.2045,  0.4355, -0.2053,  ..., -1.2483, -0.3793,  1.1626],
         [-2.9977, -0.3331,  0.5655,  ..., -0.6176,  0.0397, -0.1688],
         [-2.9592,  0.7808,  0.3051,  ..., -0.7967,  0.0373,  2.7441]]])

In [6]:
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")

input_text = "They have always been"

input_ids = tokenizer.encode(input_text, return_tensors="pt")

with torch.no_grad():
    outputs = model(input_ids)

last_hidden_states = outputs.last_hidden_state
last_hidden_states

tensor([[[-0.0232,  0.0910, -0.1171,  ..., -0.1607, -0.0382,  0.0249],
         [ 0.1139, -0.2807, -1.0106,  ..., -0.2054,  0.4313,  0.2619],
         [-0.0560, -0.0785, -0.1575,  ..., -0.0552, -0.2186, -0.1953],
         [-0.3048, -0.1396,  0.7762,  ...,  0.0969, -0.0255, -0.1027]]])

In [20]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

input_text = "The cat sits on"
inputs = tokenizer.encode(input_text, return_tensors="pt")

outputs = model.generate(inputs, max_length=50, num_return_sequences=1)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(generated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The cat sits on the floor, and the cat is sitting on the floor.

The cat is sitting on the floor.

The cat is sitting on the floor.

The cat is sitting on the floor.

The cat


In [24]:

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

input_text = "The cat sits on"

tokens = tokenizer.encode(input_text, return_tensors="pt")
print(tokens)
target_token = tokenizer.encode(" the", add_special_tokens=False)[0]


with torch.no_grad():  
    outputs = model(tokens)
    predictions = outputs.logits
    print(predictions.shape)

logits = predictions[0, -1, :]
probabilities = torch.softmax(logits, dim=-1)

probability = probabilities[target_token].item()

surprisal = -np.log2(probability)

print(f"Probability of 'the': {probability}")
print(f"Surprisal of 'the': {surprisal}")


tensor([[  464,  3797, 10718,   319]])
torch.Size([1, 4, 50257])
Probability of 'the': 0.3733646869659424
Surprisal of 'the': 1.4213426117482204
