In [11]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import os
import numpy as np

In [2]:
from transformers import BertTokenizer, BertModel, pipeline

def load_bert_base(model_name="bert-base-uncased"):
    """
    Load a pretrained BERT tokenizer and encoder from Hugging Face.

    With the default checkpoint this is the original BERT-base (uncased) model:
    - 12 layers
    - 12 attention heads
    - Hidden size 768

    Args:
        model_name: Checkpoint identifier handed to ``from_pretrained`` for
            both the tokenizer and the model. Defaults to
            ``"bert-base-uncased"``, so existing zero-argument calls behave
            exactly as before.

    Returns:
        A ``(tokenizer, model)`` tuple: the ``BertTokenizer`` and the
        ``BertModel`` built from the same checkpoint.
    """
    # Tokenizer and model must come from the same checkpoint so that the
    # vocabulary ids produced by one match the embedding table of the other.
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertModel.from_pretrained(model_name)

    return tokenizer, model


if __name__ == "__main__":
    tokenizer, model = load_bert_base()

    # Example input: deliberately messy (repeated punctuation, mixed case,
    # irregular whitespace and newlines) to show how the tokenizer splits it.
    text = "  Hello,,,   WORLD!! 42@@@openAI##   is---great??? \n\n   NLP---rocks :)    [unused10] "
    inputs = tokenizer(text, return_tensors="pt")

    # Forward pass (get hidden states). This is inference only, so disable
    # gradient tracking to avoid building an autograd graph that is never used.
    with torch.no_grad():
        outputs = model(**inputs)

    # The last hidden state representation: one vector per input token.
    last_hidden_state = outputs.last_hidden_state

    print("Input text:", text)
    print("Tokenized input IDs:", inputs["input_ids"])
    print("Last hidden state shape:", last_hidden_state.shape)


Input text:   Hello,,,   WORLD!! 42@@@openAI##   is---great??? 

   NLP---rocks :)    [unused10] 
Tokenized input IDs: tensor([[  101,  7592,  1010,  1010,  1010,  2088,   999,   999,  4413,  1030,
          1030,  1030,  2330,  4886,  1001,  1001,  2003,  1011,  1011,  1011,
          2307,  1029,  1029,  1029, 17953,  2361,  1011,  1011,  1011,  5749,
          1024,  1007,  1031, 15171, 10790,  1033,   102]])
Last hidden state shape: torch.Size([1, 37, 768])


In [3]:
# Bare expression: the notebook renders the module tree of the loaded BertModel.
model

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
  

In [4]:
from transformers import pipeline

# Build a fill-mask pipeline backed by the bert-base-uncased checkpoint.
unmasker = pipeline(task="fill-mask", model="bert-base-uncased")

# The [MASK] placeholder marks the position the model has to fill in.
text = "We are people of [MASK]."

# Run the pipeline; the loop below reads the 'sequence' and 'score' keys
# of every returned candidate.
predictions = unmasker(text)

# Report each completed sentence with its score to four decimals.
for pred in predictions:
    line = f"{pred['sequence']} (score: {pred['score']:.4f})"
    print(line)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


we are people of peace. (score: 0.1621)
we are people of god. (score: 0.0769)
we are people of faith. (score: 0.0533)
we are people of honor. (score: 0.0413)
we are people of light. (score: 0.0259)


In [12]:
# Directory that receives the exported weight files. Set the BERT_WEIGHTS_DIR
# environment variable to redirect the export on another machine; when it is
# unset the original location is used.
weigthPat = os.environ.get(
    "BERT_WEIGHTS_DIR",
    r"C:\Users\shiva\Desktop\IISC\code\NeuroCpp\Projects\The Dream\weigths\BERT weights",
)


def SaveTensor(mat, fileName):
    """
    Write one tensor to ``weigthPat`` as a float32 NumPy ``.npy`` file.

    Args:
        mat: A torch tensor (parameters are fine; it is detached first).
        fileName: File name relative to ``weigthPat``. ``np.save`` appends
            ``.npy`` when the name does not already end with it.

    Returns:
        None. The only effect is the file written to disk.
    """
    # np.save does not create missing directories, so make sure the target
    # exists instead of failing with FileNotFoundError on a fresh machine.
    os.makedirs(weigthPat, exist_ok=True)

    # Cast to float32 on the torch side, before the NumPy conversion: dtypes
    # NumPy cannot represent (e.g. bfloat16) would make .numpy() raise.
    array = mat.detach().cpu().float().numpy()

    np.save(os.path.join(weigthPat, fileName), array)

In [13]:
def SaveModel(model):
    """
    Save weights for model.

    Walks ``model.named_parameters()`` and hands every parameter to
    ``SaveTensor`` under its parameter name, printing a confirmation line
    after each one.
    """
    named_params = model.named_parameters()
    for param_name, tensor in named_params:
        SaveTensor(tensor, param_name)
        print(f"{param_name} saved")

In [14]:
# Export every parameter of the BertModel loaded earlier (one file per parameter).
SaveModel(model)

embeddings.word_embeddings.weight saved
embeddings.position_embeddings.weight saved
embeddings.token_type_embeddings.weight saved
embeddings.LayerNorm.weight saved
embeddings.LayerNorm.bias saved
encoder.layer.0.attention.self.query.weight saved
encoder.layer.0.attention.self.query.bias saved
encoder.layer.0.attention.self.key.weight saved
encoder.layer.0.attention.self.key.bias saved
encoder.layer.0.attention.self.value.weight saved
encoder.layer.0.attention.self.value.bias saved
encoder.layer.0.attention.output.dense.weight saved
encoder.layer.0.attention.output.dense.bias saved
encoder.layer.0.attention.output.LayerNorm.weight saved
encoder.layer.0.attention.output.LayerNorm.bias saved
encoder.layer.0.intermediate.dense.weight saved
encoder.layer.0.intermediate.dense.bias saved
encoder.layer.0.output.dense.weight saved
encoder.layer.0.output.dense.bias saved
encoder.layer.0.output.LayerNorm.weight saved
encoder.layer.0.output.LayerNorm.bias saved
encoder.layer.1.attention.self.query