In [5]:
%load_ext autoreload
%autoreload 2

In [6]:
from transformers import BertTokenizer, BertModel

# Load the tokenizer and the model from the same pretrained checkpoint so the
# two cannot drift apart if the checkpoint name is ever changed.
PRETRAINED_NAME = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_NAME)
model = BertModel.from_pretrained(PRETRAINED_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# State dict of the pretrained `bert-base-uncased` model loaded through
# transformers' `BertModel.from_pretrained`; later cells use it as the reference.
# This must run before `model` is rebound to the `src.model` implementation
# further down, otherwise it would capture that model's state dict instead.
bert_state_dict = model.state_dict()

In [8]:
# Show the entry names of the reference state dict (cell's last expression is displayed).
bert_state_dict.keys()

odict_keys(['embeddings.word_embeddings.weight', 'embeddings.position_embeddings.weight', 'embeddings.token_type_embeddings.weight', 'embeddings.LayerNorm.weight', 'embeddings.LayerNorm.bias', 'encoder.layer.0.attention.self.query.weight', 'encoder.layer.0.attention.self.query.bias', 'encoder.layer.0.attention.self.key.weight', 'encoder.layer.0.attention.self.key.bias', 'encoder.layer.0.attention.self.value.weight', 'encoder.layer.0.attention.self.value.bias', 'encoder.layer.0.attention.output.dense.weight', 'encoder.layer.0.attention.output.dense.bias', 'encoder.layer.0.attention.output.LayerNorm.weight', 'encoder.layer.0.attention.output.LayerNorm.bias', 'encoder.layer.0.intermediate.dense.weight', 'encoder.layer.0.intermediate.dense.bias', 'encoder.layer.0.output.dense.weight', 'encoder.layer.0.output.dense.bias', 'encoder.layer.0.output.LayerNorm.weight', 'encoder.layer.0.output.LayerNorm.bias', 'encoder.layer.1.attention.self.query.weight', 'encoder.layer.1.attention.self.query.bi

In [9]:
# Print every entry of the reference state dict next to its tensor size.
for entry_name, tensor in bert_state_dict.items():
    tensor_size = tensor.size()
    print(entry_name, tensor_size)

embeddings.word_embeddings.weight torch.Size([30522, 768])
embeddings.position_embeddings.weight torch.Size([512, 768])
embeddings.token_type_embeddings.weight torch.Size([2, 768])
embeddings.LayerNorm.weight torch.Size([768])
embeddings.LayerNorm.bias torch.Size([768])
encoder.layer.0.attention.self.query.weight torch.Size([768, 768])
encoder.layer.0.attention.self.query.bias torch.Size([768])
encoder.layer.0.attention.self.key.weight torch.Size([768, 768])
encoder.layer.0.attention.self.key.bias torch.Size([768])
encoder.layer.0.attention.self.value.weight torch.Size([768, 768])
encoder.layer.0.attention.self.value.bias torch.Size([768])
encoder.layer.0.attention.output.dense.weight torch.Size([768, 768])
encoder.layer.0.attention.output.dense.bias torch.Size([768])
encoder.layer.0.attention.output.LayerNorm.weight torch.Size([768])
encoder.layer.0.attention.output.LayerNorm.bias torch.Size([768])
encoder.layer.0.intermediate.dense.weight torch.Size([3072, 768])
encoder.layer.0.inter

In [10]:
# Presumably the project's own BERT implementation (module `src.model`).
# NOTE(review): this rebinds the name `BertModel`, shadowing the class imported
# from `transformers` earlier in the notebook; re-running the earlier
# `BertModel.from_pretrained(...)` cell after this one would use this class.
from src.model import BertModel, BertConfig

In [11]:
# Build the `src.model` BertModel from a default-constructed BertConfig.
# NOTE(review): this rebinds `model`, which until now held the pretrained
# transformers model; `bert_state_dict` was taken from that earlier object, so
# cell order matters on a fresh run.
model = BertModel(BertConfig())

In [12]:
# List each named parameter of the current `model` with its size.
named_params = model.named_parameters()
for name, param in named_params:
    print(f"{name}: {param.size()}")

embeddings.word_embeddings.weight: torch.Size([30522, 768])
embeddings.position_embeddings.weight: torch.Size([512, 768])
embeddings.token_type_embeddings.weight: torch.Size([2, 768])
embeddings.LayerNorm.weight: torch.Size([768])
embeddings.LayerNorm.bias: torch.Size([768])
encoder.layer.0.attention.self.query.weight: torch.Size([768, 768])
encoder.layer.0.attention.self.query.bias: torch.Size([768])
encoder.layer.0.attention.self.key.weight: torch.Size([768, 768])
encoder.layer.0.attention.self.key.bias: torch.Size([768])
encoder.layer.0.attention.self.value.weight: torch.Size([768, 768])
encoder.layer.0.attention.self.value.bias: torch.Size([768])
encoder.layer.0.attention.output.dense.weight: torch.Size([768, 768])
encoder.layer.0.attention.output.dense.bias: torch.Size([768])
encoder.layer.0.attention.output.LayerNorm.weight: torch.Size([768])
encoder.layer.0.attention.output.LayerNorm.bias: torch.Size([768])
encoder.layer.0.intermediate.dense.weight: torch.Size([3072, 768])
encod

In [13]:
# Map entry name -> tensor size for the reference checkpoint and for the
# current `model`, using the state dict on BOTH sides.
# `load_state_dict` matches on state-dict keys (parameters plus persistent
# buffers), whereas `named_parameters()` yields parameters only and
# de-duplicates tied weights. Mixing the two would report every reference
# buffer as missing and would never check buffers of the current model.
bert_sizes = {name: tensor.size() for name, tensor in bert_state_dict.items()}
my_sizes = {name: tensor.size() for name, tensor in model.state_dict().items()}

# Compute the three difference lists up front, keeping state-dict order.
missing_in_mine = [name for name in bert_sizes if name not in my_sizes]
extra_in_mine = [name for name in my_sizes if name not in bert_sizes]
size_mismatches = [
    name
    for name in bert_sizes
    if name in my_sizes and bert_sizes[name] != my_sizes[name]
]

# Entries the reference checkpoint has that the current model lacks.
print("Parameters in bert but not in my model:")
for name in missing_in_mine:
    print(f"{name}: {bert_sizes[name]}")

# Entries the current model has that the reference checkpoint lacks.
print("\nParameters in my model but not in bert:")
for name in extra_in_mine:
    print(f"{name}: {my_sizes[name]}")

# Entries present on both sides whose tensor sizes disagree.
print("\nParameters with different sizes:")
for name in size_mismatches:
    print(f"{name}:")
    print(f"  bert: {bert_sizes[name]}")
    print(f"  mine: {my_sizes[name]}")


Parameters in bert but not in my model:

Parameters in my model but not in bert:

Parameters with different sizes:
