In [1]:
from transformers import LlamaForCausalLM, LlamaConfig, AutoModel, LlamaTokenizerFast
import torch
from torch.utils.data import DataLoader
import argparse
from ruamel.yaml import YAML
import copy

from model import Transformer
from data_loader import TokenizedDataset
from tokenizer import Tokenizer

from model_ref import Transformer as OtherTransformer, load_hf_model

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the "meta-llama/Llama-2-7b-hf" checkpoint through `load_hf_model` from
# the `model_ref` module; used below as one of the comparison models.
ref_model = load_hf_model("meta-llama/Llama-2-7b-hf")

Downloading shards: 100%|██████████| 2/2 [00:00<00:00, 1380.16it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.24it/s]


In [3]:
# Build the local `Transformer` (from `model`) from a checkpoint directory given
# as a relative path.
# NOTE(review): presumably this is Meta's original checkpoint layout — confirm.
meta_model = Transformer.load_meta_llama2("../llama/llama-2-7b")

In [4]:
# Build the local `Transformer` (from `model`) from the
# "meta-llama/Llama-2-7b-hf" checkpoint identifier.
model = Transformer.load_hf_llama2("meta-llama/Llama-2-7b-hf")

Downloading shards: 100%|██████████| 2/2 [00:00<00:00, 1362.01it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.39it/s]


In [5]:
# Load the same checkpoint with the `transformers` LlamaForCausalLM class; the
# cells below compare the other models against this one.
hf_model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

Downloading shards: 100%|██████████| 2/2 [00:00<00:00, 1328.36it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.45it/s]


In [6]:
# Layer-1 query-projection weight from each of the four loaded models; the
# attribute path differs per implementation.
a = hf_model.model.layers[1].self_attn.q_proj.weight.data
b = model.blocks[1].attn_layer.attn_proj_q.weight.data
c = ref_model.layers[1].attention.wq.weight.data
d = meta_model.blocks[1].attn_layer.attn_proj_q.weight.data

# Flattened boolean masks, True wherever a weight differs element-wise from the
# Hugging Face one (`a`): check1 -> b, check2 -> c, check3 -> d.
check1, check2, check3 = (torch.ne(a, other).flatten() for other in (b, c, d))

In [7]:
# Count of elements where the HF weight (`a`) and `model`'s weight (`b`) differ.
check1.sum()

tensor(16501011)

In [8]:
# Count of elements where the HF weight (`a`) and `ref_model`'s weight (`c`) differ.
check2.sum()

tensor(16501011)

In [9]:
# Count of elements where the HF weight (`a`) and `meta_model`'s weight (`d`) differ.
check3.sum()

tensor(16501075)

In [10]:
# Walk the parameters of `model` and `meta_model` in lockstep and print the name
# (as registered on `model`) of the first pair whose tensors are not identical.
# Pairing is positional, so this assumes both models yield parameters in the
# same order; `zip` stops silently at the shorter of the two.
# NOTE: the loop variables rebind the notebook-level names `a` and `b`.
for (a_n, a), (b_n, b) in zip(model.named_parameters(), meta_model.named_parameters()):
    # torch.equal is True only for same shape AND same values, so a shape
    # mismatch is reported as a difference instead of broadcasting or raising.
    if not torch.equal(a, b):
        print(a_n)
        # Stop at the first mismatch. Previously `break` sat at loop level, so
        # only the very first parameter pair was ever compared.
        break

embedding.weight


In [18]:
# Element [0, 0] of `model`'s embedding weight as a Python scalar.
model.embedding.weight.data[0,0].item()

1.2516975402832031e-06

In [19]:
# Same element [0, 0], read from `meta_model`'s embedding weight.
meta_model.embedding.weight.data[0,0].item()

1.2293457984924316e-06

In [20]:
# Same element [0, 0], read from `ref_model`'s token-embedding weight.
ref_model.tok_embeddings.weight.data[0,0].item()

1.2516975402832031e-06

In [23]:
# Same element [0, 0], read from the Hugging Face model's embedding weight.
hf_model.model.embed_tokens.weight.data[0,0].item()

1.2516975402832031e-06

In [16]:
# Element at flat index 10725 of `model`'s embedding weight.
# NOTE(review): the reason for picking index 10725 is not visible here — confirm.
model.embedding.weight.flatten()[10725].item()

0.26171875

In [17]:
# Element at the same flat index (10725) of `meta_model`'s embedding weight.
meta_model.embedding.weight.flatten()[10725].item()

0.26171875

In [4]:
# Dataset over the file 'data/train.bin' with context_length=4096, using the
# project's `TokenizedDataset` (from `data_loader`).
ds = TokenizedDataset(filenames='data/train.bin', context_length=4096)
# Wrap it in a DataLoader that yields one sample per batch.
dl = DataLoader(ds, batch_size=1)

In [5]:
# Take the first batch from the loader; each batch unpacks into a pair.
# NOTE(review): presumably x is the input tokens and y the targets — confirm
# against TokenizedDataset.
x, y = next(iter(dl))

In [6]:
# Forward pass of the local `model` on `x` with gradient tracking disabled.
with torch.no_grad():
    y_pred = model(x)

In [7]:
# Forward pass of the Hugging Face model on the same `x`, gradients disabled.
with torch.no_grad():
    # Keep only the first element of the model output.
    # NOTE(review): presumably this is the logits tensor — confirm.
    y_pred2 = hf_model(x)[0]

In [8]:
# Inspect the shape of the Hugging Face output tensor.
y_pred2.shape

torch.Size([1, 4096, 32000])

In [13]:
# `model`'s output at index [0, 10], for eyeball comparison with `y_pred2`.
y_pred[0,10]

tensor([ 7.3959,  8.8286, -0.4318,  ...,  6.1698,  6.7819,  8.1284])

In [12]:
# The Hugging Face model's output at the same index [0, 10].
y_pred2[0,10]

tensor([ 7.3959,  8.8286, -0.4318,  ...,  6.1698,  6.7819,  8.1284])

In [27]:
# Load the `transformers` fast tokenizer for the "meta-llama/Llama-2-7b-hf"
# checkpoint.
llama_tokenizer = LlamaTokenizerFast.from_pretrained("meta-llama/Llama-2-7b-hf")

tokenizer_config.json: 100%|██████████| 776/776 [00:00<00:00, 6.31MB/s]
tokenizer.model: 100%|██████████| 500k/500k [00:00<00:00, 42.8MB/s]
tokenizer.json: 100%|██████████| 1.84M/1.84M [00:00<00:00, 3.52MB/s]
special_tokens_map.json: 100%|██████████| 414/414 [00:00<00:00, 874kB/s]


In [31]:
# Instantiate the project's own `Tokenizer` (from the `tokenizer` module) with
# its default arguments.
tokenizer = Tokenizer()

In [38]:
# Define the sample sentence in this cell so it runs top-to-bottom on a fresh
# kernel; previously `s` was only assigned in a cell further down, so this line
# raised NameError under Restart & Run All.
s = "This is a test sentence."
# Token ids the Hugging Face tokenizer produces for the sample sentence.
llama_tokenizer(s)["input_ids"]

[1, 910, 338, 263, 1243, 10541, 29889]

In [24]:
# Sample sentence fed to both tokenizers for comparison.
s = "This is a test sentence."

In [37]:
# Token ids from the project's tokenizer for the same sentence, with bos=True;
# compare against the Hugging Face tokenizer's ids for `s`.
tokenizer.encode(s, bos=True)

[1, 910, 338, 263, 1243, 10541, 29889]