In [118]:
import sys

import einops
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import torch
from transformer_lens import HookedTransformerConfig, HookedTransformer

pio.renderers.default = "notebook_connected"
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
torch.autograd.set_grad_enabled(False)
torch.set_grad_enabled(False)

sys.path.append('../')  # Add the parent directory to the system path
import utils.haystack_utils as haystack_utils
from process_tiny_stories_data import load_tinystories_validation_prompts
%reload_ext autoreload
%autoreload 2

In [110]:
def _split_heads_in(weight, n_heads):
    """Reshape [(head d_head), d_model] -> [head, d_model, d_head].

    Equivalent to einops.rearrange(weight, "(i h) m->i m h", i=n_heads).
    """
    return weight.reshape(n_heads, -1, weight.shape[-1]).permute(0, 2, 1)


def _split_heads_out(weight, n_heads):
    """Reshape [d_model, (head d_head)] -> [head, d_head, d_model].

    Equivalent to einops.rearrange(weight, "m (i h)->i h m", i=n_heads).
    """
    return weight.reshape(weight.shape[0], n_heads, -1).permute(1, 2, 0)


def convert_neo_weights(neo, cfg: "HookedTransformerConfig", target_n_ctx: int = 2048):
    """Convert a GPT-Neo style state dict to HookedTransformer names and layouts.

    Args:
        neo: Mapping from GPT-Neo parameter names (``transformer.wte.weight``,
            ``transformer.h.{l}.attn.attention.q_proj.weight``, ...) to tensors.
        cfg: Config object; only ``n_layers``, ``n_heads``, ``d_head`` and
            ``d_vocab`` are read.
        target_n_ctx: Number of rows of the returned ``pos_embed.W_pos``. The
            checkpoint's positional table fills the first rows and the rest are
            zeros. Defaults to 2048, the size that used to be hard-coded.

    Returns:
        A new dict keyed by HookedTransformer parameter names. Attention
        weights are split per head; MLP and unembedding weights are transposed.
        Biases that the checkpoint does not provide (b_Q, b_K, b_V, b_U) are
        zeros on the same device and with the same dtype as the checkpoint
        tensors they accompany.

    Raises:
        ValueError: If the checkpoint has more positional rows than
            ``target_n_ctx``.
    """
    wpe = neo['transformer.wpe.weight']
    n_positions, d_model = wpe.shape
    if n_positions > target_n_ctx:
        raise ValueError(
            f"checkpoint has {n_positions} positional embeddings, "
            f"which does not fit in target_n_ctx={target_n_ctx}"
        )

    state_dict = {}

    state_dict["embed.W_E"] = neo['transformer.wte.weight']
    # Zero-pad the positional table; sizes, device and dtype come from the checkpoint itself.
    W_pos = wpe.new_zeros(target_n_ctx, d_model)
    W_pos[:n_positions] = wpe
    state_dict["pos_embed.W_pos"] = W_pos

    for l in range(cfg.n_layers):
        state_dict[f"blocks.{l}.ln1.w"] = neo[f'transformer.h.{l}.ln_1.weight']
        state_dict[f"blocks.{l}.ln1.b"] = neo[f'transformer.h.{l}.ln_1.bias']

        W_Q = _split_heads_in(neo[f'transformer.h.{l}.attn.attention.q_proj.weight'], cfg.n_heads)
        W_K = _split_heads_in(neo[f'transformer.h.{l}.attn.attention.k_proj.weight'], cfg.n_heads)
        W_V = _split_heads_in(neo[f'transformer.h.{l}.attn.attention.v_proj.weight'], cfg.n_heads)
        state_dict[f"blocks.{l}.attn.W_Q"] = W_Q
        state_dict[f"blocks.{l}.attn.W_K"] = W_K
        state_dict[f"blocks.{l}.attn.W_V"] = W_V

        # No q/k/v biases are read from the checkpoint, so they are filled with zeros.
        state_dict[f"blocks.{l}.attn.b_Q"] = W_Q.new_zeros(cfg.n_heads, cfg.d_head)
        state_dict[f"blocks.{l}.attn.b_K"] = W_K.new_zeros(cfg.n_heads, cfg.d_head)
        state_dict[f"blocks.{l}.attn.b_V"] = W_V.new_zeros(cfg.n_heads, cfg.d_head)

        W_O = _split_heads_out(neo[f'transformer.h.{l}.attn.attention.out_proj.weight'], cfg.n_heads)
        state_dict[f"blocks.{l}.attn.W_O"] = W_O
        state_dict[f"blocks.{l}.attn.b_O"] = neo[f'transformer.h.{l}.attn.attention.out_proj.bias']

        state_dict[f"blocks.{l}.ln2.w"] = neo[f'transformer.h.{l}.ln_2.weight']
        state_dict[f"blocks.{l}.ln2.b"] = neo[f'transformer.h.{l}.ln_2.bias']

        state_dict[f"blocks.{l}.mlp.W_in"] = neo[f'transformer.h.{l}.mlp.c_fc.weight'].T
        state_dict[f"blocks.{l}.mlp.b_in"] = neo[f'transformer.h.{l}.mlp.c_fc.bias']

        state_dict[f"blocks.{l}.mlp.W_out"] = neo[f'transformer.h.{l}.mlp.c_proj.weight'].T
        state_dict[f"blocks.{l}.mlp.b_out"] = neo[f'transformer.h.{l}.mlp.c_proj.bias']
    state_dict["ln_final.w"] = neo['transformer.ln_f.weight']
    state_dict["ln_final.b"] = neo['transformer.ln_f.bias']

    lm_head = neo['lm_head.weight']
    state_dict["unembed.W_U"] = lm_head.T
    state_dict["unembed.b_U"] = lm_head.new_zeros(cfg.d_vocab)
    return state_dict

In [117]:
model_name = "tiny-stories-33M"
save_name = "15_vocal_bush"
checkpoint_dir = f"/workspace/data/{model_name}"

# NOTE(review): torch.load unpickles the file, so only load checkpoints you trust.
# map_location keeps the load working when the checkpoint was saved from a
# device that is not available on this machine.
neo_state_dict = torch.load(f"{checkpoint_dir}/{save_name}_10000.pt", map_location=device)
cfg = haystack_utils.load_json_data(f"{checkpoint_dir}/{save_name}.json")

# The pretrained model supplies the architecture; its weights are replaced below.
model = HookedTransformer.from_pretrained(
    model_name,
    device=device,
)

# Keyword arguments make the mapping from the training config explicit.
# NOTE(review): `window_size` is used as n_ctx, exactly as the original positional
# call did -- confirm it is the training context length.
hooked_transformer_cfg = HookedTransformerConfig(
    n_layers=cfg['num_layers'],
    d_model=cfg['hidden_size'],
    n_ctx=cfg['window_size'],
    d_head=cfg['hidden_size'] // cfg['num_heads'],
    act_fn=cfg['activation_function'],
    d_mlp=4 * cfg['hidden_size'],
    d_vocab=cfg['vocab_size'],
)

# The raw and converted weights keep separate names so this step can be re-run on its own.
checkpoint_state_dict = convert_neo_weights(neo_state_dict, hooked_transformer_cfg)
haystack_utils.clean_cache()
model.load_and_process_state_dict(checkpoint_state_dict)

/workspace/data/tiny-stories-33M/15_vocal_bush.json: Loaded 40 examples with 2 to 23 characters each.
Loaded pretrained model tiny-stories-33M into HookedTransformer


In [116]:
# Sampling at temperature 0.5 is stochastic; seed torch so the sample is reproducible.
torch.manual_seed(0)
model.generate("Sally and Lucy walked over to the", max_new_tokens=20, temperature=0.5)

  0%|          | 0/20 [00:00<?, ?it/s]

'Sally and Lucy walked over to the and her. saw big and and, sun the in sky She to. smiled said " you a'

In [123]:
# Average the per-prompt loss of the custom-weight model over the first 200
# validation prompts.
prompts = load_tinystories_validation_prompts()
losses = [
    model(prompt, return_type='loss').item()
    for prompt in prompts[:200]
]
custom_model_loss = np.mean(losses)

In [122]:

# Pretrained TinyStories checkpoints to compare against, evaluated in this order.
model_names = [
    'roneneldan/TinyStories-1M',
    'roneneldan/TinyStories-3M',
    'roneneldan/TinyStories-8M',
    'roneneldan/TinyStories-28M',
    'roneneldan/TinyStories-33M',
    'roneneldan/TinyStories-1Layer-21M',
    'roneneldan/TinyStories-2Layers-33M',
]

# One mean loss per entry of model_names, over the same first 200 prompts.
avg_losses = []
for pretrained_name in model_names:
    pretrained = HookedTransformer.from_pretrained(
        pretrained_name,
        center_unembed=True,
        center_writing_weights=True,
        fold_ln=True,
        device=device,
    )

    losses = [
        pretrained(prompt, return_type='loss').item()
        for prompt in prompts[:200]
    ]
    avg_losses.append(np.mean(losses))


Loaded pretrained model roneneldan/TinyStories-1M into HookedTransformer


config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/66.7M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Loaded pretrained model roneneldan/TinyStories-3M into HookedTransformer


config.json:   0%|          | 0.00/1.16k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/112M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Loaded pretrained model roneneldan/TinyStories-8M into HookedTransformer


config.json:   0%|          | 0.00/1.16k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/242M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Loaded pretrained model roneneldan/TinyStories-28M into HookedTransformer
Loaded pretrained model roneneldan/TinyStories-33M into HookedTransformer


config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/269M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Loaded pretrained model roneneldan/TinyStories-1Layer-21M into HookedTransformer


config.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/323M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/722 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

Loaded pretrained model roneneldan/TinyStories-2Layers-33M into HookedTransformer


In [128]:
# One row per pretrained model, plus a final row for the custom-weight model.
# NOTE(review): in the recorded run the custom model scores ~6.78 while the
# pretrained 33M model scores ~0.99 -- presumably a weight-conversion or config
# mismatch rather than a genuine training result; confirm before drawing conclusions.
model_labels = model_names + ["Custom model 33M"]
average_losses = avg_losses + [custom_model_loss]
df = pd.DataFrame({"Model": model_labels, "Average loss": average_losses})

print(df.head(20))

px.bar(
    df,
    x="Model",
    y="Average loss",
    title="Average losses of pretrained TinyStories models compared with our trained model",
)

                                Model  Average loss
0           roneneldan/TinyStories-1M      1.885772
1           roneneldan/TinyStories-3M      1.436941
2           roneneldan/TinyStories-8M      1.153234
3          roneneldan/TinyStories-28M      1.002276
4          roneneldan/TinyStories-33M      0.988238
5   roneneldan/TinyStories-1Layer-21M      1.257198
6  roneneldan/TinyStories-2Layers-33M      1.076359
7                    Custom model 33M      6.784503
