In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from evo import *

# Constructor settings shared by both model variants built below.
# NOTE(review): the star import above supplies Evolver / PointerStyleEvolver;
# later cells may also rely on other names it brings in — confirm before
# narrowing it to an explicit import list.
params = dict(
    d_model=512,
    nhead=8,
    dim_feedforward=2048,
    dropout=0.1,
    encoder_layers=3,
    decoder_layers=3,
    max_len=64,
)

# Instantiate the regular model first, then the pointer-style one, both from
# the exact same keyword arguments.
evolver, ps_evolver = [cls(**params) for cls in (Evolver, PointerStyleEvolver)]

In [None]:
from data import *
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Keyword arguments that the train and eval loaders have in common.
_loader_common = dict(
    max_len=10,
    tokenizer=tokenizer,
    batch_size=4,
    sampler=StratifiedInfiniteSampler,
)

# Training loader over the UD file, capped via ``limit=20`` with caching
# disabled (``cache_prefix=None``).
train_loader = supervised_loader(
    path='data/ud/ud_train_3.0.jsonl',
    cache_prefix=None,
    all_tokens=True,
    limit=20,
    **_loader_common,
)

# Evaluation loader over the toy file, using only the shared settings.
eval_loader = unsupervised_loader(path='data/toy/toy.jsonl', **_loader_common)

In [None]:
from run import apply_edits

# Pull one batch from the training loader; only the first and third of its
# four fields are used here.
traj_input_ids, _, traj_edit_tgts, _ = next(iter(train_loader))

# Select index 0 along the second axis of the inputs and of every edit-target
# tensor, then apply those edits to those inputs.
# NOTE(review): presumably the second axis is the trajectory step — confirm.
edit_tgts_at_0 = tuple(tgt[:, 0] for tgt in traj_edit_tgts)
apply_edits(traj_input_ids[:, 0], edit_tgts_at_0)

In [None]:
from torch.optim import AdamW

# Training-loop settings shared by both runs below (a single training step,
# evaluating every step).
kwargs = dict(
    train_loader=train_loader,
    eval_loader=eval_loader,
    train_steps=1,
    eval_steps=2,
    grad_accum_steps=1,
    clip_gradients=False,
    checkpoint_at=20,
    eval_at=1,
)


def _fresh_adamw(net):
    """Return a new AdamW optimizer over ``net``'s parameters with lr=3e-4."""
    return AdamW(net.parameters(), lr=3e-4)


# NOTE(review): the third positional argument to train_evolver is None in both
# calls — presumably an optional scheduler; confirm against its signature.
print('STARTING REGULAR EVOLVER')
train_evolver(evolver, _fresh_adamw(evolver), None, **kwargs)

print('STARTING PS EVOLVER')
train_evolver(ps_evolver, _fresh_adamw(ps_evolver), None, **kwargs)

## multihead pointer

In [None]:
import torch
from trans import MultiheadPointer

pointer = MultiheadPointer(512, 8)

# Random inputs: 3 sequences of 10 memory vectors and 5 target vectors, each
# of width 512.
mem = torch.randn(3, 10, 512)
tgt = torch.randn(3, 5, 512)

# Boolean mask over the 10 memory positions: False for the first 7 positions,
# True for the last 3.
src_pad_mask = torch.zeros(3, 10, dtype=torch.bool)
src_pad_mask[:, 7:] = True

idx_weights = pointer(tgt, mem, key_padding_mask=src_pad_mask)
idx_weights

## regressions

In [None]:
# Regression run: execute evo.py as a script with configs/toy/sup-toy.json,
# passing the --local flag.
!python evo.py --config=configs/toy/sup-toy.json --local

In [None]:
# Regression run: execute evo.py as a script with
# configs/toy/sup-toy-epoch.json, passing the --local flag.
!python evo.py --config=configs/toy/sup-toy-epoch.json --local

In [None]:
# Regression run: execute evo.py as a script with
# configs/toy/ps-unsup-toy.json.
# NOTE(review): this invocation omits --local, unlike the sup-toy runs in this
# notebook — confirm that is intentional.
!python evo.py --config=configs/toy/ps-unsup-toy.json

In [None]:
# Regression run: execute evo.py as a script with
# configs/toy/ps-sup-prefix-toy.json, passing --local. The ps-sup-toy.json
# variant directly below is kept but disabled.
# !python evo.py --config=configs/toy/ps-sup-toy.json
!python evo.py --config=configs/toy/ps-sup-prefix-toy.json --local

In [None]:
# Regression run: execute evo.py as a script with configs/toy/den-toy.json.
# NOTE(review): this invocation omits --local, unlike the sup-toy runs in this
# notebook — confirm that is intentional.
!python evo.py --config=configs/toy/den-toy.json

In [None]:
# Regression run: execute evo.py as a script with configs/toy/ar-d-toy.json.
# NOTE(review): this invocation omits --local, unlike the sup-toy runs in this
# notebook — confirm that is intentional.
!python evo.py --config=configs/toy/ar-d-toy.json

In [None]:
# Regression run: execute evo.py as a script with
# configs/toy/ar-prefix-toy.json, passing --local. The ar-toy.json variant
# directly below is kept but disabled.
# !python evo.py --config=configs/toy/ar-toy.json --local
!python evo.py --config=configs/toy/ar-prefix-toy.json --local

## evolver sampling

In [None]:
import torch
from evo import PointerStyleEvolver

# Checkpoint file whose 'model' entry holds the weights to restore.
CKPT_PATH = 'checkpoints/ps-sup-imdb-pattn_20240822_235245-9900.pt'

model = PointerStyleEvolver(pointer_attn=True)

# This model is loaded for sampling, not training: switch to eval mode so that
# train-only layers such as dropout are disabled during inference. A freshly
# constructed torch module starts in training mode.
model.eval()

# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source. If the checkpoint holds
# nothing but tensors and plain containers, consider weights_only=True.
# The load is kept as the final expression so the key-matching report returned
# by load_state_dict is still displayed as the cell output.
model.load_state_dict(torch.load(CKPT_PATH, map_location='cpu')['model'])

In [None]:
from run import sample_trajectory
from utils import BT

# Three empty prompts; the single-prompt alternative is kept for reference.
# inputs = ['hello my name is']
inputs = [''] * 3

# NOTE(review): get_input_ids is not imported in this cell; presumably it is
# provided by a star import in an earlier cell — confirm, since this cell
# fails on a fresh kernel otherwise.
input_ids = get_input_ids(inputs, max_len=512, tokenizer=BT)

# Settings passed through to sample_trajectory as its fourth argument.
pf_params = dict(num_particles=5, resample_at=32, threshold=3)
traj_ids, traj_edits = sample_trajectory(model, input_ids, 5, pf_params, verbose=True)

## conditional generation

In [None]:
from evo import Transformer
from data import SequenceDataset, InfiniteSampler
from utils import BT

# Arguments for building the sequence dataset from the toy trajectory file.
_ds_kwargs = dict(
    path='data/toy/toy.jsonl',
    denoising=False,
    max_len=10,
    tokenizer=BT,
    batch_size=2,
    sampler=InfiniteSampler,
)

ds = SequenceDataset.from_trajectories(**_ds_kwargs)

In [None]:
import torch
from torch.utils.data import DataLoader
from const import *

loader = DataLoader(ds, batch_size=2)
input_ids, output_ids = next(iter(loader))

# Overwrite columns 1 and 2 of every row with fixed token ids (in place).
# NOTE(review): 2 and 102 are magic ids — presumably chosen to exercise the
# mask below; confirm which special tokens they stand for.
input_ids[:, 1] = 2
input_ids[:, 2] = 102
print(input_ids)

# Random stand-in logits shaped like output_ids with the last column dropped.
logits = torch.randn_like(output_ids, dtype=torch.float)[:, :-1]
print(logits)

# Keep the logits at positions where the input, with its first column
# dropped, is neither the pad id nor the EOS id.
shifted_input_ids = input_ids[:, 1:]
is_pad_or_eos = (shifted_input_ids == PAD_TOKEN_ID) | (shifted_input_ids == EOS_TOKEN_ID)
logits[~is_pad_or_eos]

## autoregressive sampling (TODO: section is empty, no cells written yet)

## gpt2 eval

In [None]:
from run import compute_ppl
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Hub identifier shared by the GPT-2 model and its tokenizer.
GPT2_CHECKPOINT = 'gpt2'

# NOTE(review): this rebinds `model` and `tokenizer`, which earlier cells in
# this notebook bind to a PointerStyleEvolver and a BertTokenizer — re-running
# those earlier cells afterwards without their own setup would pick up the
# GPT-2 objects instead.
model = GPT2LMHeadModel.from_pretrained(GPT2_CHECKPOINT)
tokenizer = GPT2Tokenizer.from_pretrained(GPT2_CHECKPOINT)

# Score one short sentence on CPU; compute_ppl returns two values.
sample_text = 'hello my name is tj'
a, b = compute_ppl(model, tokenizer, sample_text, 'cpu')