## Prepare Data + Model

In [1]:
# !cat examples/data/text_forward.txt

In [2]:
# !ls -al ./outputs/en.1-percent.elmo-bert-causal

In [3]:
import torch
from newlm.lm.elmo.modeling_elmo.elmo_head import ELMOBertLMHeadModel
from newlm.lm.elmo.lm_builder import ELMOLMBuilder
from transformers import BertConfig

#### Model

In [4]:
# Restore the pre-trained ELMO-BERT causal LM from the local output directory.
model = ELMOBertLMHeadModel.from_pretrained("./outputs/en.1-percent.elmo-bert-causal")

In [5]:
# Put the model in evaluation mode before any forward pass is run.
# NOTE(review): presumably this disables dropout so the reported losses are
# repeatable across calls — confirm against the model implementation.
model.eval()
print("Model in eval mode for consistency")

Model in eval mode for consistency


#### Data

In [6]:
%%capture

from newlm.utils.file_util import read_from_yaml
config_file = read_from_yaml('examples/configs/run.1-percent-elmo-bert-causal.yaml')

# lm builder (helper)
elmo_lm_builder = ELMOLMBuilder(
    model_config = config_file['lm']['model']['config'],
    tokenizer="./outputs/en.1-percent.elmo-bert-causal", # use pre-trained tokenizer
    model_type="bert-causal-elmo",
    max_len=128
)

# dataset-forward
train_path = "./examples/data/text_forward-small.txt"
ds_f = elmo_lm_builder._get_dataset(train_path)

2021-11-12 10:52:53.587 | INFO     | newlm.lm.elmo.lm_builder:_get_dataset:142 - Constructing roBERTa style dataset


In [8]:
# The Trainer is used here only as a helper to obtain a DataLoader.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="tmpout",
    **config_file["lm"]["hf_trainer"]["args"],
)

trainer = Trainer(
    model=model,
    args=args,
    data_collator=elmo_lm_builder.data_collator,
    train_dataset=ds_f,
)
dl_f = trainer.get_train_dataloader()  # forward-direction data loader

max_steps is given, it will override any value given in num_train_epochs


In [9]:
# Take the first batch from the forward data loader; every check below reuses it.
batch_f = next(iter(dl_f))
# Display the (batch, sequence) shape of the token ids.
batch_f['input_ids'].shape

torch.Size([1, 127])

In [10]:
# Re-assert evaluation mode right before the sanity checks (same call as the
# cell that follows model loading).
# NOTE(review): presumably guards against the Trainer construction above
# having changed the mode — confirm whether this repeat is needed.
model.eval()
print("Model in eval mode for consistency")

Model in eval mode for consistency


## Sanity Check

In [1]:
# batch_f

In [12]:
import torch

def reverse_batch(batch_f):
    """Return a copy of ``batch_f`` whose token order is reversed.

    For every row of ``batch_f['input_ids']`` the first and last token stay
    in place and the tokens in between are reversed. ``labels`` is set to a
    separate copy of the reversed ids.

    Unlike the earlier version, which only looked at row 0, this handles any
    batch size; for a batch of one the result is unchanged.

    Args:
        batch_f: mapping with a 2-D ``input_ids`` tensor of shape
            (batch, seq_len). It is not modified.

    Returns:
        A shallow copy of ``batch_f`` with ``input_ids`` and ``labels``
        replaced. All other entries are carried over untouched.
        NOTE(review): entries such as an attention mask are NOT reversed, so
        this presumably assumes rows without padding — confirm.
    """
    input_ids = batch_f['input_ids']

    if input_ids.shape[1] < 2:
        # No distinct first/last token to pin; slicing would duplicate the
        # single token, so just copy the ids.
        reversed_ids = input_ids.clone()
    else:
        # torch.cat allocates a new tensor, so the input is left untouched.
        reversed_ids = torch.cat(
            (
                input_ids[:, :1],
                torch.flip(input_ids[:, 1:-1], dims=[1]),
                input_ids[:, -1:],
            ),
            dim=1,
        )

    batch_rev = batch_f.copy()
    batch_rev['input_ids'] = reversed_ids
    # Labels get their own storage so later in-place edits cannot alias the ids.
    batch_rev['labels'] = reversed_ids.clone()

    return batch_rev

In [13]:
import pandas as pd

def pandas_check(batch_f, batch_rev, tokenizer=None):
    """Build a side-by-side token table for two batches.

    Only the first row of each batch's ``input_ids`` is decoded.

    Args:
        batch_f: mapping with ``input_ids``; decoded into the "forward" column.
        batch_rev: mapping with ``input_ids``; decoded into the "reverse" column.
        tokenizer: object exposing ``convert_ids_to_tokens``. Defaults to the
            notebook-level ``elmo_lm_builder.tokenizer`` so existing calls
            keep working.

    Returns:
        pandas.DataFrame with columns "forward" and "reverse", one row per
        token position.
    """
    if tokenizer is None:
        # Fall back to the notebook global only when nothing is injected.
        tokenizer = elmo_lm_builder.tokenizer

    tokens_f = tokenizer.convert_ids_to_tokens(batch_f['input_ids'][0])
    tokens_f_rev = tokenizer.convert_ids_to_tokens(batch_rev['input_ids'][0])
    return pd.DataFrame({"forward": tokens_f, "reverse": tokens_f_rev})

#### Normal vs Reverse

In [14]:
# Reversed-order counterpart of the real batch (first and last token stay in place).
batch_rev = reverse_batch(batch_f)

In [15]:
# Visual check: the "reverse" column should be the "forward" column read
# bottom-up, apart from the first and last token.
pandas_check(batch_f, batch_rev)

Unnamed: 0,forward,reverse
0,[CLS],[CLS]
1,R,.
2,##ø,Jews
3,##d,among
4,##berg,lived
...,...,...
122,lived,##berg
123,among,##d
124,Jews,##ø
125,.,R


In [16]:
# Reversal must not change the tensor shape; show both shapes to confirm.
batch_f['input_ids'].shape, batch_rev['input_ids'].shape

(torch.Size([1, 127]), torch.Size([1, 127]))

In [17]:
# Run the model on the original-order batch. The l2r_loss / r2l_loss lines in
# the output appear to be printed by the model's forward pass itself.
res = model(**batch_f) # forward

l2r_loss tensor(4.3262, grad_fn=<NllLossBackward>)
r2l_loss tensor(6.8976, grad_fn=<NllLossBackward>)


In [18]:
# Same call on the reversed batch, to compare l2r_loss / r2l_loss against the
# original-order run.
res = model(**batch_rev) # reverse

l2r_loss tensor(9.2779, grad_fn=<NllLossBackward>)
r2l_loss tensor(6.5343, grad_fn=<NllLossBackward>)


#### Random String

In [24]:
# Check the sequence length of the real batch; the random batch built next
# keeps its first and last token and fills the positions in between.
batch_f['input_ids'].shape

torch.Size([1, 127])

In [29]:
# Build a random-token batch. Despite the variable names this is not a
# permutation of batch_f: the first and last token of batch_f are kept and
# every position in between is filled with uniformly sampled token ids.
# Re-running this cell draws a new sample (no seed is set).
batch_f_input = batch_f['input_ids']
# Number of positions between the first and last token, derived from the
# batch instead of being hard-coded for one sequence length.
n_random_tokens = max(batch_f_input.shape[1] - 2, 0)
batch_shuffle_input = torch.cat(
    (
        batch_f_input[0][0:1],
        torch.randint(
            low=5, # 0-4 > ['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]']
            # `high` is exclusive, so ids are drawn from 5..29998.
            # NOTE(review): if the vocabulary has 30000 entries, id 29999 is
            # never sampled — confirm the intended upper bound.
            high=29999,
            size=(n_random_tokens,),
            dtype=torch.long
        ),
        batch_f_input[0][-1:]
    )
)
batch_shuffle_input = batch_shuffle_input.reshape(1,-1)
# labels are a separate copy of the random ids
batch_shuffle_labels = torch.clone(batch_shuffle_input)
# batch_shuffle: shallow copy of batch_f with ids and labels replaced
batch_shuffle = batch_f.copy()
batch_shuffle['input_ids'] = batch_shuffle_input
batch_shuffle['labels'] = batch_shuffle_labels

In [30]:
# Reversed-order counterpart of the current random-token batch.
batch_shuffle_rev = reverse_batch(batch_shuffle)

##### Trial-1

In [21]:
# Show the random batch used for this trial next to its reversal.
# NOTE(review): each trial section displays different tokens, so the
# random-token cell was presumably re-run before each trial.
pandas_check(batch_shuffle, batch_shuffle_rev)

Unnamed: 0,forward,reverse
0,[CLS],[CLS]
1,escaped,marriage
2,Develop,circulated
3,elb,ъ
4,Madame,Regional
...,...,...
122,Regional,Madame
123,ъ,elb
124,circulated,Develop
125,marriage,escaped


In [22]:
# Losses for the random tokens in their generated order.
res = model(**batch_shuffle) # forward

l2r_loss tensor(13.3687, grad_fn=<NllLossBackward>)
r2l_loss tensor(10.0583, grad_fn=<NllLossBackward>)


In [23]:
# Losses for the same random tokens in reversed order.
res = model(**batch_shuffle_rev) # reverse

l2r_loss tensor(13.1889, grad_fn=<NllLossBackward>)
r2l_loss tensor(9.9842, grad_fn=<NllLossBackward>)


##### Trial-2

In [26]:
# Show the random batch used for this trial next to its reversal.
# NOTE(review): the tokens differ from the other trials, so batch_shuffle and
# batch_shuffle_rev were presumably regenerated before this cell ran.
pandas_check(batch_shuffle, batch_shuffle_rev)

Unnamed: 0,forward,reverse
0,[CLS],[CLS]
1,oscill,Horatio
2,transluc,Wig
3,##brook,assuming
4,Perth,meetings
...,...,...
122,meetings,Perth
123,assuming,##brook
124,Wig,transluc
125,Horatio,oscill


In [27]:
# Losses for this trial's random tokens in their generated order.
res = model(**batch_shuffle) # forward

l2r_loss tensor(13.2914, grad_fn=<NllLossBackward>)
r2l_loss tensor(10.1427, grad_fn=<NllLossBackward>)


In [28]:
# Losses for this trial's random tokens in reversed order.
res = model(**batch_shuffle_rev) # reverse

l2r_loss tensor(13.2515, grad_fn=<NllLossBackward>)
r2l_loss tensor(10.0191, grad_fn=<NllLossBackward>)


##### Trial-3

In [31]:
# Show the random batch used for this trial next to its reversal.
# NOTE(review): the tokens differ from the other trials, so batch_shuffle and
# batch_shuffle_rev were presumably regenerated before this cell ran.
pandas_check(batch_shuffle, batch_shuffle_rev)

Unnamed: 0,forward,reverse
0,[CLS],[CLS]
1,Stevens,Lima
2,Aden,##cing
3,Devil,##wo
4,##ibald,gate
...,...,...
122,gate,##ibald
123,##wo,Devil
124,##cing,Aden
125,Lima,Stevens


In [32]:
# Losses for this trial's random tokens in their generated order.
res = model(**batch_shuffle) # forward

l2r_loss tensor(13.3330, grad_fn=<NllLossBackward>)
r2l_loss tensor(9.8861, grad_fn=<NllLossBackward>)


In [33]:
# Losses for this trial's random tokens in reversed order.
res = model(**batch_shuffle_rev) # reverse

l2r_loss tensor(13.3210, grad_fn=<NllLossBackward>)
r2l_loss tensor(9.9579, grad_fn=<NllLossBackward>)
