In [1]:
import os
import argparse
import glob
import time
import random
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import higher
from torch.utils.tensorboard import SummaryWriter

In [2]:
import utils

from train_2 import EditTrainer
from config import EditConfig


In [4]:
model, tokenizer = utils.loadOTSModel()
dataloader = utils.retrieveDataloader(
        tokenizer, 
        bs=1, 
        dataset='train'
    )
del model

In [5]:
class EditConfig:
    def __init__(self):
        self.inner_lr = 1e-3
        self.outer_lr = 1e-5
        self.epochs = 1
        self.max_training_samps = 2e4
        self.n_edit_steps = 1
        self.cedit = 1
        self.cloc = 1

        self.debug = True
        self.model_save_pt = 2000
        self.model_dir = '../models'

## Test Run

In [None]:
%load_ext autoreload
%autoreload 2
import utils

from train_2 import EditTrainer
from config import EditConfig


## Testing Language Generation

In [12]:
# text = "Poppies and lupines are the primary flowers that dominate the green, lush rolling meadows in the spring to early summer, but in the late summer the hills turn gold and are covered with flowers such as the cluster-lily, gumweed, Mules ear, and farewell to spring. In shaded areas, one can expect to see thimbleberry and a variety of ferns growing. "

In [13]:
text = "Rather than having all of its functionality built into its core, Python was designed to be highly extensible (with modules). This compact modularity has made it particularly popular as a means of adding programmable interfaces to existing applications. Van Rossum's vision of a small core language with a large standard library and easily extensible interpreter stemmed from his frustrations with ABC, which espoused the opposite approach."

In [14]:
token_list = tokenizer.encode(text)
token_out = tokenizer(
    text
)
tokens, mask = map(
    torch.tensor, 
    [token_out['input_ids'], token_out['attention_mask']]
)

In [15]:
tokens.size()

torch.Size([79])

In [16]:
feed = tokens[:40]
print(tokenizer.decode(token_list[:40]))

Rather than having all of its functionality built into its core, Python was designed to be highly extensible (with modules). This compact modularity has made it particularly popular as a means of adding programmable


In [17]:

inputs = tokenizer("Hello, my dog is cute and ", return_tensors="pt")
generation_output = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [18]:
generation_output

GreedySearchDecoderOnlyOutput(sequences=tensor([[15496,    11,   616,  3290,   318, 13779,   290,   220, 17479,    13,
           314,  1842,   683,   523,   881,    13,   314,  1842,   683,   523]]), scores=(tensor([[-43.4955, -46.0844, -46.9108,  ..., -57.7377, -51.5319, -48.3616]]), tensor([[-48.8799, -50.3510, -55.1450,  ..., -64.8533, -62.6215, -52.4255]]), tensor([[-67.7690, -65.9575, -67.5762,  ..., -76.5747, -74.8564, -60.7996]]), tensor([[-86.5862, -85.9131, -89.9283,  ..., -97.0860, -86.9367, -87.2296]]), tensor([[-66.0425, -67.5343, -71.4601,  ..., -75.1458, -72.8039, -67.9962]]), tensor([[-59.0425, -63.4027, -69.0216,  ..., -77.3129, -73.8313, -64.7133]]), tensor([[-60.0665, -62.8150, -68.4672,  ..., -72.5271, -70.3083, -63.8665]]), tensor([[-47.8266, -52.4544, -58.7991,  ..., -65.4726, -64.2974, -52.0334]]), tensor([[-71.7299, -70.1800, -72.2610,  ..., -82.0179, -80.4628, -63.5811]]), tensor([[-86.2451, -85.7662, -90.2981,  ..., -97.1315, -86.9278, -86.6646]]), tensor([[-6

In [19]:
tokenizer.decode(tokenizer.eos_token_id)

'<|endoftext|>'

In [20]:
text = "Rather than having all of its functionality built into its core, Python was designed to be highly extensible (with modules). This compact modularity has made"

In [22]:
prompt_text = text
encoded_prompt = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt")
input_ids = encoded_prompt

output_sequences = model.generate(
    input_ids=input_ids,
    max_length=20 + len(encoded_prompt[0]),
    temperature=1.2,
#     top_k=args.k,
#     top_p=args.p,
    repetition_penalty=1,
    do_sample=True,
    num_return_sequences=1,
)

# Remove the batch dimension when returning multiple sequences
if len(output_sequences.shape) > 2:
    output_sequences.squeeze_()

generated_sequences = []

for generated_sequence_idx, generated_sequence in enumerate(output_sequences):
    print("=== GENERATED SEQUENCE {} ===".format(generated_sequence_idx + 1))
    generated_sequence = generated_sequence.tolist()

    # Decode text
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)

    # Remove all text after the stop token
    text = text[: text.find('<|endoftext|>')]

    # Add the prompt at the beginning of the sequence. Remove the excess text that was used for pre-processing
    total_sequence = (
        prompt_text + text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)) :]
    )

    generated_sequences.append(total_sequence)
    print(total_sequence)

generated_sequences

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


=== GENERATED SEQUENCE 1 ===
Rather than having all of its functionality built into its core, Python was designed to be highly extensible (with modules). This compact modularity has made much of python less complex but it is also able to do other things just as easy for anyone t


['Rather than having all of its functionality built into its core, Python was designed to be highly extensible (with modules). This compact modularity has made much of python less complex but it is also able to do other things just as easy for anyone t']

In [28]:
prompt_text = text
encoded_prompt = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt")
input_ids = encoded_prompt

output_sequences = model.generate(
    input_ids=input_ids,
    max_length=20 + len(encoded_prompt[0]),
    temperature=1.2,
#     top_k=args.k,
#     top_p=args.p,
    repetition_penalty=1,
    do_sample=True,
    num_return_sequences=5,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [36]:
print(input_ids.size())
for i in range(4):
    print(output_sequences[i, 49:])

torch.Size([1, 50])
tensor([  256, 34369,    13,  1320,   531,   532,   314,   836,   470,   892,
         9599,   508, 14759, 11361,  1865,   815,   307,   379,   477, 32064,
           13])
tensor([ 256, 7834,  306,  284, 3551, 2438,  357,   72,   13,   68,   13, 3551,
        6725,  326,  314, 1101, 1262,  287,  257, 2248,  737])
tensor([ 256, 5430, 1223,  656,  663, 2779,  393,  655, 1804, 1243, 1088,  340,
          13,  198,  198,  198, 3844,   11,  981,  428, 1244])
tensor([  256,  7834,   278,   351, 11361,    13,   198,   198,   198,  1135,
          550,  2904,  3066,   422,   262,  3726,   326,   356,   655,  2622,
          284])


In [25]:
import random

class SelfSampleTrainer(EditTrainer):
    def __init__(self, config, dataloader, model_path=None):
        super().__init__(config, dataloader, model_path) 
        
        self.finetuned = utils.loadTrainedModel(
            "../models/finetune/gpt2_epoch0_ts10000.20210310.18.03.1615401990", 
            tokenizer=False
        )
        self.finetuned.to(self.device)
        
    def genModelText(self, lm_tokens, edit_locs):
        

        input_ids = lm_tokens[:, :edit_locs.min()]
        input_size = input_ids.size()[-1]
        
        self.model.eval()
        print("generating")
        output_sequence = self.finetuned.generate(
            input_ids=input_ids,
            max_length=input_size + 15,
            temperature=1.2,
            repetition_penalty=1.0,
            do_sample=True,
            num_return_sequences=10,
        )

        edit_tokens = random.choice(output_sequence).unsqueeze(0)
        edit_mask = torch.ones(edit_tokens.shape, dtype=torch.long)
        edit_labels = torch.zeros(edit_tokens.shape, dtype=torch.long) - 100
        edit_labels[:, input_size:] = edit_tokens[:, input_size:]
        edit_labels = edit_labels.to(self.device)
        edit_tokens, edit_mask = edit_tokens.to(self.device), edit_mask.to(self.device)

        return edit_tokens, edit_mask, edit_labels

    
    def run(self):

        if not self.config.debug:
            torch.save(self.config, self.hyperspath)

        self.model.train()
        self.model.to(self.device)
        opt = torch.optim.Adam(
            self.model.parameters(), 
            self.config.outer_lr
            )
        
        global_iter = 0
        print("Starting Training")

        for epoch in range(self.config.epochs):
            self.epoch = epoch
            
            for train_step, (lm_data, edit_example, ent) in enumerate(self.data):
            
                lm_tokens, lm_mask = lm_data
                lm_tokens, lm_mask = lm_tokens.to(self.device), lm_mask.to(self.device)
                lm_labels = lm_tokens.masked_fill(lm_mask == 0, -100)
                
                ent_tokens = ent[0].flatten()
                ent_tokens = ent_tokens[ent_tokens != 50256]
                edit_locs = utils.locateEntityEdit(edit_example[0], ent_tokens)
                if edit_locs.size == 0 or edit_locs.min() == 0:
                    continue
                
                try:
                    edit_tokens, edit_mask, edit_labels = self.genModelText(lm_tokens, edit_locs)
                except RuntimeError:
                    breakpoint()
                
                
                inner_opt = torch.optim.SGD(
                    self.model.transformer.h[-3:].parameters(), 
                    lr=self.config.inner_lr
                    )

                with higher.innerloop_ctx(
                    self.model, 
                    inner_opt, 
                    copy_initial_weights=False, 
                    track_higher_grads=True
                    ) as (fmodel, diffopt):
                    
                    for edit_step in range(self.config.n_edit_steps):

                        loss = fmodel(
                            edit_tokens, 
                            attention_mask=edit_mask,
                            labels=edit_labels
                        ).loss
                        diffopt.step(loss)

                    edit_out = fmodel(
                        edit_tokens, 
                        attention_mask=edit_mask,
                        labels=edit_labels
                    )
                    l_edit = edit_out.loss
                    
                    base_out = self.model(
                        lm_tokens, 
                        attention_mask=lm_mask,
                        labels=lm_labels
                    )
                    l_base = base_out.loss

                    edited_base_out = fmodel(
                        lm_tokens, 
                        attention_mask=lm_mask,
                        labels=lm_labels
                    )

                    l_loc =  (
                        F.softmax(base_out.logits.detach(), dim=-1) *
                        (
                            F.log_softmax(base_out.logits.detach(), dim=-1) - 
                            F.log_softmax(edited_base_out.logits, dim=-1)
                        )).sum(-1).mean()
                    
                    total_loss = (
                        l_base + 
                        self.config.cloc * l_loc  + 
                        self.config.cedit * l_edit
                        )
                    total_loss.backward()

                    # accumulate grads 
                    if train_step % 5 == 0:
                        opt.step()
                        opt.zero_grad()
                    
                    global_iter += 1
                    
                    loss_dict = {
                        "l_base": l_base, "l_edit": l_edit, 
                        "l_loc": l_loc, "total": total_loss
                        }
                    self.echo(train_step, **loss_dict)
                    if not self.config.debug:
                        self.tensorBoard(global_iter, **loss_dict)
                        self.saveModel(self.model, train_step)

                        if train_step % 1000 == 0:
                            self.validateEditTraining()

        if not self.config.debug:
            self.saveModel(self.model, train_step)
        self.writer.flush()

In [26]:
trainer = SelfSampleTrainer(EditConfig(), dataloader)
trainer.run()

Starting Training


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 0; ', 'l_base 4.847848892211914; l_edit 4.031960487365723; l_loc 9.952959953807294e-05; total 8.87990951538086')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 1; ', 'l_base 4.275406360626221; l_edit 3.1473236083984375; l_loc 0.00022555608302354813; total 7.422955513000488')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 2; ', 'l_base 3.782113552093506; l_edit 4.284780979156494; l_loc 6.106802902650088e-05; total 8.06695556640625')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 3; ', 'l_base 5.968358039855957; l_edit 5.778411865234375; l_loc 8.791322761680931e-05; total 11.746857643127441')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 4; ', 'l_base 4.617933750152588; l_edit 4.105910778045654; l_loc 0.0001569620508234948; total 8.724000930786133')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 5; ', 'l_base 3.728123426437378; l_edit 4.264476299285889; l_loc 0.0002277493040310219; total 7.992827415466309')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 6; ', 'l_base 4.511284351348877; l_edit 3.8918631076812744; l_loc 3.718812149600126e-05; total 8.40318489074707')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 7; ', 'l_base 3.945984125137329; l_edit 3.4840424060821533; l_loc 0.00011122846626676619; total 7.430137634277344')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 8; ', 'l_base 3.847501754760742; l_edit 4.626814365386963; l_loc 0.0002939675759989768; total 8.474610328674316')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 9; ', 'l_base 4.346887111663818; l_edit 4.933980464935303; l_loc 1.8954173356178217e-05; total 9.28088665008545')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 10; ', 'l_base 3.5720832347869873; l_edit 3.741851806640625; l_loc 8.901715045794845e-05; total 7.314023971557617')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 12; ', 'l_base 4.4242401123046875; l_edit 4.504977703094482; l_loc 0.00032653138623572886; total 8.929544448852539')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 15; ', 'l_base 3.4372527599334717; l_edit 5.071290493011475; l_loc 0.00014113281213212758; total 8.508684158325195')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 16; ', 'l_base 4.045205116271973; l_edit 3.5713987350463867; l_loc 5.901257827645168e-05; total 7.616662979125977')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 17; ', 'l_base 3.884176254272461; l_edit 4.2924652099609375; l_loc 7.441438356181607e-05; total 8.176715850830078')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 18; ', 'l_base 4.451939105987549; l_edit 4.54489278793335; l_loc 7.86064556450583e-05; total 8.996910095214844')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 19; ', 'l_base 3.5563535690307617; l_edit 3.7011563777923584; l_loc 3.8504575059050694e-05; total 7.2575483322143555')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 20; ', 'l_base 3.904815673828125; l_edit 3.037069082260132; l_loc 0.00012207838881295174; total 6.942007064819336')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 21; ', 'l_base 3.9763643741607666; l_edit 4.373611927032471; l_loc 7.194503268692642e-05; total 8.350048065185547')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 22; ', 'l_base 4.702048301696777; l_edit 4.3300089836120605; l_loc 9.288084402214736e-05; total 9.032150268554688')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 23; ', 'l_base 4.378472805023193; l_edit 3.226142406463623; l_loc 0.00019204254203941673; total 7.604807376861572')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 24; ', 'l_base 4.426316261291504; l_edit 2.9725394248962402; l_loc 9.052864334080368e-05; total 7.398946285247803')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 25; ', 'l_base 4.154033184051514; l_edit 3.3741378784179688; l_loc 8.760294440435246e-05; total 7.528258800506592')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 26; ', 'l_base 4.826742649078369; l_edit 3.2457330226898193; l_loc 5.1771792641375214e-05; total 8.072527885437012')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 27; ', 'l_base 4.669022083282471; l_edit 3.006422281265259; l_loc 2.626604873512406e-05; total 7.675470352172852')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 28; ', 'l_base 4.381113529205322; l_edit 5.3278584480285645; l_loc 0.00037420407170429826; total 9.709346771240234')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 29; ', 'l_base 4.869408130645752; l_edit 3.8057005405426025; l_loc 5.38376480108127e-05; total 8.675162315368652')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 30; ', 'l_base 4.149905204772949; l_edit 5.066888809204102; l_loc 0.0003465033951215446; total 9.217140197753906')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 32; ', 'l_base 4.0102338790893555; l_edit 3.1304361820220947; l_loc 2.808983117574826e-05; total 7.140698432922363')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 33; ', 'l_base 4.764048099517822; l_edit 3.544593095779419; l_loc 0.0001973041071323678; total 8.308838844299316')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 34; ', 'l_base 3.658181667327881; l_edit 4.895082950592041; l_loc 7.969007856445387e-05; total 8.5533447265625')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 37; ', 'l_base 4.282776355743408; l_edit 4.813879489898682; l_loc 6.024364847689867e-05; total 9.096715927124023')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 38; ', 'l_base 5.005941390991211; l_edit 4.034594535827637; l_loc 0.00024233113799709827; total 9.040778160095215')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 39; ', 'l_base 4.160793781280518; l_edit 2.9221174716949463; l_loc 0.00010258512338623405; total 7.083013534545898')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 40; ', 'l_base 3.9224605560302734; l_edit 4.858457565307617; l_loc 0.00028054192080162466; total 8.781198501586914')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 41; ', 'l_base 4.024776935577393; l_edit 4.325339317321777; l_loc 0.00029076970531605184; total 8.350406646728516')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 43; ', 'l_base 4.384141445159912; l_edit 4.176210880279541; l_loc 0.0003270713787060231; total 8.56067943572998')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 46; ', 'l_base 3.539642810821533; l_edit 3.998319149017334; l_loc 0.0003028751234523952; total 7.538264751434326')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 47; ', 'l_base 4.310018539428711; l_edit 4.299783229827881; l_loc 5.600602526101284e-05; total 8.609857559204102')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 49; ', 'l_base 4.333138942718506; l_edit 4.740351676940918; l_loc 7.366617501247674e-05; total 9.073564529418945')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 51; ', 'l_base 3.76676082611084; l_edit 3.8159353733062744; l_loc 2.504743133613374e-05; total 7.58272123336792')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 52; ', 'l_base 4.971736907958984; l_edit 3.6805267333984375; l_loc 0.0001584748679306358; total 8.652421951293945')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 53; ', 'l_base 4.35655403137207; l_edit 2.973344326019287; l_loc 0.00017532965284772217; total 7.330073833465576')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 54; ', 'l_base 4.4922380447387695; l_edit 2.994445562362671; l_loc 0.00021938547433819622; total 7.486903190612793')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 55; ', 'l_base 4.211339473724365; l_edit 4.303797245025635; l_loc 2.632146788528189e-05; total 8.51516342163086')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 56; ', 'l_base 4.655278205871582; l_edit 3.7920455932617188; l_loc 0.00012461580627132207; total 8.44744873046875')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 57; ', 'l_base 3.7548792362213135; l_edit 5.236897945404053; l_loc 3.0249722840380855e-05; total 8.99180793762207')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 58; ', 'l_base 4.835113525390625; l_edit 4.584047317504883; l_loc 3.1140370992943645e-05; total 9.419191360473633')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 59; ', 'l_base 4.019800186157227; l_edit 4.237873077392578; l_loc 9.040355507750064e-05; total 8.257763862609863')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 61; ', 'l_base 3.6006174087524414; l_edit 3.459458827972412; l_loc 0.00011047662701457739; total 7.060186386108398')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 62; ', 'l_base 3.921640396118164; l_edit 4.932989120483398; l_loc 0.00017465860582888126; total 8.854804039001465')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 63; ', 'l_base 4.590113162994385; l_edit 3.6603739261627197; l_loc 6.826685421401635e-05; total 8.250555038452148')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 64; ', 'l_base 4.202213764190674; l_edit 3.5252580642700195; l_loc 5.8449124480830505e-05; total 7.727530479431152')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 65; ', 'l_base 5.569684028625488; l_edit 4.2437968254089355; l_loc 3.275422932347283e-05; total 9.81351375579834')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 66; ', 'l_base 4.485429763793945; l_edit 3.288452625274658; l_loc 0.0001297703420277685; total 7.774012088775635')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 67; ', 'l_base 3.8527817726135254; l_edit 4.331613063812256; l_loc 2.253658567497041e-05; total 8.184417724609375')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 69; ', 'l_base 4.649467945098877; l_edit 5.390839099884033; l_loc 0.0002832601312547922; total 10.040590286254883')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 70; ', 'l_base 3.32847261428833; l_edit 4.047585964202881; l_loc 0.00012693763710558414; total 7.376185417175293')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 71; ', 'l_base 4.309097766876221; l_edit 2.7772390842437744; l_loc 0.00031483039492741227; total 7.086651802062988')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 72; ', 'l_base 4.557061195373535; l_edit 4.41136360168457; l_loc 0.00010058205225504935; total 8.968524932861328')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 73; ', 'l_base 3.8135159015655518; l_edit 4.9638142585754395; l_loc 7.020533666945994e-05; total 8.777400016784668')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 74; ', 'l_base 3.8964343070983887; l_edit 3.850329637527466; l_loc 0.00015696111950092018; total 7.746920585632324')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 75; ', 'l_base 4.012128829956055; l_edit 4.725078105926514; l_loc 0.00020317191956564784; total 8.737409591674805')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 76; ', 'l_base 5.2391581535339355; l_edit 3.1008458137512207; l_loc 2.890418363676872e-05; total 8.340032577514648')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 77; ', 'l_base 3.7875313758850098; l_edit 4.1634321212768555; l_loc 0.00019793372484855354; total 7.9511613845825195')
generating


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Epoch: 0; TrainStep 78; ', 'l_base 3.467377185821533; l_edit 4.028688907623291; l_loc 0.00019779133435804397; total 7.4962639808654785')
generating


KeyboardInterrupt: 