In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from pathlib import Path

In `config`:

    config = Config(
        max_length = 64,  # in tokens
        from_pretrained = "facebook/bart-base",
        model_type = "VAE",
    )

In [4]:
from smtag.config import config

# Sanity-check that the application-wide config matches what this notebook assumes.
# Every check needs its own `assert`: a bare comparison is evaluated and then discarded
# (or merely displayed when it is the last line of the cell), so a mismatch would go unnoticed.
assert config.max_length == 64, f"unexpected config.max_length: {config.max_length!r}"
assert config.from_pretrained == "facebook/bart-base", f"unexpected config.from_pretrained: {config.from_pretrained!r}"
assert config.model_type == "VAE", f"unexpected config.model_type: {config.model_type!r}"

Downloading:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

In [5]:
# Record the installed transformers version; the bare last expression is shown as the cell output.
from transformers import __version__
__version__

'4.15.0'

## Extracting examples for LM

In [6]:
from smtag.extract import ExtractorXML

In [7]:
! dir /data/xml/oapmc_articles

eval  test  train


In [12]:
# ! rm -fr /data/text/oapmc_title

In [6]:
# Settings passed to ExtractorXML in the next cell.
corpus = "/data/xml/oapmc_articles"  # source directory; the listing above shows eval/test/train subfolders
text_examples = "/data/text/oapmc_title"  # passed as destination_dir for the extracted text
xpath = ".//article-meta/title-group/article-title"  # XPath selecting the article title element
sentence_level = False  # presumably keeps each extracted element whole instead of splitting into sentences — TODO confirm
keep_xml = False  # presumably strips XML markup from the extracted text — TODO confirm
inclusion_probability = 1.0  # presumably the probability of keeping each example (1.0 = keep all) — TODO confirm

In [14]:
# Build the extractor that reads XML from `corpus` and writes text examples under `text_examples`.
extractor_lm = ExtractorXML(
    corpus,
    xpath=xpath,
    sentence_level=sentence_level,
    keep_xml=keep_xml,
    inclusion_probability=inclusion_probability,
    destination_dir=text_examples,
)

/data/text/oapmc_title created


In [15]:
# Long-running step (about 1.5 hours in the recorded run).
# The displayed result maps each output file to a count (presumably the number of examples written — TODO confirm).
extractor_lm.extract_from_corpus()

100%|██████████| 3223/3223 [1:31:53<00:00,  1.71s/it]
100%|██████████| 10/10 [00:16<00:00,  1.68s/it]
100%|██████████| 10/10 [00:16<00:00,  1.67s/it]


{PosixPath('/data/text/oapmc_title/train.txt'): 967335,
 PosixPath('/data/text/oapmc_title/eval.txt'): 2970,
 PosixPath('/data/text/oapmc_title/test.txt'): 3004}

#### same via CLI:

```bash
python -m smtag.cli.prepro.extract /data/xml/emboj_all /data/text/emboj_twin --xpath ".//article-meta/title-group/article-title" ".//abstract"
```

## Preparing tokenized dataset for LM

In [16]:
from smtag.dataprep import PreparatorLM

In [7]:
# Directory for the tokenized dataset: handed to PreparatorLM and later to train_lm as the dataset location.
tokenized_examples = "/data/json/oapmc_title"

In [18]:
# ! rm -fr /data/json/oapmc_title

In [19]:
# Set up the tokenization step, reusing the application-wide max_length from config.
prep_lm = PreparatorLM(
    text_examples,  # presumably the input directory holding the extracted text examples — TODO confirm
    tokenized_examples,  # directory for the tokenized output (reported as created when this cell ran)
    max_length=config.max_length  # in tokens, per the config note at the top of the notebook
)

/data/json/oapmc_title created


In [20]:
# Processes the train, eval and test splits in turn; the train split took about 22 minutes in the recorded run.
prep_lm.run()

Preparing: train


100%|██████████| 967335/967335 [22:17<00:00, 723.36it/s]



Length verification: OK!

average input_ids length = 35 (min=16, max=64) tokens
longest example: <s>Efficiency and Tolerability of Induction and Consolidation Therapy with Arsenic Trioxide/Bortezomib/Ascorbic Acid/Dexamethasone (ABCD) Regimen Compared to Bortezomib/Dexamethasone (BD) Regimen in</s>
shortest example: <s>Data on entrepreneurship education and entrepreneurial performance of aspiring entrepreneurs in selected Nigerian universities</s>
Preparing: eval


100%|██████████| 2970/2970 [00:02<00:00, 1009.73it/s]



Length verification: OK!

average input_ids length = 35 (min=18, max=64) tokens
longest example: <s>Characterization of an Indole-3-Acetamide Hydrolase from Alcaligenes faecalis subsp. parafaecalis and Its Application in Efficient Preparation of Both Enantiomers of Chiral Building Block 2,3-Dihydro-1,4-Benz</s>
shortest example: <s>A bilateral tumor model identifies transcriptional programs associated with patient response to immune checkpoint blockade</s>
Preparing: test


100%|██████████| 3004/3004 [00:04<00:00, 747.14it/s]


Length verification: OK!

average input_ids length = 35 (min=18, max=64) tokens
longest example: <s>Comparison of Inappropriate Shocks and Other Health Outcomes Between Single- and Dual-Chamber Implantable Cardioverter-Defibrillators for Primary Prevention of Sudden Cardiac Death: Results From the Cardiovascular Research Network Longitudinal Study of Implantable Cardioverter-Def</s>
shortest example: <s>Primary care provider perceptions of intake transition records and shared care with outpatient cardiac rehabilitation programs</s>





same via CLI:
    
```bash
python -m smtag.cli.lm.dataprep /data/text/mini /data/json/mini
```

## Train LM

In [8]:
from smtag.train.train_lm import (
    train as train_lm,
    TrainingArgumentsLM
)

In [9]:
# Inputs for the train_lm call further down.
no_cache = True  # presumably bypasses the dataset cache — TODO confirm against train_lm
loader_path = "./smtag/loader/loader_lm.py"  # dataset loader script; relative path, so it depends on the working directory
data_config_name = "SEQ2SEQ"  # dataset configuration name
tokenizer = config.tokenizer  # tokenizer has to be the same application-wide
model_type = "VAE"  # NOTE(review): duplicates config.model_type (compared against "VAE" in an earlier cell); consider reading it from config
from_pretrained = config.from_pretrained

In [9]:
# NOTE(review): purpose unclear — possibly only needed for a character-level tokenizer that lacks a mask token;
# confirm whether this is required with the tokenizer from config. `tokenizer` is the object taken from
# config.tokenizer, so this attribute assignment is applied to that object.
tokenizer.mask_token = '<mask>'
# tokenizer.unk_token = '<unk>'

Model architecture parameters: 

```python
model_config = VAEConfigLM(
    freeze_pretrained=None,  # 'encoder' # 'both' # 'decoder' # None
    hidden_features=256,
    z_dim=96,
    gamma=1E-1,  # weight of lm loss as compared to z_loss
    sampling_iterations=200,
    seq_length=config.max_length,
    residuals=False,
    latent_var_loss="kl"  # "kl" or "mmd" or None
)
```

In [11]:
# Training arguments for the LM run; only these four values are set explicitly.
training_args_tokcl = TrainingArgumentsLM(
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=1,
    logging_steps=100,
)
# Bare last expression: display the resulting arguments object.
training_args_tokcl

TrainingArgumentsLM(output_dir='/lm_models', overwrite_output_dir=True, do_train=False, do_eval=True, do_predict=False, evaluation_strategy=<IntervalStrategy.STEPS: 'steps'>, prediction_loss_only=True, per_device_train_batch_size=32, per_device_eval_batch_size=32, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=1, eval_accumulation_steps=None, learning_rate=5e-05, weight_decay=0.0, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1, max_steps=-1, lr_scheduler_type=<SchedulerType.LINEAR: 'linear'>, warmup_ratio=0.0, warmup_steps=0, log_level=-1, log_level_replica=-1, log_on_each_node=True, logging_dir='/lm_models/runs/May26_15-41-57_8021e3d0dc73', logging_strategy=<IntervalStrategy.STEPS: 'steps'>, logging_first_step=False, logging_steps=100, logging_nan_inf_filter=True, save_strategy=<IntervalStrategy.STEPS: 'steps'>, save_steps=500, save_total_limit=5, save_on_each_node=False, no_cuda=False, seed=42, bf1

In [12]:
# Launch LM training. All arguments are passed positionally, so their order must match
# the signature of smtag.train.train_lm.train (imported here as train_lm).
train_lm(
    training_args_tokcl,  # the TrainingArgumentsLM instance
    loader_path,  # dataset loader script
    data_config_name,  # dataset configuration name
    tokenized_examples,  # directory with the tokenized dataset
    no_cache,
    tokenizer,  # taken from config.tokenizer
    model_type,
    from_pretrained
)



tokenizer vocab size: 50265

Loading datasets found in /data/json/oapmc_title.
using ./smtag/loader/loader_lm.py as dataset loader.
Downloading and preparing dataset bio_lang/SEQ2SEQ (download: Unknown size, generated: 268.49 MiB, post-processed: Unknown size, total: 268.49 MiB) to /cache/bio_lang/SEQ2SEQ-1ddd9282e7145ba8/0.0.1/ff9dbe678aba55520bef244a55868bd4acf85eb086ce21a8454da23294256c2e...


0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset bio_lang downloaded and prepared to /cache/bio_lang/SEQ2SEQ-1ddd9282e7145ba8/0.0.1/ff9dbe678aba55520bef244a55868bd4acf85eb086ce21a8454da23294256c2e. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]


Training with 967335 examples.
Evaluating on 2970 examples.


Downloading:   0%|          | 0.00/532M [00:00<?, ?B/s]


Training arguments:
TrainingArgumentsLM(
_n_gpu=4,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
bf16=False,
bf16_full_eval=False,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_steps=100,
evaluation_strategy=IntervalStrategy.STEPS,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_model_id=None,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=False,
local_rank=-1,
log_level=-1,
log_level_replica=-1,
log_on_each_node=True,
logging_di

***** Running training *****
  Num examples = 967335
  Num Epochs = 1
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 1
  Total optimization steps = 7558


CUDA available: True
Available devices  4
Current cuda device  0


  0%|          | 0/7558 [00:00<?, ?it/s]



Step,Training Loss,Validation Loss,Supp Data Loss Z,Supp Data Loss Lm
100,45.4273,2.278832,1.455738,0.783757
200,8.8278,1.62458,0.898766,0.701679
300,6.249,1.20003,0.545309,0.640245
400,5.0505,1.034896,0.432444,0.591074
500,4.2082,0.974537,0.413509,0.550343
600,3.6945,0.985467,0.455868,0.517927
700,3.208,0.825795,0.324736,0.492802
800,2.7793,0.731545,0.24972,0.475507
900,2.4305,0.713125,0.245874,0.46109
1000,2.2022,0.673489,0.21876,0.449216


INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 45.4273, 'learning_rate': 4.9338449325218316e-05, 'epoch': 0.01}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 1.455737590789795, 'eval_supp_data_loss_lm': 0.7837565541267395, 'eval_loss': 2.278831958770752, 'eval_runtime': 5.5902, 'eval_samples_per_second': 531.29, 'eval_steps_per_second': 4.293, 'epoch': 0.01}


[32;1m<s>[0m[31;1m-[Gen][0m[31;1m-[ome][0m[31;1m of[wide][0m[31;1m-[identification][0m[31;1m of[and][0m[31;1m the[characterization][0m[32;1m of[0m[31;1m the[micro][0m[31;1m-[s][0m[31;1m-[atellite][0m[31;1m-[markers][0m[31;1m of[in][0m[31;1m the[black][0m[31;1m-[pepper][0m[31;1m-[(][0m[31;1m the[P][0m[31;1m-[iper][0m[31;1m-[n][0m[31;1m-[igr][0m[31;1m-[um][0m[31;1m</s>[):][0m[31;1m</s>[A][0m[31;1m-[valuable][0m[31;1m-[resource][0m[31;1m of[for][0m[31;1m the[boosting][0m[31;1m</s>[gen][0m[31;1m-[omics][0m[31;1m</s>[applications][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 8.8278, 'learning_rate': 4.867689865043662e-05, 'epoch': 0.03}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.898766040802002, 'eval_supp_data_loss_lm': 0.7016785740852356, 'eval_loss': 1.6245797872543335, 'eval_runtime': 5.3912, 'eval_samples_per_second': 550.896, 'eval_steps_per_second': 4.452, 'epoch': 0.03}


[32;1m<s>[0m[31;1mThe[Exp][0m[31;1min[ression][0m[32;1m of[0m[31;1m the[Concern][0m[31;1m of[:][0m[31;1m a[Caps][0m[31;1mic[aic][0m[31;1m-[in][0m[31;1m and[inhibits][0m[31;1m the[migration][0m[31;1m of[and][0m[31;1m the[invasion][0m[31;1m of[via][0m[32;1m the[0m[31;1m the[AM][0m[31;1m-[PK][0m[31;1m-[/][0m[31;1mA[NF][0m[32;1m-[0m[31;1mD[kB][0m[31;1m-[signaling][0m[31;1m</s>[pathway][0m[31;1m</s>[in][0m[31;1m the[es][0m[31;1min[oph][0m[31;1min[agus][0m[31;1m</s>[sequ][0m[31;1mic[amous][0m[31;1m and[cell][0m[31;1m-[carcin][0m[31;1min[oma][0m[31;1m</s>[by][0m[31;1m the[decreasing][0m[31;1m the[matrix][0m[31;1m-[met][0m[31;1mation[all][0m[31;1mic[op][0m[31;1min[rotein][0m[31;1m</s>[as

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 6.249, 'learning_rate': 4.801534797565494e-05, 'epoch': 0.04}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.5453088879585266, 'eval_supp_data_loss_lm': 0.6402448415756226, 'eval_loss': 1.2000300884246826, 'eval_runtime': 5.646, 'eval_samples_per_second': 526.037, 'eval_steps_per_second': 4.251, 'epoch': 0.04}


[32;1m<s>[0m[31;1mThe[No][0m[31;1m-[Mod][0m[31;1mal[ulatory][0m[31;1m and[Effects][0m[31;1m of[when][0m[31;1m a[Stim][0m[31;1mal[ulating][0m[32;1m the[0m[31;1m the[Right][0m[31;1m and[In][0m[31;1mhib[fer][0m[31;1mon[ior][0m[31;1m and[Front][0m[31;1m-[al][0m[31;1m and[G][0m[31;1mas[yrus][0m[31;1m in[with][0m[31;1m the[Continuous][0m[31;1m and[6][0m[31;1m-[�][0m[31;1m�[�][0m[31;1m1[Hz][0m[31;1m-[t][0m[31;1m-[AC][0m[31;1m-[S][0m[31;1m-[and][0m[31;1m the[t][0m[31;1m-[R][0m[31;1m-[NS][0m[31;1m</s>[on][0m[31;1m the[Response][0m[31;1m to[In][0m[31;1mhib[hibition][0m[31;1m</s>[:][0m[32;1m A[0m[31;1m case[Behavioral][0m[31;1m study[Study][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 5.0505, 'learning_rate': 4.735379730087325e-05, 'epoch': 0.05}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.4324442446231842, 'eval_supp_data_loss_lm': 0.5910736918449402, 'eval_loss': 1.0348961353302002, 'eval_runtime': 5.6463, 'eval_samples_per_second': 526.012, 'eval_steps_per_second': 4.251, 'epoch': 0.05}


[32;1m<s>[0m[31;1mThe[E][0m[32;1mval[0m[32;1muation[0m[32;1m of[0m[32;1m the[0m[31;1m effect[Clinical][0m[32;1m and[0m[31;1m the[Economic][0m[31;1m and[B][0m[31;1macterial[urden][0m[32;1m of[0m[31;1m the[Poor][0m[31;1m and[Gly][0m[31;1mt[cemic][0m[31;1m and[Control][0m[31;1m in[Associated][0m[32;1m with[0m[31;1m the[Ther][0m[32;1mape[0m[31;1mpt[utic][0m[31;1m and[In][0m[31;1mhib[ert][0m[31;1mion[ia][0m[32;1m in[0m[31;1m the[Patients][0m[32;1m with[0m[31;1m the[Type][0m[31;1m 2[][0m[31;1m1[2][0m[31;1m-[Diabetes][0m[31;1m</s>[in][0m[31;1m a[the][0m[31;1m Patients[United][0m[31;1m and[States][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 4.2082, 'learning_rate': 4.6692246626091565e-05, 'epoch': 0.07}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.41350874304771423, 'eval_supp_data_loss_lm': 0.5503431558609009, 'eval_loss': 0.9745368957519531, 'eval_runtime': 5.5152, 'eval_samples_per_second': 538.515, 'eval_steps_per_second': 4.352, 'epoch': 0.07}


[32;1m<s>[0m[31;1mThe[Multiple][0m[31;1m-[s][0m[31;1murgical[cler][0m[31;1mop[osing][0m[31;1m and[hem][0m[31;1mat[ang][0m[32;1mi[0m[32;1moma[0m[31;1m in[of][0m[32;1m the[0m[31;1m treatment[right][0m[31;1m-[lung][0m[31;1m:[in][0m[31;1m the[a][0m[31;1m patient[23][0m[32;1m-[0m[32;1myear[0m[32;1m-[0m[31;1mterm[old][0m[31;1m population[female][0m[31;1m patients[patient][0m[31;1m with[:][0m[31;1m a[A][0m[32;1m case[0m[32;1m report[0m[31;1m</s>[and][0m[31;1m meta[review][0m[32;1m of[0m[32;1m the[0m[31;1m clinical[literature][0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-12000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 3.6945, 'learning_rate': 4.603069595130987e-05, 'epoch': 0.08}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.45586785674095154, 'eval_supp_data_loss_lm': 0.5179269909858704, 'eval_loss': 0.9854666590690613, 'eval_runtime': 5.6597, 'eval_samples_per_second': 524.766, 'eval_steps_per_second': 4.241, 'epoch': 0.08}


[32;1m<s>[0m[31;1mThe[A][0m[31;1m novel[prospective][0m[32;1m study[0m[32;1m of[0m[31;1m the[shoulder][0m[31;1m cancer[pain][0m[31;1m and[in][0m[31;1m patients[primary][0m[32;1m care[0m[31;1m patients[:][0m[31;1m a[Pre][0m[31;1m-[val][0m[32;1mence[0m[31;1m and[of][0m[31;1m the[im][0m[31;1mplant[aged][0m[31;1m patients[pathology][0m[32;1m and[0m[31;1m the[response][0m[32;1m to[0m[31;1m the[guided][0m[31;1m care[diagnostic][0m[31;1m and[blocks][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 3.208, 'learning_rate': 4.5369145276528185e-05, 'epoch': 0.09}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.3247360587120056, 'eval_supp_data_loss_lm': 0.49280211329460144, 'eval_loss': 0.8257945775985718, 'eval_runtime': 5.4141, 'eval_samples_per_second': 548.563, 'eval_steps_per_second': 4.433, 'epoch': 0.09}


[32;1m<s>[0m[31;1mThe[N][0m[31;1mucle[urses][0m[32;1m�[0m[32;1m�[0m[31;1m and[knowledge][0m[31;1m of[and][0m[31;1m health[practices][0m[31;1m of[in][0m[32;1m the[0m[31;1m treatment[face][0m[32;1m of[0m[32;1m the[0m[31;1m treatment[challenge][0m[32;1m of[0m[31;1m the[using][0m[32;1m the[0m[31;1m treatment[system][0m[31;1m to[at][0m[31;1mical[ization][0m[32;1m of[0m[31;1m the[nursing][0m[31;1m and[care][0m[31;1m in[as][0m[31;1m a[an][0m[31;1m evaluation[instrument][0m[31;1m for[of][0m[31;1m the[assistance][0m[31;1m</s>[in][0m[31;1m the[a][0m[31;1m patient[first][0m[31;1m-[aid][0m[31;1m model[in][0m[31;1m the[Brazil][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 2.7793, 'learning_rate': 4.470759460174649e-05, 'epoch': 0.11}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.24971961975097656, 'eval_supp_data_loss_lm': 0.47550714015960693, 'eval_loss': 0.7315453290939331, 'eval_runtime': 5.5349, 'eval_samples_per_second': 536.599, 'eval_steps_per_second': 4.336, 'epoch': 0.11}


[32;1m<s>[0m[31;1mThe[T][0m[31;1mreatment[ACC][0m[31;1m-[3][0m[31;1m-[is][0m[32;1m a[0m[31;1m novel[micro][0m[31;1mRNA[tub][0m[31;1mular[ule][0m[31;1m for[plus][0m[31;1m a[end][0m[31;1mopl[-][0m[31;1mof[tracking][0m[31;1m of[protein][0m[31;1m for[that][0m[31;1m is[promotes][0m[31;1m the[ax][0m[31;1mial[on][0m[31;1m-[elong][0m[32;1mation[0m[32;1m and[0m[31;1m in[also][0m[31;1m in[regulates][0m[31;1m the[micro][0m[31;1mRNA[tub][0m[31;1mular[ule][0m[31;1m-[plus][0m[31;1m cell[end][0m[31;1m-[dynamics][0m[32;1m in[0m[31;1m patients[multiple][0m[31;1m cell[embryonic][0m[31;1m cells[cell][0m[31;1m carcin[types][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 2.4305, 'learning_rate': 4.4046043926964806e-05, 'epoch': 0.12}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.24587424099445343, 'eval_supp_data_loss_lm': 0.46108973026275635, 'eval_loss': 0.7131246328353882, 'eval_runtime': 5.5469, 'eval_samples_per_second': 535.438, 'eval_steps_per_second': 4.327, 'epoch': 0.12}


[32;1m<s>[0m[31;1mThe[�][0m[32;1m�[0m[31;1mThe[T][0m[31;1mreatment[ape][0m[31;1m-[dermat][0m[31;1mology[osc][0m[32;1mopy[0m[31;1m of[�][0m[32;1m�[0m[32;1m:[0m[31;1m a[constructing][0m[32;1m a[0m[31;1m new[low][0m[32;1m-[0m[31;1mdose[cost][0m[31;1m,[dermat][0m[31;1mology[oscope][0m[31;1m for[using][0m[32;1m a[0m[31;1m new[mobile][0m[31;1m-[phone][0m[31;1m-[,][0m[31;1m a[immersion][0m[31;1m,[fluid][0m[31;1m,[and][0m[31;1m the[transparent][0m[31;1m-[adhesive][0m[31;1m</s>[tape][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 2.2022, 'learning_rate': 4.338449325218312e-05, 'epoch': 0.13}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.21875999867916107, 'eval_supp_data_loss_lm': 0.449216365814209, 'eval_loss': 0.6734893321990967, 'eval_runtime': 5.6476, 'eval_samples_per_second': 525.887, 'eval_steps_per_second': 4.25, 'epoch': 0.13}


[32;1m<s>[0m[32;1mThe[0m[31;1m effect[Chinese][0m[31;1m version[Her][0m[31;1mpes[bal][0m[31;1m Health[Formula][0m[31;1m of[Shen][0m[31;1mter[z][0m[31;1mia[hu][0m[31;1m,[T][0m[31;1mum[ia][0m[31;1m,[op][0m[31;1mo[i][0m[31;1m ([Gran][0m[31;1mc[ule][0m[31;1m ([Results][0m[32;1m in[0m[31;1m the[Met][0m[31;1mabol[abolic][0m[31;1m and[Improvement][0m[31;1m of[in][0m[31;1m Patients[Type][0m[32;1m 2[0m[31;1m Diabetes[Di][0m[32;1mabetic[0m[31;1m Patients[Rats][0m[31;1m</s>[by][0m[31;1m the[Mod][0m[32;1mulating[0m[32;1m the[0m[31;1m Expression[Gut][0m[31;1m-[Micro][0m[32;1mbi[0m[32;1mota[0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-24000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 2.0127, 'learning_rate': 4.2722942577401434e-05, 'epoch': 0.15}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.1849970817565918, 'eval_supp_data_loss_lm': 0.4400160312652588, 'eval_loss': 0.6296237707138062, 'eval_runtime': 5.5923, 'eval_samples_per_second': 531.09, 'eval_steps_per_second': 4.292, 'epoch': 0.15}


[32;1m<s>[0m[31;1mThe[Effect][0m[32;1m of[0m[31;1m a[ankle][0m[31;1m and[stretching][0m[31;1m on[combined][0m[32;1m with[0m[31;1m a[arm][0m[31;1m ar[cycling][0m[32;1m on[0m[32;1m the[0m[31;1m risk[improvement][0m[32;1m of[0m[31;1m the[calf][0m[31;1m and[muscle][0m[31;1m and[stiffness][0m[32;1m in[0m[32;1m patients[0m[32;1m with[0m[31;1m chronic[stroke][0m[31;1m</s>[:][0m[32;1m a[0m[31;1m systematic[pilot][0m[32;1m study[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.8575, 'learning_rate': 4.206139190261974e-05, 'epoch': 0.16}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.19968722760677338, 'eval_supp_data_loss_lm': 0.4317455291748047, 'eval_loss': 0.6365070939064026, 'eval_runtime': 5.6729, 'eval_samples_per_second': 523.54, 'eval_steps_per_second': 4.231, 'epoch': 0.16}


[32;1m<s>[0m[31;1mThe[Pl][0m[32;1masma[0m[31;1m-[Micro][0m[31;1mbi[RNA][0m[32;1m-[0m[31;1mBased[21][0m[31;1m and[,][0m[31;1m a[26][0m[31;1m-[a][0m[32;1m,[0m[32;1m and[0m[31;1m E[29][0m[32;1ma[0m[31;1m Are[-][0m[32;1m3[0m[32;1mp[0m[31;1m Are[as][0m[31;1m a[Pred][0m[32;1mictive[0m[31;1m Factor[Mark][0m[32;1mers[0m[31;1m of[for][0m[31;1m the[Treatment][0m[31;1m of[Response][0m[31;1m to[Following][0m[31;1m Int[Trans][0m[31;1mplant[arter][0m[32;1mial[0m[31;1m My[Che][0m[31;1mmor[mo][0m[32;1memb[0m[32;1mol[0m[32;1mization[0m[31;1m of[in][0m[32;1m Patients[0m[32;1m with[0m[31;1m Type[Hep][0m[32;1mato[0m[32;1mcell[0m[32;1mular[0m[32;1m Car[0m[32;1mcin[0m[32;1moma[0m[32;1m</s>

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.6912, 'learning_rate': 4.1399841227838055e-05, 'epoch': 0.17}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.15785063803195953, 'eval_supp_data_loss_lm': 0.4245401918888092, 'eval_loss': 0.5862813591957092, 'eval_runtime': 5.5296, 'eval_samples_per_second': 537.106, 'eval_steps_per_second': 4.34, 'epoch': 0.17}


[32;1m<s>[0m[31;1mThe[Small][0m[31;1m-[RNA][0m[31;1m-[Sequ][0m[32;1mencing[0m[31;1m of[Analysis][0m[32;1m of[0m[31;1m the[mi][0m[31;1mR[RNA][0m[31;1m-[Expression][0m[31;1m in[Reve][0m[32;1mals[0m[31;1m a[Novel][0m[31;1m Ant[Ins][0m[31;1mights[ih][0m[31;1myd[ts][0m[31;1m in[into][0m[31;1m the[Root][0m[31;1m-[Formation][0m[31;1m and[under][0m[31;1m the[Root][0m[31;1m-[Restrict][0m[32;1mion[0m[31;1m</s>[Cult][0m[31;1mured[ivation][0m[31;1m</s>[in][0m[31;1m the[Grape][0m[31;1m-[vine][0m[31;1m</s>[(][0m[31;1mC[V][0m[31;1mit[itis][0m[31;1m sat[v][0m[31;1mes[in][0m[31;1mum[ifer][0m[32;1ma[0m[31;1m)[L][0m[32;1m.)[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.502, 'learning_rate': 4.073829055305637e-05, 'epoch': 0.19}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.1307850480079651, 'eval_supp_data_loss_lm': 0.4183826744556427, 'eval_loss': 0.5523537993431091, 'eval_runtime': 5.6377, 'eval_samples_per_second': 526.814, 'eval_steps_per_second': 4.257, 'epoch': 0.19}


[32;1m<s>[0m[31;1mThe[Performance][0m[32;1m of[0m[31;1m a[the][0m[31;1m R[Abbott][0m[31;1m-[Real][0m[31;1m-[Time][0m[31;1m Method[MT][0m[31;1mF[B][0m[31;1m-[R][0m[31;1misks[IF][0m[31;1m-[/][0m[31;1mMS[IN][0m[31;1m-[H][0m[31;1m/[resistance][0m[32;1m assay[0m[31;1m in[when][0m[31;1m in[used][0m[31;1m in[to][0m[31;1m assess[test][0m[31;1m the[My][0m[32;1mc[0m[32;1mob[0m[32;1macter[0m[32;1mium[0m[32;1m tuberculosis[0m[31;1m in[specimens][0m[31;1m in[from][0m[31;1m the[Bangladesh][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.3815, 'learning_rate': 4.0076739878274675e-05, 'epoch': 0.2}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.12562504410743713, 'eval_supp_data_loss_lm': 0.4126805067062378, 'eval_loss': 0.5413737893104553, 'eval_runtime': 5.5084, 'eval_samples_per_second': 539.175, 'eval_steps_per_second': 4.357, 'epoch': 0.2}


[32;1m<s>[0m[31;1mThe[Sm][0m[31;1moking[ad][0m[31;1m-[4][0m[31;1m-[in][0m[31;1m the[T][0m[31;1mum[cells][0m[31;1m is[plays][0m[32;1m a[0m[31;1m role[protective][0m[32;1m role[0m[32;1m in[0m[32;1m the[0m[31;1m proliferation[development][0m[32;1m of[0m[31;1m the[autoimmune][0m[31;1m diseases[S][0m[31;1mARS[j][0m[32;1mö[0m[32;1mgren[0m[32;1m's[0m[31;1m disease[syndrome][0m[31;1m:[in][0m[32;1m the[0m[31;1m rat[non][0m[31;1m-[ob][0m[31;1mesity[ese][0m[31;1m rat[diabetic][0m[31;1m rat[mouse][0m[31;1m model[</s>][0m




Saving model checkpoint to /lm_models/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-36000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.3095, 'learning_rate': 3.941518920349299e-05, 'epoch': 0.21}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.09868025779724121, 'eval_supp_data_loss_lm': 0.4072902202606201, 'eval_loss': 0.5083640813827515, 'eval_runtime': 5.6092, 'eval_samples_per_second': 529.485, 'eval_steps_per_second': 4.279, 'epoch': 0.21}


[32;1m<s>[0m[31;1mThe[Sub][0m[31;1mstrate[ac][0m[32;1mute[0m[31;1m and[Thy][0m[32;1mroid[0m[31;1m Cancer[itis][0m[31;1m in[is][0m[32;1m Associated[0m[32;1m with[0m[31;1m the[H][0m[31;1morm[LA][0m[32;1m-[0m[31;1m1[B][0m[31;1m1[*][0m[31;1mB[18][0m[31;1m and[:][0m[31;1m A[01][0m[31;1m-[,][0m[31;1m3[-][0m[31;1m3[DR][0m[31;1m-[B][0m[31;1m-[1][0m[31;1m,[*][0m[31;1m1[01][0m[31;1m,[and][0m[32;1m -[0m[31;1m3[C][0m[31;1m9[*][0m[31;1m3[04][0m[31;1m,[:][0m[31;1m3[01][0m[31;1m,[-][0m[31;1m5[The][0m[31;1m Role[Sign][0m[32;1mific[0m[32;1mance[0m[32;1m of[0m[32;1m the[0m[31;1m T[New][0m[31;1m Pro[Molecular][0m[31;1m Re[Background][0m[31;1m of[</s>][0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.2051, 'learning_rate': 3.87536385287113e-05, 'epoch': 0.22}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.10316312313079834, 'eval_supp_data_loss_lm': 0.402576208114624, 'eval_loss': 0.508370041847229, 'eval_runtime': 5.6762, 'eval_samples_per_second': 523.241, 'eval_steps_per_second': 4.228, 'epoch': 0.22}


[32;1m<s>[0m[31;1mThe[Effect][0m[32;1m of[0m[32;1m a[0m[31;1m novel[mass][0m[31;1m spect[radio][0m[31;1mfrequency[campaign][0m[32;1m on[0m[31;1m the[family][0m[31;1m health[behaviours][0m[32;1m and[0m[31;1m health[child][0m[31;1m health[survival][0m[32;1m in[0m[31;1m patients[Burk][0m[31;1mhold[ina][0m[32;1m Fas[0m[32;1mo[0m[32;1m:[0m[32;1m a[0m[31;1m systematic[repeated][0m[31;1m review[cross][0m[32;1m-[0m[32;1msectional[0m[31;1m study[,][0m[31;1m prospective[cluster][0m[32;1m-[0m[32;1mrandom[0m[31;1mized[ised][0m[31;1m,[trial][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.1491, 'learning_rate': 3.809208785392962e-05, 'epoch': 0.24}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.11059979349374771, 'eval_supp_data_loss_lm': 0.3983963429927826, 'eval_loss': 0.5118117332458496, 'eval_runtime': 5.5555, 'eval_samples_per_second': 534.608, 'eval_steps_per_second': 4.32, 'epoch': 0.24}


[32;1m<s>[0m[31;1mThe[Ne][0m[32;1muro[0m[31;1mprot[t][0m[31;1mrophic[oxicity][0m[32;1m of[0m[31;1m the[NM][0m[32;1mDA[0m[31;1m-[antagonists][0m[31;1m in[:][0m[32;1m a[0m[31;1m novel[glut][0m[31;1math[am][0m[31;1myl[ater][0m[32;1mg[0m[32;1mic[0m[31;1m and[theory][0m[31;1m-[of][0m[31;1m the[schizophrenia][0m[31;1m and[based][0m[32;1m on[0m[31;1m the[selective][0m[31;1m and[impairment][0m[32;1m of[0m[31;1m the[local][0m[31;1m and[inhibit][0m[32;1mory[0m[31;1m activity[feedback][0m[31;1m</s>[circuits][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.1009, 'learning_rate': 3.7430537179147924e-05, 'epoch': 0.25}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.09144443273544312, 'eval_supp_data_loss_lm': 0.3945960998535156, 'eval_loss': 0.4885094165802002, 'eval_runtime': 5.6728, 'eval_samples_per_second': 523.551, 'eval_steps_per_second': 4.231, 'epoch': 0.25}


[32;1m<s>[0m[31;1mThe[Consider][0m[32;1mations[0m[31;1m on[regarding][0m[31;1m the[treatment][0m[31;1m of[efficiency][0m[31;1m and[,][0m[31;1m safety[diss][0m[31;1mociation[oci][0m[31;1mations[ative][0m[31;1m effects[parts][0m[31;1m of[and][0m[31;1m the[diss][0m[31;1mociation[oci][0m[31;1mations[ative][0m[31;1m factors[am][0m[31;1myl[nesia][0m[31;1m in[for][0m[31;1m patients[Hunt][0m[31;1m's[j][0m[31;1mö[ens][0m[31;1m syndrome[et][0m[32;1m al[0m[32;1m.[0m[31;1m A[�][0m[31;1m�[�][0m[32;1ms[0m[31;1m disease[Sche][0m[31;1mletal[ma][0m[31;1m ([Therapy][0m[31;1m ([for][0m[31;1m the[Diss][0m[31;1mociation[oci][0m[32;1mative[0m[31;1m Diseases[Identity][0m[32;1m Disorder[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.0695, 'learning_rate': 3.676898650436624e-05, 'epoch': 0.26}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.09011886268854141, 'eval_supp_data_loss_lm': 0.39106258749961853, 'eval_loss': 0.48361077904701233, 'eval_runtime': 5.5455, 'eval_samples_per_second': 535.572, 'eval_steps_per_second': 4.328, 'epoch': 0.26}


[32;1m<s>[0m[32;1mThe[0m[31;1m effect[Con][0m[31;1mventional[sequence][0m[32;1m of[0m[31;1m the[Imm][0m[32;1mune[0m[31;1m Respons[Supp][0m[31;1mression[ressive][0m[31;1m Factors[Cells][0m[32;1m in[0m[32;1m the[0m[31;1m Human[Use][0m[32;1m of[0m[31;1m a[Ther][0m[32;1mape[0m[32;1mutic[0m[31;1m Agents[Cancer][0m[31;1m Cells[Vacc][0m[31;1mine[ines][0m[31;1m:[and][0m[32;1m Their[0m[31;1m Role[Import][0m[32;1mance[0m[31;1m for[in][0m[31;1m the[Imm][0m[32;1mune[0m[31;1m Response[Monitoring][0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-48000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.036, 'learning_rate': 3.6107435829584545e-05, 'epoch': 0.28}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.06666389107704163, 'eval_supp_data_loss_lm': 0.3879471719264984, 'eval_loss': 0.45636147260665894, 'eval_runtime': 5.6124, 'eval_samples_per_second': 529.187, 'eval_steps_per_second': 4.276, 'epoch': 0.28}


[32;1m<s>[0m[31;1mThe[Pre][0m[32;1mval[0m[32;1mence[0m[31;1m and[,][0m[31;1m risk[intensity][0m[32;1m and[0m[32;1m risk[0m[32;1m factors[0m[32;1m of[0m[31;1m acute[t][0m[31;1mib[ung][0m[31;1mular[iasis][0m[32;1m in[0m[31;1m the[Kil][0m[31;1mmo[ifi][0m[31;1m,[County][0m[32;1m,[0m[31;1m China[Kenya][0m[31;1m:[II][0m[32;1m:[0m[31;1m a[Results][0m[32;1m from[0m[32;1m a[0m[31;1m cross[school][0m[32;1m-[0m[32;1mbased[0m[31;1m cohort[observational][0m[32;1m study[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 1.0062, 'learning_rate': 3.544588515480286e-05, 'epoch': 0.29}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.07617712020874023, 'eval_supp_data_loss_lm': 0.3851659595966339, 'eval_loss': 0.46340084075927734, 'eval_runtime': 5.6158, 'eval_samples_per_second': 528.861, 'eval_steps_per_second': 4.274, 'epoch': 0.29}


[32;1m<s>[0m[31;1mThe[Compar][0m[32;1mison[0m[32;1m of[0m[31;1m the[skin][0m[31;1m-[dose][0m[31;1m and[measurement][0m[31;1m and[using][0m[31;1m a[nano][0m[31;1m-[D][0m[31;1m-[ot][0m[31;1m-[®][0m[31;1m and[dos][0m[31;1mim[imeter][0m[32;1m and[0m[31;1m a[machine][0m[31;1m learning[readings][0m[31;1m for[of][0m[31;1m the[radiation][0m[31;1m-[dose][0m[31;1m in[during][0m[31;1m the[cardiac][0m[31;1m surgery[cat][0m[32;1mheter[0m[32;1mization[0m[31;1m</s>[in][0m[31;1m patients[children][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.9803, 'learning_rate': 3.478433448002117e-05, 'epoch': 0.3}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.07755330204963684, 'eval_supp_data_loss_lm': 0.3826967179775238, 'eval_loss': 0.462399959564209, 'eval_runtime': 5.6676, 'eval_samples_per_second': 524.029, 'eval_steps_per_second': 4.235, 'epoch': 0.3}


[32;1m<s>[0m[31;1mThe[Coll][0m[32;1magen[0m[31;1m-[triple][0m[31;1m-[hel][0m[31;1mmin[ix][0m[31;1m-[repeat][0m[31;1m-[containing][0m[31;1m a[1][0m[31;1m,[is][0m[31;1m a[overe][0m[32;1mxp[0m[31;1mression[ressed][0m[32;1m in[0m[31;1m the[hep][0m[32;1mato[0m[32;1mcell[0m[32;1mular[0m[32;1m carcin[0m[32;1moma[0m[31;1m cells[and][0m[31;1m is[promotes][0m[31;1m apopt[cell][0m[32;1m proliferation[0m[32;1m and[0m[31;1m invasion[mot][0m[32;1mility[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.9387, 'learning_rate': 3.4122783805239486e-05, 'epoch': 0.32}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.06573893129825592, 'eval_supp_data_loss_lm': 0.3804208040237427, 'eval_loss': 0.4479807913303375, 'eval_runtime': 5.5334, 'eval_samples_per_second': 536.74, 'eval_steps_per_second': 4.337, 'epoch': 0.32}


[32;1m<s>[0m[31;1mThe[Correction][0m[32;1m:[0m[31;1m The[f][0m[31;1mMRI[Mi][0m[31;1mR[RNA][0m[32;1m-[0m[31;1m1[192][0m[31;1m-[and][0m[32;1m mi[0m[31;1mR[RNA][0m[32;1m-[0m[31;1m21[204][0m[31;1m are[Direct][0m[32;1mly[0m[31;1m Reg[Supp][0m[31;1mresses[ress][0m[31;1m the[l][0m[32;1mnc[0m[32;1mRNA[0m[31;1m-[H][0m[31;1meter[OTT][0m[31;1m1[IP][0m[31;1m1[and][0m[31;1m Supp[Inter][0m[31;1maction[rupt][0m[31;1ms[G][0m[31;1mTP[LS][0m[32;1m1[0m[31;1m Expression[-][0m[32;1mMed[0m[32;1miated[0m[31;1m Ap[Gl][0m[31;1muc[ut][0m[31;1mamate[am][0m[31;1myl[ino][0m[32;1mly[0m[32;1msis[0m[32;1m in[0m[31;1m Human[Hep][0m[32;1mato[0m[32;1mcell[0m[32;1mular[0m[32;1m Car[0m[32;1mcin[0m[32;1m

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.9136, 'learning_rate': 3.346123313045779e-05, 'epoch': 0.33}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.060078684240579605, 'eval_supp_data_loss_lm': 0.3781735897064209, 'eval_loss': 0.43991875648498535, 'eval_runtime': 5.5747, 'eval_samples_per_second': 532.765, 'eval_steps_per_second': 4.305, 'epoch': 0.33}


[32;1m<s>[0m[31;1mThe[A][0m[31;1m Novel[Dist][0m[32;1minct[0m[31;1m Role[Ur][0m[32;1minary[0m[31;1m T[Bi][0m[32;1mom[0m[32;1mark[0m[32;1mer[0m[31;1m of[Pattern][0m[31;1m of[Character][0m[31;1mistics[istic][0m[32;1m of[0m[31;1m the[Female][0m[31;1m Patients[Fab][0m[32;1mry[0m[31;1m-[Patients][0m[31;1m with[That][0m[31;1m Have[Mir][0m[31;1mac[rors][0m[31;1m the[Response][0m[32;1m to[0m[31;1m the[En][0m[31;1mz[zyme][0m[31;1m-[Replacement][0m[31;1m</s>[Therapy][0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-60000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.8891, 'learning_rate': 3.279968245567611e-05, 'epoch': 0.34}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.06634222716093063, 'eval_supp_data_loss_lm': 0.37623894214630127, 'eval_loss': 0.4444561302661896, 'eval_runtime': 5.6292, 'eval_samples_per_second': 527.61, 'eval_steps_per_second': 4.264, 'epoch': 0.34}


[32;1m<s>[0m[31;1mThe[Child][0m[31;1mhood[Health][0m[31;1m and[in][0m[32;1m the[0m[31;1m United[Per][0m[31;1mipher[uvian][0m[31;1m Republic[Amazon][0m[32;1m:[0m[31;1m A[Pre][0m[32;1mval[0m[32;1mence[0m[32;1m and[0m[31;1m Risk[Factors][0m[32;1m Associated[0m[32;1m with[0m[31;1m the[Referred][0m[31;1m and[Mor][0m[32;1mbid[0m[32;1mity[0m[32;1m and[0m[31;1m Risk[Health][0m[31;1m-[Care][0m[31;1m Use[Access][0m[31;1m</s>[in][0m[32;1m the[0m[31;1m United[City][0m[32;1m of[0m[31;1m Edinburgh[I][0m[31;1mso[ñ][0m[31;1mob[ap][0m[31;1mar[ari][0m[31;1m,[</s>][0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.8677, 'learning_rate': 3.213813178089442e-05, 'epoch': 0.36}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.055309753865003586, 'eval_supp_data_loss_lm': 0.3743835985660553, 'eval_loss': 0.43127819895744324, 'eval_runtime': 5.5769, 'eval_samples_per_second': 532.557, 'eval_steps_per_second': 4.303, 'epoch': 0.36}


[32;1m<s>[0m[31;1mThe[Cut][0m[31;1maneous[ting][0m[31;1m the[a][0m[31;1m novel[Long][0m[31;1mitudinal[Story][0m[31;1m:[Short][0m[31;1m:[?][0m[32;1m The[0m[31;1m Role[Clinical][0m[31;1m and[Re][0m[32;1mlev[0m[32;1mance[0m[32;1m of[0m[31;1m the[As][0m[31;1mper[king][0m[31;1m for[Parents][0m[31;1m and[,][0m[31;1m and[Nurs][0m[32;1mes[0m[32;1m,[0m[32;1m and[0m[31;1m Children[Young][0m[31;1m Adults[Children][0m[31;1m in[Them][0m[31;1m to[selves][0m[32;1m to[0m[31;1m the[Ident][0m[32;1mify[0m[31;1m the[Children][0m[31;1m with['s][0m[31;1m Health[Mental][0m[32;1m Health[0m[31;1m</s>[Problems][0m[31;1m</s>[by][0m[31;1m the[One][0m[31;1m-[or][0m[31;1m a[Two][0m[31;1m-[Questions][0m[32

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.8449, 'learning_rate': 3.147658110611273e-05, 'epoch': 0.37}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.04545089974999428, 'eval_supp_data_loss_lm': 0.3726274073123932, 'eval_loss': 0.41941097378730774, 'eval_runtime': 5.5651, 'eval_samples_per_second': 533.684, 'eval_steps_per_second': 4.313, 'epoch': 0.37}


[32;1m<s>[0m[31;1mThe[My][0m[31;1mocard[c][0m[32;1mob[0m[31;1macter[acterial][0m[31;1m and[gly][0m[31;1mcop[col][0m[31;1mic[ip][0m[32;1mids[0m[31;1m in[di][0m[31;1mhyd[-][0m[31;1ml[O][0m[32;1m-[0m[31;1ml[acy][0m[31;1ml[lated][0m[31;1m protein[tre][0m[31;1mp[hal][0m[31;1mos[ose][0m[31;1m-[and][0m[31;1m its[tri][0m[32;1m-[0m[31;1mhyd[O][0m[32;1m-[0m[31;1mmethyl[acy][0m[31;1ml[lated][0m[31;1m poly[tre][0m[31;1mp[hal][0m[32;1mose[0m[31;1m:[down][0m[32;1mreg[0m[32;1mulate[0m[31;1m the[in][0m[31;1m vitro[duc][0m[32;1mible[0m[31;1m protein[nit][0m[32;1mric[0m[32;1m oxide[0m[31;1m production[synth][0m[32;1mase[0m[31;1m activity[and][0m[31;1m the[nit][0m[32;1mric[0m[32;1m oxide[

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.8206, 'learning_rate': 3.081503043133104e-05, 'epoch': 0.38}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.046061720699071884, 'eval_supp_data_loss_lm': 0.3712327480316162, 'eval_loss': 0.41867882013320923, 'eval_runtime': 5.6093, 'eval_samples_per_second': 529.48, 'eval_steps_per_second': 4.279, 'epoch': 0.38}


[32;1m<s>[0m[31;1mThe[Mult][0m[31;1mic[ivariate][0m[31;1m Analysis[Gen][0m[31;1momic[ome][0m[31;1m Analysis[-][0m[32;1mWide[0m[31;1m Analysis[Association][0m[31;1m Study[An][0m[32;1malyses[0m[32;1m Reve[0m[32;1mal[0m[32;1m the[0m[31;1m Role[Genetic][0m[31;1m Diversity[Bas][0m[32;1mis[0m[32;1m of[0m[31;1m the[Seed][0m[31;1mling[Fat][0m[32;1mty[0m[32;1m Acid[0m[31;1m-[Com][0m[32;1mposition[0m[32;1m in[0m[31;1m the[O][0m[31;1mste[at][0m[31;1m-[(][0m[31;1mL[A][0m[31;1mry[ven][0m[32;1ma[0m[32;1m sat[0m[32;1miva[0m[32;1m L[0m[32;1m.)[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.7959, 'learning_rate': 3.0153479756549352e-05, 'epoch': 0.4}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.043386634439229965, 'eval_supp_data_loss_lm': 0.36915454268455505, 'eval_loss': 0.4138941168785095, 'eval_runtime': 5.6628, 'eval_samples_per_second': 524.475, 'eval_steps_per_second': 4.238, 'epoch': 0.4}


[32;1m<s>[0m[31;1mThe[Consider][0m[32;1mations[0m[31;1m of[regarding][0m[31;1m the[treatment][0m[31;1m of[efficiency][0m[31;1m of[,][0m[31;1m treatment[diss][0m[31;1mociation[oci][0m[32;1mative[0m[31;1m effects[parts][0m[31;1m of[and][0m[31;1m clinical[diss][0m[31;1mociation[oci][0m[32;1mative[0m[31;1m effects[am][0m[31;1myl[nesia][0m[31;1m in[for][0m[31;1m patients[Hunt][0m[31;1m syndrome[j][0m[31;1mö[ens][0m[31;1m syndrome[et][0m[32;1m al[0m[32;1m.[0m[31;1m A[�][0m[32;1m�[0m[32;1ms[0m[31;1m �[Sche][0m[31;1mng[ma][0m[31;1m ([Therapy][0m[31;1m ([for][0m[31;1m Alzheimer[Diss][0m[31;1mociation[oci][0m[32;1mative[0m[31;1m Diseases[Identity][0m[32;1m Disorder[0m[31;1m ([</s>][0m




Saving model checkpoint to /lm_models/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-500] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.7738, 'learning_rate': 2.9491929081767666e-05, 'epoch': 0.41}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.0363537073135376, 'eval_supp_data_loss_lm': 0.3678407371044159, 'eval_loss': 0.40538638830184937, 'eval_runtime': 5.5842, 'eval_samples_per_second': 531.855, 'eval_steps_per_second': 4.298, 'epoch': 0.41}


[32;1m<s>[0m[31;1mThe[Correction][0m[32;1m:[0m[31;1m A[f][0m[31;1mMRI[Mi][0m[31;1mR[RNA][0m[32;1m-[0m[31;1m5[192][0m[31;1m-[and][0m[32;1m mi[0m[32;1mRNA[0m[32;1m-[0m[31;1m146[204][0m[31;1m are[Direct][0m[32;1mly[0m[31;1m Reg[Supp][0m[31;1mresses[ress][0m[31;1m the[l][0m[32;1mnc[0m[32;1mRNA[0m[31;1m-[H][0m[31;1meter[OTT][0m[31;1m-[IP][0m[31;1m1[and][0m[31;1m Prom[Inter][0m[31;1macts[rupt][0m[31;1ms[G][0m[31;1mSK[LS][0m[32;1m1[0m[31;1m Expression[-][0m[32;1mMed[0m[32;1miated[0m[31;1m Ap[Gl][0m[31;1muc[ut][0m[31;1math[am][0m[31;1mater[ino][0m[32;1mly[0m[32;1msis[0m[32;1m in[0m[31;1m Human[Hep][0m[32;1mato[0m[32;1mcell[0m[32;1mular[0m[32;1m Car[0m[32;1mcin[0m[32;1moma[

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.7532, 'learning_rate': 2.8830378406985976e-05, 'epoch': 0.42}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.04331038519740105, 'eval_supp_data_loss_lm': 0.36662086844444275, 'eval_loss': 0.41124922037124634, 'eval_runtime': 5.6263, 'eval_samples_per_second': 527.873, 'eval_steps_per_second': 4.266, 'epoch': 0.42}


[32;1m<s>[0m[32;1mThe[0m[31;1m Effect[association][0m[32;1m between[0m[31;1m the[time][0m[31;1m of[to][0m[31;1m life[antibiotics][0m[32;1m and[0m[31;1m the[relevant][0m[31;1m risk[clinical][0m[32;1m outcomes[0m[32;1m in[0m[31;1m patients[emergency][0m[32;1m department[0m[32;1m patients[0m[32;1m with[0m[31;1m type[various][0m[31;1m disease[stages][0m[32;1m of[0m[31;1m renal[se][0m[32;1mps[0m[32;1mis[0m[31;1m</s>[:][0m[32;1m a[0m[31;1m retrospective[prospective][0m[31;1m cohort[multi][0m[32;1m-[0m[32;1mcenter[0m[32;1m study[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.737, 'learning_rate': 2.816882773220429e-05, 'epoch': 0.44}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.039946477860212326, 'eval_supp_data_loss_lm': 0.3653295934200287, 'eval_loss': 0.4065241515636444, 'eval_runtime': 5.5342, 'eval_samples_per_second': 536.664, 'eval_steps_per_second': 4.337, 'epoch': 0.44}


[32;1m<s>[0m[31;1mThe[Vari][0m[31;1mation[ations][0m[31;1m in[of][0m[31;1m the[Essential][0m[32;1m Oil[0m[31;1m and[Const][0m[32;1mitu[0m[32;1ments[0m[32;1m in[0m[31;1m the[Ore][0m[31;1moch[gan][0m[32;1mo[0m[32;1m ([0m[31;1mL[Orig][0m[31;1mina[an][0m[31;1mthus[um][0m[31;1m aest[vul][0m[32;1mg[0m[32;1mare[0m[31;1m L[sub][0m[32;1msp[0m[32;1m.[0m[31;1m n[vir][0m[31;1mg[id][0m[31;1mum[ulum][0m[31;1m)[(=][0m[31;1m L[O][0m[31;1mry[.][0m[31;1m l[her][0m[31;1mn[acle][0m[31;1mae[otic][0m[31;1mus[um][0m[32;1m)[0m[31;1m and[over][0m[31;1m the[Cult][0m[31;1mured[ivation][0m[31;1m and[Cy][0m[31;1mt[cles][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.7212, 'learning_rate': 2.7507277057422597e-05, 'epoch': 0.45}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.04147766903042793, 'eval_supp_data_loss_lm': 0.3639678955078125, 'eval_loss': 0.406694620847702, 'eval_runtime': 5.4184, 'eval_samples_per_second': 548.13, 'eval_steps_per_second': 4.429, 'epoch': 0.45}


[32;1m<s>[0m[31;1mThe[I][0m[31;1mon[ne][0m[31;1mur[ffect][0m[31;1mive[iveness][0m[32;1m of[0m[31;1m a[lateral][0m[31;1m thor[-][0m[31;1minf[w][0m[31;1malled[edge][0m[31;1m ultrasound[ins][0m[31;1muff[oles][0m[31;1m in[on][0m[32;1m the[0m[31;1m treatment[improvement][0m[32;1m of[0m[31;1m the[pain][0m[32;1m and[0m[31;1m pain[function][0m[31;1m in[for][0m[31;1m patients[medial][0m[31;1m vent[knee][0m[32;1m oste[0m[32;1mo[0m[32;1marth[0m[32;1mritis[0m[32;1m:[0m[32;1m a[0m[31;1m systematic[meta][0m[32;1m-[0m[32;1manalysis[0m[31;1m</s>[of][0m[31;1m randomized[controlled][0m[31;1m trials[randomized][0m[31;1m controlled[trials][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.7136, 'learning_rate': 2.684572638264091e-05, 'epoch': 0.46}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.04231758043169975, 'eval_supp_data_loss_lm': 0.36263203620910645, 'eval_loss': 0.40628623962402344, 'eval_runtime': 5.613, 'eval_samples_per_second': 529.125, 'eval_steps_per_second': 4.276, 'epoch': 0.46}


[32;1m<s>[0m[31;1mThe[Sec][0m[31;1mular[rets][0m[32;1m of[0m[32;1m the[0m[31;1m �[Sea][0m[31;1m-[Ur][0m[31;1minary[chin][0m[31;1m ([Sp][0m[31;1mine[ic][0m[31;1mill[ule][0m[31;1m:[Reve][0m[31;1mals[aled][0m[31;1m by[:][0m[31;1m A[Protein][0m[31;1m-[Cooper][0m[31;1mations[ativity][0m[31;1m and[Is][0m[31;1m Associated[Respons][0m[32;1mible[0m[32;1m for[0m[31;1m the[ACC][0m[31;1m-[Transformation][0m[31;1m in[,][0m[31;1m and[Int][0m[31;1mestinal[rac][0m[31;1mran[ry][0m[31;1mlate[stall][0m[31;1mization[ine][0m[31;1m Ox[Inc][0m[32;1morpor[0m[32;1mation[0m[32;1m,[0m[32;1m and[0m[31;1m Ant[Gu][0m[32;1mided[0m[31;1m by[Mineral][0m[31;1mization[Part][0m[32;1micle[0m[31;1m Formation[Assem

Saving model checkpoint to /lm_models/checkpoint-3500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-1000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.6999, 'learning_rate': 2.618417570785922e-05, 'epoch': 0.48}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.032561708241701126, 'eval_supp_data_loss_lm': 0.36139702796936035, 'eval_loss': 0.3950004577636719, 'eval_runtime': 5.6812, 'eval_samples_per_second': 522.776, 'eval_steps_per_second': 4.224, 'epoch': 0.48}


[32;1m<s>[0m[31;1mThe[Sub][0m[31;1mclinical[ac][0m[32;1mute[0m[31;1m and[Thy][0m[32;1mroid[0m[31;1mectomy[itis][0m[31;1m in[is][0m[32;1m Associated[0m[32;1m with[0m[31;1m Increased[H][0m[31;1meter[LA][0m[32;1m-[0m[31;1m1[B][0m[31;1m1[*][0m[31;1mA[18][0m[31;1m and[:][0m[31;1m A[01][0m[31;1m-[,][0m[31;1m a[-][0m[31;1m18[DR][0m[32;1mB[0m[32;1m1[0m[31;1m,[*][0m[31;1m and[01][0m[31;1m,[and][0m[32;1m -[0m[31;1m10[C][0m[31;1mX[*][0m[31;1m00[04][0m[32;1m:[0m[31;1m11[01][0m[32;1m-[0m[31;1mD[The][0m[31;1m Role[Sign][0m[32;1mific[0m[32;1mance[0m[32;1m of[0m[32;1m the[0m[31;1m H[New][0m[31;1m Ins[Molecular][0m[31;1m Network[Background][0m[31;1m of[</s>][0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.6885, 'learning_rate': 2.5522625033077535e-05, 'epoch': 0.49}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.027882931753993034, 'eval_supp_data_loss_lm': 0.3604024350643158, 'eval_loss': 0.3891984224319458, 'eval_runtime': 5.7338, 'eval_samples_per_second': 517.978, 'eval_steps_per_second': 4.186, 'epoch': 0.49}


[32;1m<s>[0m[31;1mThe[R][0m[31;1misk[ational][0m[32;1me[0m[32;1m and[0m[31;1m design[study][0m[31;1m protocol[design][0m[31;1m of[for][0m[31;1m a[one][0m[32;1m-[0m[31;1myear[stop][0m[31;1m random[assessment][0m[32;1m of[0m[31;1m the[renal][0m[31;1m function[artery][0m[31;1m bypass[sten][0m[32;1mosis[0m[31;1m in[and][0m[31;1m its[renal][0m[31;1m function[micro][0m[32;1mv[0m[32;1mascular[0m[31;1m endot[perf][0m[32;1musion[0m[31;1m in[with][0m[31;1m a[contrast][0m[32;1m-[0m[32;1menh[0m[32;1manced[0m[31;1m radi[ultrasound][0m[31;1m</s>[for][0m[31;1m the[patients][0m[32;1m with[0m[31;1m advanced[suspected][0m[31;1m acute[renov][0m[31;1mative[ascular][0m[31;1mization[hypertension][0m[32

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.6577, 'learning_rate': 2.4861074358295845e-05, 'epoch': 0.5}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.021216275170445442, 'eval_supp_data_loss_lm': 0.3594028055667877, 'eval_loss': 0.3813721239566803, 'eval_runtime': 5.6371, 'eval_samples_per_second': 526.868, 'eval_steps_per_second': 4.258, 'epoch': 0.5}


[32;1m<s>[0m[31;1mThe[S][0m[31;1must[ero][0m[32;1m-[0m[31;1mtem[ident][0m[31;1mical[ification][0m[32;1m of[0m[31;1m a[the][0m[31;1m novel[a][0m[31;1mort[et][0m[31;1miology[i][0m[31;1mogenic[ologies][0m[32;1m of[0m[31;1m the[human][0m[31;1m pap[malaria][0m[31;1m virus[exposure][0m[31;1m and[(][0m[31;1mH[Pl][0m[32;1mas[0m[32;1mmod[0m[32;1mium[0m[31;1m fal[s][0m[32;1mpp[0m[32;1m.)[0m[31;1m and[in][0m[32;1m the[0m[31;1m United[Lim][0m[31;1mb[u][0m[31;1m,[K][0m[31;1mwa[oss][0m[31;1mu[a][0m[31;1m region[District][0m[31;1m,[of][0m[31;1m China[Jim][0m[32;1mma[0m[31;1m,[Zone][0m[32;1m,[0m[31;1m Ethiopia[South][0m[31;1m Africa[western][0m[32;1m Ethiopia[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.6335, 'learning_rate': 2.419952368351416e-05, 'epoch': 0.52}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.02769063599407673, 'eval_supp_data_loss_lm': 0.3584207594394684, 'eval_loss': 0.38705262541770935, 'eval_runtime': 5.6016, 'eval_samples_per_second': 530.205, 'eval_steps_per_second': 4.284, 'epoch': 0.52}


[32;1m<s>[0m[31;1mThe[Pred][0m[32;1miction[0m[32;1m of[0m[31;1m the[30][0m[32;1m-[0m[32;1mday[0m[31;1m mortality[pediatric][0m[31;1m mortality[un][0m[31;1mmet[planned][0m[32;1m hospital[0m[31;1m stay[izations][0m[31;1m in[using][0m[31;1m a[the][0m[31;1m first[Johns][0m[31;1mful[Hopkins][0m[31;1m Medical[Adjusted][0m[31;1m Card[Clinical][0m[31;1m Out[Groups][0m[31;1m ([risk][0m[31;1m score[adjustment][0m[31;1m questionnaire[system][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.6232, 'learning_rate': 2.353797300873247e-05, 'epoch': 0.53}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.022430196404457092, 'eval_supp_data_loss_lm': 0.3573106527328491, 'eval_loss': 0.38055309653282166, 'eval_runtime': 5.4728, 'eval_samples_per_second': 542.679, 'eval_steps_per_second': 4.385, 'epoch': 0.53}


[32;1m<s>[0m[31;1mThe[Primary][0m[31;1m and[Should][0m[32;1mer[0m[32;1m Ar[0m[32;1mthro[0m[32;1mpl[0m[32;1masty[0m[31;1m for[Versus][0m[31;1m Int[Conservative][0m[31;1m Surgery[Treatment][0m[32;1m for[0m[31;1m Ac[Com][0m[31;1mor[min][0m[31;1meral[uted][0m[31;1m Cor[Pro][0m[31;1mstate[x][0m[32;1mimal[0m[31;1m L[Hum][0m[31;1moral[eral][0m[31;1m Ar[Fract][0m[31;1mure[ures][0m[32;1m:[0m[32;1m A[0m[31;1m Retro[System][0m[32;1matic[0m[31;1m Review[Literature][0m[32;1m Review[0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-4000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-1500] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.6125, 'learning_rate': 2.287642233395078e-05, 'epoch': 0.54}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.02574644424021244, 'eval_supp_data_loss_lm': 0.35653021931648254, 'eval_loss': 0.3832088112831116, 'eval_runtime': 5.6862, 'eval_samples_per_second': 522.317, 'eval_steps_per_second': 4.221, 'epoch': 0.54}


[32;1m<s>[0m[31;1mThe[C][0m[31;1mognitive[ave][0m[32;1molin[0m[32;1m-[0m[32;1m1[0m[31;1m-[-][0m[32;1m A[0m[32;1m Novel[0m[31;1m Ant[Inter][0m[31;1maction[acting][0m[31;1m Protein[Partner][0m[31;1m with[of][0m[31;1m the[Organic][0m[31;1m Comp[C][0m[32;1mation[0m[31;1mic[/][0m[31;1mPoly[C][0m[31;1mou[arn][0m[32;1mit[0m[32;1mine[0m[31;1m and[Trans][0m[32;1mporter[0m[31;1m 1[(][0m[31;1mC[Oct][0m[31;1m2[n][0m[31;1m)[2][0m[31;1m)[):][0m[31;1m A[Effect][0m[31;1m on[of][0m[31;1m the[Protein][0m[32;1m Kin[0m[32;1mase[0m[31;1m ([C][0m[31;1m and[on][0m[31;1m the[This][0m[31;1m Mechan[Inter][0m[32;1maction[0m[31;1m</s>[in][0m[31;1m the[Rat][0m[31;1m Brain[Astro][0m[32;1mcy[0m[32;1mt

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.6046, 'learning_rate': 2.2214871659169094e-05, 'epoch': 0.56}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.025202862918376923, 'eval_supp_data_loss_lm': 0.35520002245903015, 'eval_loss': 0.38129258155822754, 'eval_runtime': 5.4891, 'eval_samples_per_second': 541.075, 'eval_steps_per_second': 4.372, 'epoch': 0.56}


[32;1m<s>[0m[31;1mThe[Incre][0m[32;1masing[0m[31;1m the[Serious][0m[31;1m Diseases[ness][0m[32;1m of[0m[31;1m the[Plant][0m[31;1m Growth[Inv][0m[31;1molve[asions][0m[32;1m in[0m[31;1m the[Cro][0m[31;1mhn[pl][0m[31;1masty[ands][0m[31;1m:[of][0m[31;1m the[Eastern][0m[32;1m China[0m[31;1m:[in][0m[31;1m the[Rel][0m[32;1mation[0m[32;1m to[0m[31;1m the[Changing][0m[31;1m Environmental[Farming][0m[31;1m and[Practices][0m[31;1m</s>[:][0m[32;1m A[0m[31;1m Cross[Case][0m[32;1m Study[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5961, 'learning_rate': 2.1553320984387404e-05, 'epoch': 0.57}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.022854924201965332, 'eval_supp_data_loss_lm': 0.35509398579597473, 'eval_loss': 0.37880855798721313, 'eval_runtime': 5.628, 'eval_samples_per_second': 527.721, 'eval_steps_per_second': 4.264, 'epoch': 0.57}


[32;1m<s>[0m[31;1mThe[Vari][0m[31;1mation[ations][0m[31;1m in[of][0m[31;1m the[Essential][0m[32;1m Oil[0m[31;1m and[Const][0m[32;1mitu[0m[32;1ments[0m[32;1m in[0m[31;1m the[Ore][0m[31;1moch[gan][0m[32;1mo[0m[32;1m ([0m[31;1mL[Orig][0m[31;1mina[an][0m[31;1mthus[um][0m[31;1m aest[vul][0m[32;1mg[0m[32;1mare[0m[31;1m L[sub][0m[32;1msp[0m[32;1m.[0m[31;1m n[vir][0m[31;1mg[id][0m[31;1mum[ulum][0m[31;1m L[(=][0m[31;1m L[O][0m[31;1mry[.][0m[31;1m sp[her][0m[31;1mnia[acle][0m[31;1mum[otic][0m[31;1mus[um][0m[32;1m)[0m[31;1m and[over][0m[31;1m the[Cult][0m[31;1mured[ivation][0m[31;1m and[Cy][0m[31;1mt[cles][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5891, 'learning_rate': 2.0891770309605714e-05, 'epoch': 0.58}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.019005203619599342, 'eval_supp_data_loss_lm': 0.3539273738861084, 'eval_loss': 0.37368693947792053, 'eval_runtime': 5.5509, 'eval_samples_per_second': 535.05, 'eval_steps_per_second': 4.324, 'epoch': 0.58}


[32;1m<s>[0m[31;1mThe[Cl][0m[32;1minical[0m[31;1m and[practice][0m[31;1m and[guidelines][0m[32;1m for[0m[32;1m the[0m[31;1m management[surgical][0m[32;1m management[0m[32;1m of[0m[31;1m patients[colon][0m[32;1m cancer[0m[32;1m:[0m[32;1m a[0m[31;1m systematic[consensus][0m[32;1m statement[0m[31;1m from[of][0m[32;1m the[0m[31;1m American[Hell][0m[31;1men[enic][0m[31;1m Society[and][0m[31;1m European[Cy][0m[31;1mstic[pri][0m[31;1mo[ot][0m[31;1m Society[Col][0m[32;1more[0m[32;1mct[0m[32;1mal[0m[32;1m Cancer[0m[31;1m Society[Study][0m[31;1m</s>[Group][0m[31;1m</s>[by][0m[32;1m the[0m[31;1m Society[He][0m[31;1mLa[S][0m[31;1m-[MO][0m[31;1m-[*][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.581, 'learning_rate': 2.0230219634824028e-05, 'epoch': 0.6}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.016227416694164276, 'eval_supp_data_loss_lm': 0.35305333137512207, 'eval_loss': 0.3699134886264801, 'eval_runtime': 5.6515, 'eval_samples_per_second': 525.521, 'eval_steps_per_second': 4.247, 'epoch': 0.6}


[32;1m<s>[0m[31;1mThe[Evidence][0m[32;1m for[0m[31;1m the[Phen][0m[31;1molic[otyp][0m[32;1mic[0m[31;1m and[Plastic][0m[32;1mity[0m[32;1m in[0m[31;1m the[Agg][0m[31;1mreg[ressive][0m[31;1m and[Triple][0m[31;1m Negative[-][0m[32;1mNeg[0m[32;1mative[0m[32;1m Breast[0m[32;1m Cancer[0m[32;1m:[0m[31;1m A[Human][0m[31;1m Immun[Biology][0m[31;1m and[Is][0m[31;1m a[Recap][0m[32;1mit[0m[32;1mulated[0m[32;1m by[0m[31;1m the[a][0m[32;1m Novel[0m[32;1m Model[0m[31;1m</s>[System][0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-4500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-2000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5756, 'learning_rate': 1.9568668960042342e-05, 'epoch': 0.61}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.018505625426769257, 'eval_supp_data_loss_lm': 0.3525632917881012, 'eval_loss': 0.37179839611053467, 'eval_runtime': 5.5539, 'eval_samples_per_second': 534.755, 'eval_steps_per_second': 4.321, 'epoch': 0.61}


[32;1m<s>[0m[31;1mThe[End][0m[31;1moscopic[ogenous][0m[31;1m and[insulin][0m[31;1m resistance[secretion][0m[31;1m in[even][0m[31;1m in[at][0m[31;1m the[a][0m[31;1m high[very][0m[32;1m low[0m[31;1m risk[level][0m[31;1m is[contributes][0m[32;1m to[0m[32;1m the[0m[31;1m development[stability][0m[32;1m of[0m[31;1m the[blood][0m[31;1m pressure[glucose][0m[31;1m in[control][0m[32;1m in[0m[31;1m rats[ful][0m[32;1mmin[0m[32;1mant[0m[31;1m rats[type][0m[31;1m 2[1][0m[31;1m diabetic[diabetes][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5684, 'learning_rate': 1.8907118285260652e-05, 'epoch': 0.62}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.01721191219985485, 'eval_supp_data_loss_lm': 0.35168394446372986, 'eval_loss': 0.3695942461490631, 'eval_runtime': 5.4479, 'eval_samples_per_second': 545.159, 'eval_steps_per_second': 4.405, 'epoch': 0.62}


[32;1m<s>[0m[31;1mThe[L][0m[31;1moss[ateral][0m[31;1m and[K][0m[32;1mnee[0m[31;1m Ar[Pain][0m[31;1m in[after][0m[31;1m Total[Outside][0m[32;1m-[0m[31;1mof[in][0m[31;1m Surgery[Anat][0m[31;1momy[omic][0m[31;1m Surgery[Double][0m[32;1m-[0m[31;1mBl[B][0m[32;1mundle[0m[31;1m Surgery[An][0m[32;1mterior[0m[32;1m Cru[0m[32;1mci[0m[32;1mate[0m[32;1m L[0m[32;1mig[0m[32;1mament[0m[32;1m Reconstruction[0m[31;1m:[Using][0m[31;1m a[the][0m[31;1m Poster[Tight][0m[31;1m Junction[R][0m[31;1misk[ope][0m[31;1m-[RT][0m[31;1m-[</s>][0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5622, 'learning_rate': 1.8245567610478963e-05, 'epoch': 0.64}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.02021445706486702, 'eval_supp_data_loss_lm': 0.35102733969688416, 'eval_loss': 0.37201350927352905, 'eval_runtime': 5.7212, 'eval_samples_per_second': 519.124, 'eval_steps_per_second': 4.195, 'epoch': 0.64}


[32;1m<s>[0m[31;1mThe[T][0m[31;1mreatment[ACC][0m[31;1m-[3][0m[31;1m-[is][0m[32;1m a[0m[31;1m novel[micro][0m[31;1mRNA[tub][0m[32;1mule[0m[31;1m-[plus][0m[31;1m protein[end][0m[31;1mopl[-][0m[31;1mstage[tracking][0m[32;1m protein[0m[32;1m that[0m[32;1m promotes[0m[31;1m the[ax][0m[31;1monal[on][0m[31;1m formation[elong][0m[32;1mation[0m[32;1m and[0m[31;1m migration[also][0m[31;1m inhibits[regulates][0m[31;1m the[micro][0m[31;1mgl[tub][0m[32;1mule[0m[31;1m-[plus][0m[31;1m cell[end][0m[31;1m-[dynamics][0m[31;1m</s>[in][0m[31;1m human[multiple][0m[31;1m my[embryonic][0m[31;1m stem[cell][0m[31;1m lines[types][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5566, 'learning_rate': 1.7584016935697276e-05, 'epoch': 0.65}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.016418354585766792, 'eval_supp_data_loss_lm': 0.3504890501499176, 'eval_loss': 0.3675645589828491, 'eval_runtime': 5.6926, 'eval_samples_per_second': 521.729, 'eval_steps_per_second': 4.216, 'epoch': 0.65}


[32;1m<s>[0m[31;1mThe[Mult][0m[31;1mid[ivariate][0m[31;1m Analysis[Gen][0m[31;1momics[ome][0m[32;1m-[0m[32;1mWide[0m[31;1m Analysis[Association][0m[31;1m Study[An][0m[32;1malyses[0m[32;1m Reve[0m[32;1mal[0m[32;1m the[0m[31;1m Role[Genetic][0m[31;1m Diversity[Bas][0m[32;1mis[0m[32;1m of[0m[31;1m the[Seed][0m[31;1mling[Fat][0m[32;1mty[0m[32;1m Acid[0m[31;1m Production[Com][0m[32;1mposition[0m[32;1m in[0m[31;1m the[O][0m[31;1mry[at][0m[31;1m-[(][0m[31;1mL[A][0m[31;1mry[ven][0m[32;1ma[0m[32;1m sat[0m[32;1miva[0m[32;1m L[0m[32;1m.)[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5525, 'learning_rate': 1.6922466260915587e-05, 'epoch': 0.66}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.012365289032459259, 'eval_supp_data_loss_lm': 0.3498590886592865, 'eval_loss': 0.36279717087745667, 'eval_runtime': 5.6129, 'eval_samples_per_second': 529.137, 'eval_steps_per_second': 4.276, 'epoch': 0.66}


[32;1m<s>[0m[31;1mThe[S][0m[31;1murgical[ero][0m[32;1m-[0m[31;1m and[ident][0m[31;1mical[ification][0m[32;1m of[0m[31;1m a[the][0m[31;1m novel[a][0m[31;1mry[et][0m[31;1miology[i][0m[31;1mogenic[ologies][0m[32;1m of[0m[31;1m the[human][0m[31;1m pap[malaria][0m[31;1m virus[exposure][0m[31;1m and[(][0m[31;1mH[Pl][0m[32;1mas[0m[32;1mmod[0m[32;1mium[0m[31;1m fal[s][0m[32;1mpp[0m[32;1m.)[0m[31;1m and[in][0m[32;1m the[0m[31;1m United[Lim][0m[31;1mb[u][0m[31;1m Province[K][0m[31;1mwa[oss][0m[32;1ma[0m[31;1m region[District][0m[31;1m,[of][0m[31;1m China[Jim][0m[32;1mma[0m[31;1m,[Zone][0m[32;1m,[0m[31;1m Ethiopia[South][0m[31;1m Ethiopia[western][0m[32;1m Ethiopia[0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-5000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-2500] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.545, 'learning_rate': 1.6260915586133897e-05, 'epoch': 0.67}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.014474832452833652, 'eval_supp_data_loss_lm': 0.34926021099090576, 'eval_loss': 0.3643675148487091, 'eval_runtime': 5.611, 'eval_samples_per_second': 529.316, 'eval_steps_per_second': 4.277, 'epoch': 0.67}


[32;1m<s>[0m[31;1mThe[Sub][0m[31;1mclinical[ac][0m[32;1mute[0m[31;1m and[Thy][0m[32;1mroid[0m[31;1mectomy[itis][0m[31;1m in[is][0m[32;1m Associated[0m[32;1m with[0m[31;1m Increased[H][0m[31;1meter[LA][0m[32;1m-[0m[32;1mB[0m[31;1m1[*][0m[31;1mA[18][0m[31;1m and[:][0m[31;1m A[01][0m[31;1m-[,][0m[31;1m a[-][0m[31;1m18[DR][0m[32;1mB[0m[32;1m1[0m[31;1m,[*][0m[31;1m11[01][0m[31;1m,[and][0m[32;1m -[0m[31;1m1[C][0m[31;1m>[*][0m[31;1m12[04][0m[32;1m:[0m[31;1m11[01][0m[31;1m,[-][0m[31;1m3[The][0m[31;1m Role[Sign][0m[32;1mific[0m[32;1mance[0m[32;1m of[0m[32;1m the[0m[31;1m N[New][0m[31;1m Ins[Molecular][0m[31;1m Mechan[Background][0m[31;1m of[</s>][0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5401, 'learning_rate': 1.559936491135221e-05, 'epoch': 0.69}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.013575717806816101, 'eval_supp_data_loss_lm': 0.3486480712890625, 'eval_loss': 0.362848699092865, 'eval_runtime': 5.6756, 'eval_samples_per_second': 523.296, 'eval_steps_per_second': 4.229, 'epoch': 0.69}


[32;1m<s>[0m[31;1mThe[Compar][0m[32;1mison[0m[32;1m of[0m[31;1m the[Ex][0m[31;1mogenous[isting][0m[31;1m and[Clinical][0m[31;1m Trials[Sc][0m[31;1males[oring][0m[31;1m Methods[Systems][0m[31;1m for[in][0m[31;1m Patients[Predict][0m[32;1ming[0m[31;1m the[Sever][0m[32;1mity[0m[31;1m of[and][0m[31;1m Mort[Pro][0m[32;1mgn[0m[31;1mosis[oses][0m[32;1m of[0m[31;1m Patients[Hyper][0m[31;1mgly[lip][0m[32;1mid[0m[31;1memia[emic][0m[31;1m Patients[Ac][0m[32;1mute[0m[31;1m My[Panc][0m[32;1mreat[0m[32;1mitis[0m[32;1m in[0m[31;1m Patients[Chinese][0m[32;1m Patients[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5339, 'learning_rate': 1.4937814236570521e-05, 'epoch': 0.7}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.011377017013728619, 'eval_supp_data_loss_lm': 0.3484667241573334, 'eval_loss': 0.3604012429714203, 'eval_runtime': 5.5525, 'eval_samples_per_second': 534.892, 'eval_steps_per_second': 4.322, 'epoch': 0.7}


[32;1m<s>[0m[31;1mThe[Struct][0m[32;1mural[0m[31;1m and[plastic][0m[32;1mity[0m[32;1m of[0m[31;1m the[green][0m[31;1m tea[fluorescent][0m[31;1m nanop[protein][0m[31;1m-[to][0m[31;1m identify[amino][0m[32;1m acid[0m[31;1m-[delet][0m[32;1mions[0m[31;1m in[and][0m[31;1m their[flu][0m[32;1morescence[0m[31;1m of[rescue][0m[31;1m of[by][0m[31;1m a[folding][0m[31;1m of[-][0m[31;1mbased[enh][0m[31;1manced[ancing][0m[31;1m micro[mutations][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5319, 'learning_rate': 1.4276263561788834e-05, 'epoch': 0.71}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.012337611056864262, 'eval_supp_data_loss_lm': 0.3478792607784271, 'eval_loss': 0.3607954680919647, 'eval_runtime': 5.6774, 'eval_samples_per_second': 523.125, 'eval_steps_per_second': 4.227, 'epoch': 0.71}


[32;1m<s>[0m[31;1mThe[Struct][0m[32;1mural[0m[31;1m and[plastic][0m[32;1mity[0m[32;1m of[0m[31;1m the[green][0m[31;1m tea[fluorescent][0m[31;1m nanop[protein][0m[31;1m-[to][0m[31;1m enhance[amino][0m[32;1m acid[0m[31;1m-[delet][0m[32;1mions[0m[31;1m in[and][0m[31;1m their[flu][0m[32;1morescence[0m[31;1m of[rescue][0m[31;1m of[by][0m[31;1m a[folding][0m[31;1m of[-][0m[31;1mbased[enh][0m[31;1manced[ancing][0m[31;1m micro[mutations][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5274, 'learning_rate': 1.3614712887007144e-05, 'epoch': 0.73}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.010027620941400528, 'eval_supp_data_loss_lm': 0.34741392731666565, 'eval_loss': 0.3579484522342682, 'eval_runtime': 5.7855, 'eval_samples_per_second': 513.348, 'eval_steps_per_second': 4.148, 'epoch': 0.73}


[32;1m<s>[0m[31;1mThe[Pre][0m[32;1mval[0m[32;1mence[0m[31;1m and[,][0m[31;1m risk[intensity][0m[32;1m and[0m[32;1m risk[0m[32;1m factors[0m[32;1m of[0m[31;1m acute[t][0m[31;1mib[ung][0m[31;1msten[iasis][0m[31;1m among[in][0m[31;1m children[Kil][0m[32;1mifi[0m[31;1m,[County][0m[32;1m,[0m[31;1m South[Kenya][0m[31;1m:[II][0m[32;1m:[0m[31;1m a[Results][0m[32;1m from[0m[32;1m a[0m[31;1m cross[school][0m[32;1m-[0m[32;1mbased[0m[31;1m cross[observational][0m[32;1m study[0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-5500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-3000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5248, 'learning_rate': 1.2953162212225456e-05, 'epoch': 0.74}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.009163916110992432, 'eval_supp_data_loss_lm': 0.34711185097694397, 'eval_loss': 0.3567846417427063, 'eval_runtime': 5.5578, 'eval_samples_per_second': 534.382, 'eval_steps_per_second': 4.318, 'epoch': 0.74}


[32;1m<s>[0m[31;1mThe[E][0m[32;1mval[0m[32;1muation[0m[32;1m of[0m[31;1m the[New][0m[31;1m Ins[D][0m[31;1mental[ih][0m[32;1myd[0m[31;1mro[roph][0m[31;1mosph[thal][0m[31;1mate[azine][0m[32;1m-[0m[31;1mBased[App][0m[31;1mlying[ended][0m[31;1m Poly[2][0m[31;1m-[,][0m[31;1m3[4][0m[32;1m-[0m[32;1mD[0m[31;1mih[iam][0m[31;1mino[in][0m[31;1mol[op][0m[32;1myrim[0m[31;1midine[id][0m[32;1mines[0m[31;1m as[against][0m[31;1m the[Bac][0m[32;1millus[0m[31;1m subt[anth][0m[32;1mrac[0m[32;1mis[0m[31;1m and[:][0m[31;1m A[Improved][0m[31;1m Ant[Synt][0m[31;1mhesis[heses][0m[31;1m and[Using][0m[32;1m a[0m[31;1m Novel[New][0m[31;1m Poly[P][0m[31;1morous[inc][0m[32;1mer[0m[31;1m-[Complex][0

INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5202, 'learning_rate': 1.2291611537443768e-05, 'epoch': 0.75}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.009637362323701382, 'eval_supp_data_loss_lm': 0.3465050756931305, 'eval_loss': 0.3566593527793884, 'eval_runtime': 5.592, 'eval_samples_per_second': 531.115, 'eval_steps_per_second': 4.292, 'epoch': 0.75}


[32;1m<s>[0m[31;1mThe[M][0m[32;1mole[0m[32;1mcular[0m[31;1m characterization[evidence][0m[31;1m for[of][0m[31;1m the[par][0m[31;1mathy[v][0m[32;1mov[0m[32;1mirus[0m[31;1m infection[B][0m[31;1m1[19][0m[31;1m infection[in][0m[32;1m the[0m[31;1m presence[cut][0m[32;1maneous[0m[31;1m tract[poly][0m[31;1mcy[arter][0m[31;1mial[itis][0m[31;1m of[nod][0m[31;1mule[osa][0m[31;1m of[tissue][0m[31;1m of[from][0m[32;1m a[0m[32;1m patient[0m[32;1m with[0m[31;1m a[par][0m[32;1mv[0m[32;1mov[0m[32;1mirus[0m[32;1m-[0m[32;1massociated[0m[31;1m pneumonia[hem][0m[32;1moph[0m[31;1milia[ag][0m[32;1mocy[0m[32;1mtic[0m[31;1m leukemia[syndrome][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5185, 'learning_rate': 1.163006086266208e-05, 'epoch': 0.77}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.011253145523369312, 'eval_supp_data_loss_lm': 0.3461898863315582, 'eval_loss': 0.358002632856369, 'eval_runtime': 5.6414, 'eval_samples_per_second': 526.463, 'eval_steps_per_second': 4.254, 'epoch': 0.77}


[32;1m<s>[0m[31;1mThe[Screen][0m[32;1ming[0m[32;1m for[0m[31;1m the[an][0m[31;1memia[underlying][0m[31;1m role[my][0m[32;1mel[0m[31;1moid[op][0m[32;1mrol[0m[32;1mifer[0m[32;1mative[0m[31;1m syndrome[ne][0m[31;1mph[opl][0m[32;1masm[0m[32;1m in[0m[31;1m a[patients][0m[32;1m with[0m[31;1m advanced[th][0m[32;1mrom[0m[32;1mb[0m[32;1mocy[0m[31;1mtop[t][0m[31;1moma[osis][0m[31;1m:[post][0m[32;1m-[0m[31;1moperative[ind][0m[32;1muction[0m[31;1m:[chemotherapy][0m[31;1m:[for][0m[31;1m advanced[acute][0m[31;1m lymph[my][0m[32;1mel[0m[32;1moid[0m[32;1m leukemia[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5155, 'learning_rate': 1.0968510187880392e-05, 'epoch': 0.78}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.008530760183930397, 'eval_supp_data_loss_lm': 0.3458307683467865, 'eval_loss': 0.3548477590084076, 'eval_runtime': 5.6084, 'eval_samples_per_second': 529.563, 'eval_steps_per_second': 4.279, 'epoch': 0.78}


[32;1m<s>[0m[31;1mThe[�][0m[32;1m�[0m[31;1mIt[T][0m[31;1mreatment[ape][0m[31;1m-[dermat][0m[31;1mitis[osc][0m[32;1mopy[0m[32;1m�[0m[32;1m�[0m[32;1m:[0m[31;1m a[constructing][0m[32;1m a[0m[31;1m new[low][0m[32;1m-[0m[31;1mlevel[cost][0m[31;1m,[dermat][0m[31;1mology[oscope][0m[31;1m for[using][0m[32;1m a[0m[31;1m new[mobile][0m[32;1m phone[0m[31;1m-[,][0m[31;1m a[immersion][0m[31;1m,[fluid][0m[31;1m,[and][0m[31;1m a[transparent][0m[31;1m-[adhesive][0m[31;1m</s>[tape][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5116, 'learning_rate': 1.0306959513098703e-05, 'epoch': 0.79}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.009295112453401089, 'eval_supp_data_loss_lm': 0.3453930914402008, 'eval_loss': 0.3551974892616272, 'eval_runtime': 5.6516, 'eval_samples_per_second': 525.512, 'eval_steps_per_second': 4.247, 'epoch': 0.79}


[32;1m<s>[0m[31;1mThe[Ad][0m[31;1mherence[ren][0m[31;1mal[omed][0m[32;1mull[0m[32;1min[0m[31;1m-[,][0m[31;1m a[period][0m[32;1mont[0m[31;1mal[itis][0m[31;1m and[,][0m[31;1m and[diabetes][0m[31;1m,[-][0m[31;1mrelated[un][0m[31;1minfect[ravel][0m[32;1ming[0m[31;1m,[the][0m[31;1m role[equ][0m[31;1mine[iv][0m[31;1miral[ocal][0m[31;1m role[relationship][0m[31;1m between[:][0m[31;1m a[A][0m[31;1m case[clinic][0m[31;1mopath[obi][0m[31;1mological[ochemical][0m[31;1m study[cross][0m[32;1m-[0m[32;1msectional[0m[32;1m study[0m[31;1m in[</s>][0m




Saving model checkpoint to /lm_models/checkpoint-6000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-3500] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5106, 'learning_rate': 9.645408838317017e-06, 'epoch': 0.81}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.007121555507183075, 'eval_supp_data_loss_lm': 0.34507158398628235, 'eval_loss': 0.3526352345943451, 'eval_runtime': 5.5007, 'eval_samples_per_second': 539.936, 'eval_steps_per_second': 4.363, 'epoch': 0.81}


[32;1m<s>[0m[31;1mThe[Gen][0m[31;1metic[ome][0m[31;1m-[wide][0m[31;1m association[identification][0m[31;1m of[and][0m[31;1m expression[characterization][0m[32;1m of[0m[31;1m a[micro][0m[31;1mRNA[s][0m[32;1matellite[0m[32;1m markers[0m[32;1m in[0m[31;1m the[black][0m[31;1m and[pepper][0m[32;1m ([0m[31;1mC[P][0m[31;1mrun[iper][0m[31;1mia[n][0m[31;1miger[igr][0m[32;1mum[0m[31;1m L[):][0m[31;1m a[A][0m[31;1m new[valuable][0m[31;1m tool[resource][0m[32;1m for[0m[31;1m the[boosting][0m[31;1m the[gen][0m[31;1motyp[omics][0m[31;1m</s>[applications][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5083, 'learning_rate': 8.983858163535327e-06, 'epoch': 0.82}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.007395031396299601, 'eval_supp_data_loss_lm': 0.34481868147850037, 'eval_loss': 0.3526866137981415, 'eval_runtime': 5.6641, 'eval_samples_per_second': 524.358, 'eval_steps_per_second': 4.237, 'epoch': 0.82}


[32;1m<s>[0m[31;1mThe[E][0m[32;1mval[0m[32;1muation[0m[32;1m of[0m[32;1m the[0m[31;1m efficacy[Clinical][0m[31;1m Out[and][0m[31;1m Clinical[Economic][0m[31;1m Out[B][0m[32;1murden[0m[32;1m of[0m[31;1m Infect[Poor][0m[31;1m Out[Gly][0m[32;1mcemic[0m[32;1m Control[0m[31;1m in[Associated][0m[32;1m with[0m[31;1m the[Ther][0m[32;1mape[0m[32;1mutic[0m[31;1m Therapy[In][0m[31;1mject[ert][0m[31;1mial[ia][0m[32;1m in[0m[32;1m Patients[0m[32;1m with[0m[32;1m Type[0m[31;1m 2[][0m[32;1m2[0m[32;1m Diabetes[0m[31;1m</s>[in][0m[32;1m the[0m[32;1m United[0m[32;1m States[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5052, 'learning_rate': 8.322307488753639e-06, 'epoch': 0.83}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.0069491066969931126, 'eval_supp_data_loss_lm': 0.3446246385574341, 'eval_loss': 0.3520488142967224, 'eval_runtime': 5.6317, 'eval_samples_per_second': 527.37, 'eval_steps_per_second': 4.262, 'epoch': 0.83}


[32;1m<s>[0m[32;1mThe[0m[31;1m effect[Con][0m[31;1msequ[sequence][0m[32;1m of[0m[31;1m the[Imm][0m[32;1mune[0m[31;1m Response[Supp][0m[31;1mression[ressive][0m[31;1m Effects[Cells][0m[32;1m in[0m[32;1m the[0m[31;1m Brain[Use][0m[32;1m of[0m[31;1m a[Ther][0m[32;1mape[0m[32;1mutic[0m[31;1m Agents[Cancer][0m[31;1m Therapy[Vacc][0m[31;1mine[ines][0m[31;1m:[and][0m[32;1m Their[0m[31;1m Association[Import][0m[32;1mance[0m[31;1m for[in][0m[31;1m the[Imm][0m[32;1mune[0m[31;1m Response[Monitoring][0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5043, 'learning_rate': 7.660756813971951e-06, 'epoch': 0.85}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.0074229673482477665, 'eval_supp_data_loss_lm': 0.3443067967891693, 'eval_loss': 0.3522076904773712, 'eval_runtime': 5.6985, 'eval_samples_per_second': 521.189, 'eval_steps_per_second': 4.212, 'epoch': 0.85}


[32;1m<s>[0m[32;1mThe[0m[31;1m effect[association][0m[32;1m between[0m[31;1m the[time][0m[32;1m to[0m[31;1m death[antibiotics][0m[32;1m and[0m[31;1m the[relevant][0m[32;1m clinical[0m[32;1m outcomes[0m[32;1m in[0m[31;1m patients[emergency][0m[32;1m department[0m[32;1m patients[0m[32;1m with[0m[31;1m acute[various][0m[31;1m disease[stages][0m[32;1m of[0m[31;1m chronic[se][0m[32;1mps[0m[32;1mis[0m[31;1m</s>[:][0m[32;1m a[0m[31;1m retrospective[prospective][0m[31;1m cohort[multi][0m[32;1m-[0m[32;1mcenter[0m[32;1m study[0m[32;1m</s>[0m




INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.502, 'learning_rate': 6.9992061391902616e-06, 'epoch': 0.86}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.005575120449066162, 'eval_supp_data_loss_lm': 0.34403860569000244, 'eval_loss': 0.3500642478466034, 'eval_runtime': 5.5586, 'eval_samples_per_second': 534.308, 'eval_steps_per_second': 4.318, 'epoch': 0.86}


[32;1m<s>[0m[31;1mThe[Low][0m[32;1m-[0m[32;1mdose[0m[31;1m chemotherapy[inoc][0m[32;1mulation[0m[32;1m of[0m[31;1m human[Esc][0m[32;1mher[0m[32;1mich[0m[32;1mia[0m[32;1m coli[0m[31;1m and[achieves][0m[31;1m high[robust][0m[31;1m antit[vaginal][0m[31;1m muc[colonization][0m[32;1m and[0m[31;1m improves[results][0m[32;1m in[0m[31;1m a[ascending][0m[31;1m and[infection][0m[31;1m in[accompanied][0m[32;1m by[0m[31;1m a[severe][0m[31;1m acute[uter][0m[32;1mine[0m[31;1m infection[inflammation][0m[31;1m</s>[in][0m[32;1m mice[0m[32;1m</s>[0m




Saving model checkpoint to /lm_models/checkpoint-6500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Deleting older checkpoint [/lm_models/checkpoint-4000] due to args.save_total_limit
INFO:***** Running Evaluation *****
INFO:  Num examples = 2970
INFO:  Batch size = 128


{'loss': 0.5001, 'learning_rate': 6.337655464408574e-06, 'epoch': 0.87}


  0%|          | 0/24 [00:00<?, ?it/s]

{'eval_supp_data_loss_z': 0.005742605309933424, 'eval_supp_data_loss_lm': 0.34387946128845215, 'eval_loss': 0.3500670790672302, 'eval_runtime': 5.6614, 'eval_samples_per_second': 524.605, 'eval_steps_per_second': 4.239, 'epoch': 0.87}


[32;1m<s>[0m[31;1mThe[In][0m[31;1mhibition[fer][0m[32;1mring[0m[32;1m the[0m[31;1m effects[presence][0m[32;1m of[0m[32;1m a[0m[31;1m novel[flat][0m[32;1moxin[0m[31;1m in[-][0m[31;1mlike[producing][0m[31;1m Esc[As][0m[32;1mper[0m[32;1mg[0m[32;1millus[0m[31;1m f[flav][0m[32;1mus[0m[31;1m in[strains][0m[31;1m in[using][0m[31;1m a[RNA][0m[31;1m-[sequencing][0m[32;1m and[0m[31;1m RNA[electronic][0m[31;1m health[probes][0m[31;1m</s>[as][0m[32;1m a[0m[31;1m tool[transcript][0m[31;1mome[omic][0m[31;1m approach[screening][0m[32;1m tool[0m[32;1m</s>[0m




KeyboardInterrupt: 

#### With CLI:


```bash
# Train the language model on the tokenized article titles in /data/json/oapmc_title:
# a single epoch, batch size 128 for both training and evaluation, logging every 100 steps.
# NOTE(review): SEQ2SEQ is passed here, while the config shown at the top of the notebook sets model_type = "VAE" -- confirm which one is intended.
# NOTE(review): --no_cache presumably disables dataset caching -- confirm against the CLI's --help.
python -m smtag.cli.lm.train smtag/loader/loader_lm.py SEQ2SEQ --data_dir /data/json/oapmc_title --per_device_train_batch_size=128 --per_device_eval_batch_size=128 --logging_steps=100 --num_train_epochs=1 --no_cache

# Same invocation pointed at /data/json/emboj_abstracts, with batch size 8 instead of 128.
# Assumes that dataset has been prepared beforehand (its preparation is not shown in this section).
# The smaller batch size is presumably needed because abstracts are longer than titles -- TODO confirm.
python -m smtag.cli.lm.train smtag/loader/loader_lm.py SEQ2SEQ --data_dir /data/json/emboj_abstracts --per_device_train_batch_size=8 --per_device_eval_batch_size=8 --logging_steps=100 --num_train_epochs=1 --no_cache
```