In [1]:
%pip install git+https://github.com/gaussalgo/adaptor
%pip install git+https://github.com/katarinagresova/AgoBind
%pip install sklearn
%pip install comet-ml
%pip install torchmetrics

Collecting git+https://github.com/gaussalgo/adaptor
  Cloning https://github.com/gaussalgo/adaptor to /tmp/pip-req-build-6_0yn149
  Running command git clone --filter=blob:none --quiet https://github.com/gaussalgo/adaptor /tmp/pip-req-build-6_0yn149
  Resolved https://github.com/gaussalgo/adaptor to commit db33e6e439babc68fe801a8946d87116ff44f170
  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.
Collecting git+https://github.com/katarinagresova/AgoBind
  Cloning https://github.com/katarinagresova/AgoBind to /tmp/pip-req-build-8k7msi3z
  Running command git clone --filter=blob:none --quiet https://github.com/katarinagresova/AgoBind /tmp/pip-req-build-8k7msi3z
  Resolved https://github.com/katarinagresova/AgoBind to commit ca59766c661cfc253745429111c9d0baeaa0b9d3
  Preparing metadata (setup.py) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use upda

In [2]:
import comet_ml

In [3]:
# 1. pick the model base
from adaptor.lang_module import LangModule

kmer_len = 6
stride = 1
lang_module = LangModule(f"armheb/DNA_bert_{kmer_len}")

In [4]:
# 2. Initialize training arguments
# We apply NUM_STEPS stopping strategy in cases where at least one of the objectives does not converge in max_steps
from adaptor.utils import AdaptationArguments, StoppingStrategy

training_arguments = AdaptationArguments(output_dir="dnabert_for_clash",
                                         learning_rate=2e-4,
                                         weight_decay=0.01,
                                         max_steps=100000,
                                         stopping_strategy=StoppingStrategy.ALL_OBJECTIVES_CONVERGED,
                                         # stopping_strategy=StoppingStrategy.NUM_STEPS_ALL_OBJECTIVES,
                                         do_train=True,
                                         do_eval=True,
                                         warmup_steps=5000,
                                         gradient_accumulation_steps=4,
                                         logging_steps=100,
                                         eval_steps=100,
                                         save_steps=100,
                                         num_train_epochs=30,
                                         evaluation_strategy="steps",
                                         also_log_converged_objectives=True)

In [5]:
import pandas as pd
import numpy as np

def prepare_data(path_to_csv, path_to_txt, path_to_labels):
    dset = pd.read_csv(path_to_csv, sep='\t')
    dset['seq'] = dset.apply(lambda x: x['miRNA'] + 'NNNN' + x['gene'], axis=1)
    dset['seq'] = dset['seq'].apply(lambda x: ' '.join([x[i:i+kmer_len] for i in range(0, len(x)-kmer_len+1, stride)]))
    np.savetxt(path_to_txt, dset['seq'].values, fmt='%s')
    np.savetxt(path_to_labels, dset['label'].values, fmt='%s')

In [6]:
prepare_data('../data/train_set_1_1_CLASH2013_paper.tsv', '../data/train_set_1_1_CLASH2013_paper.txt', '../data/train_set_1_1_CLASH2013_paper_labels.txt')
prepare_data('../data/evaluation_set_1_1_CLASH2013_paper.tsv', '../data/evaluation_set_1_1_CLASH2013_paper.txt', '../data/evaluation_set_1_1_CLASH2013_paper_labels.txt')

In [7]:
from adaptor.evaluators.sequence_classification import SeqClassificationEvaluator, SeqClassificationEvaluator
from sklearn.metrics import precision_recall_curve, auc
#from torchmetrics import PrecisionRecallCurve
import torch
from transformers import PreTrainedTokenizer

from adaptor.evaluators.evaluator_base import EvaluatorBase
from adaptor.utils import Head, AdaptationDataset
    
class SequenceAucPr(SeqClassificationEvaluator):
    """
    Sequence classification area under Precision-Recall curve, where each input sample of dataset falls into a single category.
    """

    smaller_is_better: bool = False

    def __call__(self, model: torch.nn.Module, tokenizer: PreTrainedTokenizer, dataset: AdaptationDataset) -> float:
        """
        Refer to the superclass documentation.
        """
        expected = []
        actual = []

        for batch in dataset:
            expected.extend(batch["labels"].cpu())
            actual.extend(model(**batch).logits.argmax(-1).cpu())

        assert len(expected) == len(actual)

        #pr_curve = PrecisionRecallCurve(pos_label=1)
        p, r, thresholds = precision_recall_curve(expected, actual)
        auc_precision_recall = auc(r, p)
        return auc_precision_recall

In [8]:
# 3. pick objectives
# Objectives take either List[str] for in-memory iteration, or a source file path for streamed iterati
from adaptor.objectives.MLM import MaskedLanguageModeling
from adaptor.objectives.classification import SequenceClassification

mlm = MaskedLanguageModeling(lang_module,
                                 batch_size=16,
                                 texts_or_path='../data/train_set_1_1_CLASH2013_paper.txt',
                                 val_texts_or_path='../data/evaluation_set_1_1_CLASH2013_paper.txt',
                            )

cls = SequenceClassification(lang_module,
                                  batch_size=64,
                                  texts_or_path='../data/train_set_1_1_CLASH2013_paper.txt',
                                  labels_or_path='../data/train_set_1_1_CLASH2013_paper_labels.txt',
                                 val_texts_or_path='../data/evaluation_set_1_1_CLASH2013_paper.txt',
                                 val_labels_or_path='../data/evaluation_set_1_1_CLASH2013_paper_labels.txt',
                                 val_evaluators=[SequenceAucPr(decides_convergence=True)]
)

Some weights of the model checkpoint at armheb/DNA_bert_6 were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at armheb/DNA_bert_6 and are n

In [9]:
# 4. pick a schedule of the selected objectives
# This one will initially fit the first objective until convergence on its eval set, then fits the second one 
from adaptor.schedules import ParallelSchedule, SequentialSchedule

schedule = SequentialSchedule([cls], training_arguments)
#schedule = ParallelSchedule([mlm, cls], training_arguments)
#schedule = SequentialSchedule(
#    objectives=[mlm, cls], 
#    args=training_arguments
#)

Total number of train samples: 30784
Total number of eval samples: 2000


In [10]:
comet_ml.init(project_name='dnabert_for_clash', api_key='3NQhHgMmmlfnoqTcvkG03nYo9')

COMET INFO: Comet API key is valid
COMET INFO: Comet API key saved in /home/jovyan/.comet.config


In [None]:
# 4. Run the training using Adapter, similarly to running HF.Trainer, only adding `schedule`
from adaptor.adapter import Adapter
#from transformers.integrations import CometCallback

adapter = Adapter(lang_module=lang_module, schedule=schedule, args=training_arguments)#, callbacks=[CometCallback()])
adapter.train()

max_steps is given, it will override any value given in num_train_epochs
***** Running training *****
  Num examples = 14430
  Num Epochs = 28
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 4
  Total optimization steps = 100000
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/katarinagresova/dnabert-for-clash/2beeacbecc194ac0ae1de8b72c521388

Automatic Comet.ml online logging enabled
Converged objectives: []
SequenceClassification:   3%|▎         | 14/481 [00:03<01:34,  4.92batches/s, epoch=1, loss=0.681, split=train]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


SequenceClassification:  83%|████████▎ | 400/481 [00:49<00:09,  8.57batches/s, epoch=1, loss=0.699, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 0.737, 'learning_rate': 4.000000000000001e-06, 'train_SequenceClassification_loss': 0.73701636813581, 'train_SequenceClassification_num_batches': 400, 'epoch': 0.03}


SequenceClassification: 32batches [00:01, 21.34batches/s, epoch=1, loss=0.711, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-100/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-100/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-100/SequenceClassification/config.json


{'eval_loss': 0.709191083908081, 'eval_runtime': 1.5023, 'eval_samples_per_second': 20.635, 'eval_steps_per_second': 20.635, 'eval_SequenceClassification_loss': 0.7092440202832222, 'eval_SequenceClassification_num_batches': 32, 'eval_SequenceClassification_SequenceAucPr': 0.5950905306971904, 'epoch': 0.03}


Model weights saved in dnabert_for_clash/checkpoint-100/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:04<00:00,  8.12batches/s, epoch=1, loss=0.416, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:04<00:00,  7.43batches/s, epoch=1, loss=0.416, split=train]
SequenceClassification:  66%|██████▋   | 319/481 [00:37<00:18,  8.53batches/s, epoch=2, loss=1.19, split=train] Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 0.8427, 'learning_rate': 8.000000000000001e-06, 'train_SequenceClassification_loss': 0.7898371073231101, 'train_SequenceClassification_num_batches': 800, 'epoch': 0.06}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=1, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.13batches/s, epoch=2, loss=1.14, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-200/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-200/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-200/SequenceClassification/config.json


{'eval_loss': 0.7621480822563171, 'eval_runtime': 1.5163, 'eval_samples_per_second': 20.444, 'eval_steps_per_second': 20.444, 'eval_SequenceClassification_loss': 0.7416698397137225, 'eval_SequenceClassification_num_batches': 64, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.06}


Model weights saved in dnabert_for_clash/checkpoint-200/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.34batches/s, epoch=2, loss=0.0118, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.71batches/s, epoch=2, loss=0.0118, split=train]
SequenceClassification:  49%|████▉     | 238/481 [00:28<00:28,  8.56batches/s, epoch=3, loss=0.00343, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.0458, 'learning_rate': 1.2e-05, 'train_SequenceClassification_loss': 0.9217161289104261, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.08}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=2, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.28batches/s, epoch=3, loss=6.2, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-300/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-300/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-300/SequenceClassification/config.json


{'eval_loss': 3.075151205062866, 'eval_runtime': 1.5056, 'eval_samples_per_second': 20.59, 'eval_steps_per_second': 20.59, 'eval_SequenceClassification_loss': 1.5520200504324748, 'eval_SequenceClassification_num_batches': 96, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.08}


Model weights saved in dnabert_for_clash/checkpoint-300/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.44batches/s, epoch=3, loss=0.00304, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.66batches/s, epoch=3, loss=0.00304, split=train]
SequenceClassification:  33%|███▎      | 157/481 [00:18<00:37,  8.53batches/s, epoch=4, loss=0.00361, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.4741, 'learning_rate': 1.6000000000000003e-05, 'train_SequenceClassification_loss': 1.1794152363205794, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.11}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=3, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.06batches/s, epoch=4, loss=5.97, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-400/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-400/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-400/SequenceClassification/config.json


{'eval_loss': 2.960547685623169, 'eval_runtime': 1.5165, 'eval_samples_per_second': 20.442, 'eval_steps_per_second': 20.442, 'eval_SequenceClassification_loss': 1.9276417622950248, 'eval_SequenceClassification_num_batches': 128, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.11}


Model weights saved in dnabert_for_clash/checkpoint-400/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.45batches/s, epoch=4, loss=0.00125, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.65batches/s, epoch=4, loss=0.00125, split=train]
SequenceClassification:  16%|█▌        | 76/481 [00:09<00:47,  8.50batches/s, epoch=5, loss=0.0661, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.5271, 'learning_rate': 2e-05, 'train_SequenceClassification_loss': 1.368099317157641, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.14}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=4, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.03batches/s, epoch=5, loss=3.34, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-500/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-500/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-500/SequenceClassification/config.json


{'eval_loss': 1.6771562099456787, 'eval_runtime': 1.5242, 'eval_samples_per_second': 20.339, 'eval_steps_per_second': 20.339, 'eval_SequenceClassification_loss': 1.8879416634721564, 'eval_SequenceClassification_num_batches': 160, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.14}


Model weights saved in dnabert_for_clash/checkpoint-500/SequenceClassification/pytorch_model.bin
SequenceClassification:  99%|█████████▉| 476/481 [01:02<00:00,  8.52batches/s, epoch=5, loss=0.00125, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 0.5656, 'learning_rate': 2.4e-05, 'train_SequenceClassification_loss': 1.1064297522277338, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.17}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=5, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.31batches/s, epoch=5, loss=0.000917, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-600/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-600/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-600/SequenceClassification/config.json


{'eval_loss': 3.526146650314331, 'eval_runtime': 1.5039, 'eval_samples_per_second': 20.614, 'eval_steps_per_second': 20.614, 'eval_SequenceClassification_loss': 2.14261532723746, 'eval_SequenceClassification_num_batches': 192, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.17}


Model weights saved in dnabert_for_clash/checkpoint-600/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:07<00:00,  2.07batches/s, epoch=5, loss=0.00116, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:07<00:00,  7.08batches/s, epoch=5, loss=0.00116, split=train]
SequenceClassification:  82%|████████▏ | 395/481 [00:47<00:10,  8.43batches/s, epoch=6, loss=0.00164, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.3891, 'learning_rate': 2.8000000000000003e-05, 'train_SequenceClassification_loss': 1.0452638140710768, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.19}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=5, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.24batches/s, epoch=6, loss=0.0011, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-700/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-700/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-700/SequenceClassification/config.json


{'eval_loss': 3.4335293769836426, 'eval_runtime': 1.5088, 'eval_samples_per_second': 20.546, 'eval_steps_per_second': 20.546, 'eval_SequenceClassification_loss': 2.3117083689742555, 'eval_SequenceClassification_num_batches': 224, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.19}


Model weights saved in dnabert_for_clash/checkpoint-700/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.28batches/s, epoch=6, loss=0.000859, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  7.63batches/s, epoch=6, loss=0.000859, split=train]
SequenceClassification:  65%|██████▌   | 314/481 [00:37<00:19,  8.45batches/s, epoch=7, loss=0.079, split=train]   Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.5596, 'learning_rate': 3.2000000000000005e-05, 'train_SequenceClassification_loss': 1.2059903278655255, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.22}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=6, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.04batches/s, epoch=7, loss=0.0371, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-800/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-800/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-800/SequenceClassification/config.json


{'eval_loss': 1.6877338886260986, 'eval_runtime': 1.5229, 'eval_samples_per_second': 20.356, 'eval_steps_per_second': 20.356, 'eval_SequenceClassification_loss': 2.227263622362443, 'eval_SequenceClassification_num_batches': 256, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.22}


Model weights saved in dnabert_for_clash/checkpoint-800/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  8.44batches/s, epoch=7, loss=0.00127, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  7.62batches/s, epoch=7, loss=0.00127, split=train]
SequenceClassification:  48%|████▊     | 233/481 [00:27<00:29,  8.51batches/s, epoch=8, loss=0.000743, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 0.7971, 'learning_rate': 3.6e-05, 'train_SequenceClassification_loss': 1.1903262593513937, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.25}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=7, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.20batches/s, epoch=8, loss=7.61, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-900/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-900/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-900/SequenceClassification/config.json


{'eval_loss': 3.773026704788208, 'eval_runtime': 1.5114, 'eval_samples_per_second': 20.51, 'eval_steps_per_second': 20.51, 'eval_SequenceClassification_loss': 2.4123271010111136, 'eval_SequenceClassification_num_batches': 288, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.25}


Model weights saved in dnabert_for_clash/checkpoint-900/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.46batches/s, epoch=8, loss=0.000975, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.64batches/s, epoch=8, loss=0.000975, split=train]
SequenceClassification:  32%|███▏      | 152/481 [00:18<00:38,  8.52batches/s, epoch=9, loss=0.00243, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.4882, 'learning_rate': 4e-05, 'train_SequenceClassification_loss': 1.2306209717416206, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.28}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=8, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.11batches/s, epoch=9, loss=6.44, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1000/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1000/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1000/SequenceClassification/config.json


{'eval_loss': 3.196300983428955, 'eval_runtime': 1.5185, 'eval_samples_per_second': 20.415, 'eval_steps_per_second': 20.415, 'eval_SequenceClassification_loss': 2.5008701537619347, 'eval_SequenceClassification_num_batches': 320, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.28}


Model weights saved in dnabert_for_clash/checkpoint-1000/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  8.19batches/s, epoch=9, loss=0.000923, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  7.57batches/s, epoch=9, loss=0.000923, split=train]
SequenceClassification:  15%|█▍        | 71/481 [00:08<00:48,  8.49batches/s, epoch=10, loss=0.0869, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.5527, 'learning_rate': 4.4000000000000006e-05, 'train_SequenceClassification_loss': 1.331231181347277, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.3}


SequenceClassification:   0%|          | 0/31 [00:52<?, ?batches/s, epoch=9, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.11batches/s, epoch=10, loss=3.09, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1100/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1100/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1100/SequenceClassification/config.json


{'eval_loss': 1.5540165901184082, 'eval_runtime': 1.5182, 'eval_samples_per_second': 20.419, 'eval_steps_per_second': 20.419, 'eval_SequenceClassification_loss': 2.419144620272246, 'eval_SequenceClassification_num_batches': 352, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.3}


Model weights saved in dnabert_for_clash/checkpoint-1100/SequenceClassification/pytorch_model.bin
SequenceClassification:  98%|█████████▊| 471/481 [01:01<00:01,  8.51batches/s, epoch=10, loss=0.00141, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 0.7692, 'learning_rate': 4.8e-05, 'train_SequenceClassification_loss': 1.2521414867267013, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.33}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=10, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.03batches/s, epoch=10, loss=0.00105, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1200/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1200/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1200/SequenceClassification/config.json


{'eval_loss': 3.460289478302002, 'eval_runtime': 1.524, 'eval_samples_per_second': 20.341, 'eval_steps_per_second': 20.341, 'eval_SequenceClassification_loss': 2.496898247032732, 'eval_SequenceClassification_num_batches': 384, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.33}


Model weights saved in dnabert_for_clash/checkpoint-1200/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:08<00:00,  5.43batches/s, epoch=10, loss=0.00134, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:08<00:00,  7.03batches/s, epoch=10, loss=0.00134, split=train]
SequenceClassification:  49%|████▉     | 237/481 [00:28<00:28,  8.46batches/s, epoch=11, loss=0.00137, split=train]Model weights saved in dnabert_for_clash/checkpoint-1400/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.50batches/s, epoch=12, loss=0.00171, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.64batches/s, epoch=12, loss=0.00171, split=train]
SequenceClassification:  47%|████▋     | 228/481 [00:27<00:29,  8.45batches/s, epoch=13, loss=0.00223, split=train]Converged objectives: []
Evaluating...
***** Running Evaluatio

{'loss': 0.9945, 'learning_rate': 6e-05, 'train_SequenceClassification_loss': 1.3155318958439166, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.42}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=12, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 20.99batches/s, epoch=13, loss=6.36, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1500/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1500/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1500/SequenceClassification/config.json


{'eval_loss': 3.1574337482452393, 'eval_runtime': 1.5276, 'eval_samples_per_second': 20.293, 'eval_steps_per_second': 20.293, 'eval_SequenceClassification_loss': 2.5026376287671157, 'eval_SequenceClassification_num_batches': 480, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.42}


Model weights saved in dnabert_for_clash/checkpoint-1500/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.16batches/s, epoch=13, loss=0.00254, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.65batches/s, epoch=13, loss=0.00254, split=train]
SequenceClassification:  31%|███       | 147/481 [00:17<00:39,  8.43batches/s, epoch=14, loss=0.0318, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 2.164, 'learning_rate': 6.400000000000001e-05, 'train_SequenceClassification_loss': 1.5507558739959495, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.44}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=13, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.19batches/s, epoch=14, loss=3.77, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1600/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1600/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1600/SequenceClassification/config.json


{'eval_loss': 1.8802082538604736, 'eval_runtime': 1.5127, 'eval_samples_per_second': 20.493, 'eval_steps_per_second': 20.493, 'eval_SequenceClassification_loss': 2.4674213787498047, 'eval_SequenceClassification_num_batches': 512, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.44}


Model weights saved in dnabert_for_clash/checkpoint-1600/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.26batches/s, epoch=14, loss=0.00551, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.66batches/s, epoch=14, loss=0.00551, split=train]
SequenceClassification:  14%|█▎        | 66/481 [00:07<00:49,  8.43batches/s, epoch=15, loss=4.02, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 2.078, 'learning_rate': 6.800000000000001e-05, 'train_SequenceClassification_loss': 1.9187223217766731, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.47}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=14, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.07batches/s, epoch=15, loss=0.0185, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1700/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1700/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1700/SequenceClassification/config.json


{'eval_loss': 2.0256054401397705, 'eval_runtime': 1.5212, 'eval_samples_per_second': 20.378, 'eval_steps_per_second': 20.378, 'eval_SequenceClassification_loss': 2.437742617353198, 'eval_SequenceClassification_num_batches': 544, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.47}


Model weights saved in dnabert_for_clash/checkpoint-1700/SequenceClassification/pytorch_model.bin
SequenceClassification:  97%|█████████▋| 466/481 [01:01<00:01,  8.38batches/s, epoch=15, loss=0.00721, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.3601, 'learning_rate': 7.2e-05, 'train_SequenceClassification_loss': 1.808089804451447, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.5}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=15, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.12batches/s, epoch=15, loss=0.00594, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1800/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1800/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1800/SequenceClassification/config.json


{'eval_loss': 2.588226318359375, 'eval_runtime': 1.5174, 'eval_samples_per_second': 20.429, 'eval_steps_per_second': 20.429, 'eval_SequenceClassification_loss': 2.4416197272564912, 'eval_SequenceClassification_num_batches': 576, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.5}


Model weights saved in dnabert_for_clash/checkpoint-1800/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:08<00:00,  7.66batches/s, epoch=15, loss=0.00551, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:08<00:00,  7.06batches/s, epoch=15, loss=0.00551, split=train]
SequenceClassification:  80%|████████  | 385/481 [00:46<00:16,  5.93batches/s, epoch=16, loss=0.0339, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 2.0193, 'learning_rate': 7.6e-05, 'train_SequenceClassification_loss': 1.716939680948388, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.53}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=15, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 18.82batches/s, epoch=16, loss=0.0283, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-1900/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-1900/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-1900/SequenceClassification/config.json


{'eval_loss': 1.8177731037139893, 'eval_runtime': 1.7027, 'eval_samples_per_second': 18.207, 'eval_steps_per_second': 18.207, 'eval_SequenceClassification_loss': 2.4058424976664305, 'eval_SequenceClassification_num_batches': 608, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.53}


Model weights saved in dnabert_for_clash/checkpoint-1900/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  8.48batches/s, epoch=16, loss=0.00503, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  7.62batches/s, epoch=16, loss=0.00503, split=train]
SequenceClassification:  63%|██████▎   | 304/481 [00:36<00:20,  8.47batches/s, epoch=17, loss=3.77, split=train]  Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.9698, 'learning_rate': 8e-05, 'train_SequenceClassification_loss': 1.8812650823076256, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.55}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=16, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.07batches/s, epoch=17, loss=3.72, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2000/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2000/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2000/SequenceClassification/config.json


{'eval_loss': 1.857690453529358, 'eval_runtime': 1.5209, 'eval_samples_per_second': 20.382, 'eval_steps_per_second': 20.382, 'eval_SequenceClassification_loss': 2.3813458556953266, 'eval_SequenceClassification_num_batches': 640, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.55}


Model weights saved in dnabert_for_clash/checkpoint-2000/SequenceClassification/pytorch_model.bin
SequenceClassification:  46%|████▋     | 223/481 [00:26<00:30,  8.53batches/s, epoch=18, loss=0.0981, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.6324, 'learning_rate': 8.4e-05, 'train_SequenceClassification_loss': 1.7612629435588605, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.58}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=17, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.25batches/s, epoch=18, loss=2.53, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2100/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2100/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2100/SequenceClassification/config.json


{'eval_loss': 1.2974705696105957, 'eval_runtime': 1.508, 'eval_samples_per_second': 20.557, 'eval_steps_per_second': 20.557, 'eval_SequenceClassification_loss': 2.331569596953549, 'eval_SequenceClassification_num_batches': 672, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.58}


Model weights saved in dnabert_for_clash/checkpoint-2100/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.21batches/s, epoch=18, loss=0.00453, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.64batches/s, epoch=18, loss=0.00453, split=train]
SequenceClassification:  30%|██▉       | 142/481 [00:17<00:40,  8.45batches/s, epoch=19, loss=1.91, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.7434, 'learning_rate': 8.800000000000001e-05, 'train_SequenceClassification_loss': 1.6686349766086788, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.61}


SequenceClassification:   0%|          | 0/31 [00:52<?, ?batches/s, epoch=18, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.13batches/s, epoch=19, loss=0.183, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2200/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2200/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2200/SequenceClassification/config.json


{'eval_loss': 0.9925918579101562, 'eval_runtime': 1.5165, 'eval_samples_per_second': 20.442, 'eval_steps_per_second': 20.442, 'eval_SequenceClassification_loss': 2.2695566748579123, 'eval_SequenceClassification_num_batches': 704, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.61}


Model weights saved in dnabert_for_clash/checkpoint-2200/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  8.37batches/s, epoch=19, loss=0.00493, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  7.60batches/s, epoch=19, loss=0.00493, split=train]
SequenceClassification:  13%|█▎        | 61/481 [00:07<00:49,  8.42batches/s, epoch=20, loss=4.22, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.624, 'learning_rate': 9.200000000000001e-05, 'train_SequenceClassification_loss': 1.8100189054151996, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.64}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=19, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 20.97batches/s, epoch=20, loss=0.0163, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2300/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2300/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2300/SequenceClassification/config.json


{'eval_loss': 2.087830066680908, 'eval_runtime': 1.5283, 'eval_samples_per_second': 20.284, 'eval_steps_per_second': 20.284, 'eval_SequenceClassification_loss': 2.2588409181768405, 'eval_SequenceClassification_num_batches': 736, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.64}


Model weights saved in dnabert_for_clash/checkpoint-2300/SequenceClassification/pytorch_model.bin
SequenceClassification:  96%|█████████▌| 461/481 [01:00<00:02,  8.54batches/s, epoch=20, loss=0.0101, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.3914, 'learning_rate': 9.6e-05, 'train_SequenceClassification_loss': 1.771668959448114, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.67}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=20, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.10batches/s, epoch=20, loss=0.0083, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2400/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2400/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2400/SequenceClassification/config.json


{'eval_loss': 2.421452283859253, 'eval_runtime': 1.5192, 'eval_samples_per_second': 20.405, 'eval_steps_per_second': 20.405, 'eval_SequenceClassification_loss': 2.262474231281582, 'eval_SequenceClassification_num_batches': 768, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.67}


Model weights saved in dnabert_for_clash/checkpoint-2400/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:07<00:00,  8.43batches/s, epoch=20, loss=0.00594, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:07<00:00,  7.08batches/s, epoch=20, loss=0.00594, split=train]
SequenceClassification:  79%|███████▉  | 380/481 [00:45<00:11,  8.48batches/s, epoch=21, loss=0.98, split=train]  Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 2.3961, 'learning_rate': 0.0001, 'train_SequenceClassification_loss': 1.8422518291380257, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.69}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=20, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.13batches/s, epoch=21, loss=0.887, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2500/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2500/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2500/SequenceClassification/config.json


{'eval_loss': 0.7075479030609131, 'eval_runtime': 1.5166, 'eval_samples_per_second': 20.44, 'eval_steps_per_second': 20.44, 'eval_SequenceClassification_loss': 2.2005020512783084, 'eval_SequenceClassification_num_batches': 800, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.69}


Model weights saved in dnabert_for_clash/checkpoint-2500/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.23batches/s, epoch=21, loss=0.0333, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.66batches/s, epoch=21, loss=0.0333, split=train]
SequenceClassification:  62%|██████▏   | 299/481 [00:35<00:21,  8.53batches/s, epoch=22, loss=3.46, split=train]  Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.646, 'learning_rate': 0.00010400000000000001, 'train_SequenceClassification_loss': 1.8872897774269803, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.72}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=21, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.32batches/s, epoch=22, loss=3.41, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2600/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2600/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2600/SequenceClassification/config.json


{'eval_loss': 1.7061891555786133, 'eval_runtime': 1.5035, 'eval_samples_per_second': 20.619, 'eval_steps_per_second': 20.619, 'eval_SequenceClassification_loss': 2.183532863358638, 'eval_SequenceClassification_num_batches': 832, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.72}


Model weights saved in dnabert_for_clash/checkpoint-2600/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.36batches/s, epoch=22, loss=0.0199, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.65batches/s, epoch=22, loss=0.0199, split=train]
SequenceClassification:  45%|████▌     | 218/481 [00:26<00:30,  8.50batches/s, epoch=23, loss=0.129, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.6293, 'learning_rate': 0.00010800000000000001, 'train_SequenceClassification_loss': 1.7314232522919775, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.75}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=22, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.19batches/s, epoch=23, loss=2.29, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2700/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2700/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2700/SequenceClassification/config.json


{'eval_loss': 1.1881483793258667, 'eval_runtime': 1.5124, 'eval_samples_per_second': 20.497, 'eval_steps_per_second': 20.497, 'eval_SequenceClassification_loss': 2.147938285838151, 'eval_SequenceClassification_num_batches': 864, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.75}


Model weights saved in dnabert_for_clash/checkpoint-2700/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.42batches/s, epoch=23, loss=0.0099, split=train] Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.69batches/s, epoch=23, loss=0.0099, split=train]
SequenceClassification:  28%|██▊       | 137/481 [00:16<00:40,  8.39batches/s, epoch=24, loss=0.226, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.8798, 'learning_rate': 0.00011200000000000001, 'train_SequenceClassification_loss': 1.7134413984753192, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.78}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=23, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.09batches/s, epoch=24, loss=1.72, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2800/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2800/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2800/SequenceClassification/config.json


{'eval_loss': 0.9526139497756958, 'eval_runtime': 1.5191, 'eval_samples_per_second': 20.407, 'eval_steps_per_second': 20.407, 'eval_SequenceClassification_loss': 2.106104872726064, 'eval_SequenceClassification_num_batches': 896, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.78}


Model weights saved in dnabert_for_clash/checkpoint-2800/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  8.44batches/s, epoch=24, loss=0.0624, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  7.62batches/s, epoch=24, loss=0.0624, split=train]
SequenceClassification:  12%|█▏        | 56/481 [00:06<00:49,  8.51batches/s, epoch=25, loss=2.97, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.7686, 'learning_rate': 0.000116, 'train_SequenceClassification_loss': 1.8605698600951581, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.8}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=24, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.05batches/s, epoch=25, loss=0.0553, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-2900/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-2900/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-2900/SequenceClassification/config.json


{'eval_loss': 1.5007637739181519, 'eval_runtime': 1.5222, 'eval_samples_per_second': 20.365, 'eval_steps_per_second': 20.365, 'eval_SequenceClassification_loss': 2.083673400286804, 'eval_SequenceClassification_num_batches': 928, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.8}


Model weights saved in dnabert_for_clash/checkpoint-2900/SequenceClassification/pytorch_model.bin
SequenceClassification:  95%|█████████▍| 456/481 [01:00<00:02,  8.45batches/s, epoch=25, loss=0.138, split=train] Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.5643, 'learning_rate': 0.00012, 'train_SequenceClassification_loss': 1.7857373125180602, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.83}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=25, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.18batches/s, epoch=25, loss=0.112, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-3000/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-3000/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-3000/SequenceClassification/config.json


{'eval_loss': 1.1867918968200684, 'eval_runtime': 1.5245, 'eval_samples_per_second': 20.334, 'eval_steps_per_second': 20.334, 'eval_SequenceClassification_loss': 2.0526578336064025, 'eval_SequenceClassification_num_batches': 960, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.83}


Model weights saved in dnabert_for_clash/checkpoint-3000/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:08<00:00,  8.11batches/s, epoch=25, loss=0.0427, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:08<00:00,  7.06batches/s, epoch=25, loss=0.0427, split=train]
SequenceClassification:  78%|███████▊  | 375/481 [00:45<00:12,  8.47batches/s, epoch=26, loss=1.25, split=train]  Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.9925, 'learning_rate': 0.000124, 'train_SequenceClassification_loss': 1.7378839739114047, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.86}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=25, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.08batches/s, epoch=26, loss=1.16, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-3100/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-3100/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-3100/SequenceClassification/config.json


{'eval_loss': 0.7635958194732666, 'eval_runtime': 1.5201, 'eval_samples_per_second': 20.394, 'eval_steps_per_second': 20.394, 'eval_SequenceClassification_loss': 2.011469815621357, 'eval_SequenceClassification_num_batches': 992, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.86}


Model weights saved in dnabert_for_clash/checkpoint-3100/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  8.36batches/s, epoch=26, loss=0.033, split=train] Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:03<00:00,  7.62batches/s, epoch=26, loss=0.033, split=train]
SequenceClassification:  61%|██████    | 294/481 [00:35<00:21,  8.57batches/s, epoch=27, loss=4.06, split=train]  Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.6097, 'learning_rate': 0.00012800000000000002, 'train_SequenceClassification_loss': 1.8412406617822126, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.89}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=26, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.33batches/s, epoch=27, loss=3.98, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-3200/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-3200/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-3200/SequenceClassification/config.json


{'eval_loss': 1.9823851585388184, 'eval_runtime': 1.5023, 'eval_samples_per_second': 20.635, 'eval_steps_per_second': 20.635, 'eval_SequenceClassification_loss': 2.012509567623283, 'eval_SequenceClassification_num_batches': 1024, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.89}


Model weights saved in dnabert_for_clash/checkpoint-3200/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.48batches/s, epoch=27, loss=0.0109, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.65batches/s, epoch=27, loss=0.0109, split=train]
SequenceClassification:  44%|████▍     | 213/481 [00:25<00:31,  8.53batches/s, epoch=28, loss=0.0487, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.6903, 'learning_rate': 0.000132, 'train_SequenceClassification_loss': 1.7600788341006264, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.91}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=27, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.23batches/s, epoch=28, loss=3.27, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-3300/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-3300/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-3300/SequenceClassification/config.json


{'eval_loss': 1.639068603515625, 'eval_runtime': 1.5095, 'eval_samples_per_second': 20.536, 'eval_steps_per_second': 20.536, 'eval_SequenceClassification_loss': 2.0027330867475635, 'eval_SequenceClassification_num_batches': 1056, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.91}


Model weights saved in dnabert_for_clash/checkpoint-3300/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.56batches/s, epoch=28, loss=0.0177, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.67batches/s, epoch=28, loss=0.0177, split=train]
SequenceClassification:  27%|██▋       | 132/481 [00:15<00:40,  8.51batches/s, epoch=29, loss=1.35, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 2.2995, 'learning_rate': 0.00013600000000000003, 'train_SequenceClassification_loss': 1.9034341648817061, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.94}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=28, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 17.34batches/s, epoch=29, loss=0.346, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-3400/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-3400/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-3400/SequenceClassification/config.json


{'eval_loss': 0.7912241220474243, 'eval_runtime': 1.8483, 'eval_samples_per_second': 16.772, 'eval_steps_per_second': 16.772, 'eval_SequenceClassification_loss': 1.9666912605036273, 'eval_SequenceClassification_num_batches': 1088, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.94}


Model weights saved in dnabert_for_clash/checkpoint-3400/SequenceClassification/pytorch_model.bin
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  8.17batches/s, epoch=29, loss=0.00892, split=train]Converged objectives: []
SequenceClassification: 100%|██████████| 481/481 [01:02<00:00,  7.64batches/s, epoch=29, loss=0.00892, split=train]
SequenceClassification:  11%|█         | 51/481 [00:06<00:50,  8.46batches/s, epoch=30, loss=4.4, split=train] Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.7679, 'learning_rate': 0.00014, 'train_SequenceClassification_loss': 2.03755366403237, 'train_SequenceClassification_num_batches': 1000, 'epoch': 0.97}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=29, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.20batches/s, epoch=30, loss=0.0132, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-3500/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-3500/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-3500/SequenceClassification/config.json


{'eval_loss': 2.1921253204345703, 'eval_runtime': 1.5118, 'eval_samples_per_second': 20.505, 'eval_steps_per_second': 20.505, 'eval_SequenceClassification_loss': 1.9711867190650083, 'eval_SequenceClassification_num_batches': 1120, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 0.97}


Model weights saved in dnabert_for_clash/checkpoint-3500/SequenceClassification/pytorch_model.bin
SequenceClassification:  94%|█████████▍| 451/481 [00:58<00:03,  8.52batches/s, epoch=30, loss=0.0189, split=train]Converged objectives: []
Evaluating...
***** Running Evaluation *****
  Num examples = 31
  Batch size = 1


{'loss': 1.6123, 'learning_rate': 0.000144, 'train_SequenceClassification_loss': 1.8092812393018975, 'train_SequenceClassification_num_batches': 1000, 'epoch': 1.0}


SequenceClassification:   0%|          | 0/31 [00:51<?, ?batches/s, epoch=30, loss=-1, split=eval]
SequenceClassification: 32batches [00:01, 21.21batches/s, epoch=30, loss=0.0149, split=eval]                   
Converged objectives: []
tokenizer config file saved in dnabert_for_clash/checkpoint-3600/SequenceClassification/tokenizer_config.json
Special tokens file saved in dnabert_for_clash/checkpoint-3600/SequenceClassification/special_tokens_map.json
Configuration saved in dnabert_for_clash/checkpoint-3600/SequenceClassification/config.json


{'eval_loss': 2.1321206092834473, 'eval_runtime': 1.5146, 'eval_samples_per_second': 20.468, 'eval_steps_per_second': 20.468, 'eval_SequenceClassification_loss': 1.973819205167375, 'eval_SequenceClassification_num_batches': 1152, 'eval_SequenceClassification_SequenceAucPr': 0.75, 'epoch': 1.0}


Model weights saved in dnabert_for_clash/checkpoint-3600/SequenceClassification/pytorch_model.bin
SequenceClassification:  97%|█████████▋| 466/481 [01:05<00:01,  7.78batches/s, epoch=30, loss=0.00971, split=train]