# Imports

In [1]:
%pip install -r ../requirements.txt  

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\1\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [2]:
import sys, os

sys.path.append(os.path.abspath("../src"))
sys.path.append(os.path.abspath("../data"))

import os
from typing import Dict, List, Tuple

import numpy as np
import torch
from tqdm import tqdm, trange
from joblib import load

import evaluate
from transformers import (
    AutoConfig,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    PreTrainedTokenizer,
)
from peft import get_peft_model, LoraConfig, TaskType

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    matthews_corrcoef,
    roc_auc_score,
)

from torch.utils import data

import sparse_vector
import sparse_vector.sparse_vector

sys.modules["Sparse_vector"] = sparse_vector
sys.modules["Sparse_vector.sparse_vector"] = sparse_vector.sparse_vector

from modeling_hyena import HyenaDNAForTokenClassification

mcc = evaluate.load("matthews_correlation")
f1_metric = evaluate.load("f1")
accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")

In [4]:
ASSEMBLY_d = {}
chroms_d = {}
all_features_d = {}
groups_d = {}
feature_names_d = {}
G4_d = {}
black_list_d = {}
DNA_d = {}
DNA_features_d = {}

# Data collection

## metadata

Define the assembly version of quadruplexes from kidney tissue, target chromosomes, and feature files. Metadata structures are then populated for lookup by assembly mode.

In [18]:
chroms = [f"chr{i}" for i in list(range(1, 23)) + ["X", "Y"]]
G4 = load("../data/g4.pkl")
black_list = load("../data/blacklist_hg38_v2.pkl")

## DNA sequence assembly

Concatenate individual chromosome fragment files into complete sequences. This section builds a full-genome DNA string for downstream analysis.

In [23]:
def load_chrom_sequence(chrom: str) -> str:
    base_dir = os.path.abspath(os.path.join("..","data" ,"z_dna", "hg38_dna"))
    files = sorted(f for f in os.listdir(base_dir) if f.startswith(f"{chrom}_"))
    return "".join(load(os.path.join(base_dir, f)) for f in files)


DNA = {chrom: load_chrom_sequence(chrom) for chrom in tqdm(chroms)}

100%|██████████| 24/24 [00:04<00:00,  5.94it/s]


In [24]:
mode = 'hg38'
chroms_d[mode] = chroms
G4_d[mode] = G4
black_list_d[mode] = black_list
DNA_d[mode] = DNA
# DNA_features_d[mode] = DNA_features

## Creating and labeling windows

Generate fixed-length windows across the genome and assign labels based on Z-DNA predictions. This prepares the dataset for model training.

In [10]:
width = 100

In [11]:
np.random.seed(10)

ints_in = []
ints_out = []

for chrm in chroms:
    for st in trange(0, G4[chrm].shape - width, width):
        interval = [st, min(st + width, G4[chrm].shape)]
        N_count = sum([bp == "N" for bp in DNA[chrm][interval[0] : interval[1]]])
        bl_count = black_list[chrm][interval[0] : interval[1]].sum()
        if N_count > width / 2 or bl_count > 0:
            continue
        else:
            if G4[chrm][interval[0] : interval[1]].any():
                ints_in.append([chrm, int(interval[0]), int(interval[1]), 1])
            else:
                ints_out.append([chrm, int(interval[0]), int(interval[1]), 0])
print(len(ints_in))
print(len(ints_out))

ints_in_full = ints_in
ints_out_full = ints_out

100%|██████████| 2489564/2489564 [01:02<00:00, 39715.28it/s]
100%|██████████| 2421935/2421935 [01:01<00:00, 39553.75it/s]
100%|██████████| 1982955/1982955 [00:49<00:00, 39767.52it/s]
100%|██████████| 1902145/1902145 [00:48<00:00, 38999.97it/s]
100%|██████████| 1815382/1815382 [00:45<00:00, 40213.00it/s]
100%|██████████| 1708059/1708059 [00:43<00:00, 39125.99it/s]
100%|██████████| 1593459/1593459 [00:40<00:00, 39366.04it/s]
100%|██████████| 1451386/1451386 [00:35<00:00, 40335.83it/s]
100%|██████████| 1383947/1383947 [00:31<00:00, 43380.38it/s]
100%|██████████| 1337974/1337974 [00:34<00:00, 38723.29it/s]
100%|██████████| 1350866/1350866 [00:33<00:00, 40129.54it/s]
100%|██████████| 1332753/1332753 [00:33<00:00, 40143.97it/s]
100%|██████████| 1143643/1143643 [00:26<00:00, 42571.46it/s]
100%|██████████| 1070437/1070437 [00:26<00:00, 40982.35it/s]
100%|██████████| 1019911/1019911 [00:22<00:00, 44419.14it/s]
100%|██████████| 903383/903383 [00:21<00:00, 41772.80it/s]
100%|██████████| 832574/83

139966
28172318





In [12]:
print(len(ints_in_full))
print(len(ints_out_full))

139966
28172318


## Balance of classes

Compute and display the class distribution to assess dataset imbalance. Helps in deciding whether to apply sampling strategies or class weighting.

In [13]:
ints_in = ints_in_full
ints_out = [ints_out_full[i] for i in np.random.choice(range(len(ints_out_full)),
                                                    size=len(ints_in) * 3, replace=False)] # 3:1 ratio
print(len(ints_in)) 
print(len(ints_out))


139966
419898


In [14]:
ints_in[0]

['chr1', 827400, 827500, 1]

# Dateset class and division of dataset

In [None]:


class DNATokenClassificationDataset(data.Dataset):
    def __init__(
        self,
        chroms: List[str],
        dna_source: Dict[str, str],
        labels_source: Dict[str, torch.Tensor],
        intervals: List[Tuple[str, int, int]],
        tokenizer: PreTrainedTokenizer,
        max_length: int,
    ):
        """
        chroms - list of available chromosomes
        dna_source - {chrom: dna_string}
        labels_source - {chrom: Tensor[length_of_chrom]} with 0/1 labels by nucleotide
        intervals - [(chrom, start, end), ...]
        tokeniser - HyenaDNATokenizer
        max_length - Lmax for padding/truncation
        """
        self.intervals = intervals
        self.dna_source = dna_source
        self.labels_source = labels_source
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.intervals)

    def __getitem__(self, idx):
        interval = self.intervals[idx]
        chrom = interval[0]
        start = interval[1]
        end = interval[2]
        seq = self.dna_source[chrom][start:end].upper()
        char_labels = self.labels_source[chrom][start:end]  # Tensor of shape (L,)

        enc = self.tokenizer(
            seq,
            truncation=True,
            padding="max_length",
            max_length=self.max_length + 1,
            return_special_tokens_mask=True,
            return_attention_mask=True,
            # add_special_tokens=False,
        )
        input_ids = torch.tensor(enc["input_ids"], dtype=torch.long)
        attention_mask = torch.tensor(enc["attention_mask"], dtype=torch.long)
        special_tokens_mask = torch.tensor(enc["special_tokens_mask"], dtype=torch.long)

        # Align labels: one character per token
        labels_by_tok = []
        char_ptr = 0
        for is_special in special_tokens_mask.tolist():
            if is_special:
                # CLS, SEP, PAD → ignore
                labels_by_tok.append(-100)
            else:
                # If there are any more character labels, take the next one
                if char_ptr < len(char_labels):
                    labels_by_tok.append(int(char_labels[char_ptr]))
                    char_ptr += 1
                else:
                    # The original string has been truncated → put -100
                    labels_by_tok.append(-100)

        labels_by_tok = torch.tensor(labels_by_tok, dtype=torch.long)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels_by_tok,
            "seq": seq,
        }

# Loading model

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
device

device(type='cuda')

I was training on the HyenaDNA with context window length 1k and 32k, there's no change in quality, 32k just has params to work with larger context which are unused in my case (since length of my seq <1000)

In [17]:
model_name = "LongSafari/hyenadna-tiny-1k-seqlen-hf"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# SequenceClassification model to get the "hyena" + head score
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
config.num_labels = 2
seq_model, seq_loading_info = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    config=config,
    trust_remote_code=True,
    output_loading_info=True,
)
# print(config)
# TokenClassificationPerToken without loading weights
token_model = HyenaDNAForTokenClassification(config)

# weights from seq_model to token_model
seq_sd = seq_model.state_dict()
token_sd = token_model.state_dict()

# Backbone: all parameters "hyena."
for k, v in seq_sd.items():
    if k.startswith("hyena."):
        token_sd[k] = v.clone()
print(seq_sd.keys())
# rename score → classifier
token_sd["classifier.weight"] = seq_sd["score.weight"].clone()

missing, unexpected = token_model.load_state_dict(token_sd, strict=False)

token_model.to(device)

Some weights of HyenaDNAForSequenceClassification were not initialized from the model checkpoint at LongSafari/hyenadna-tiny-1k-seqlen-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


odict_keys(['hyena.backbone.embeddings.word_embeddings.weight', 'hyena.backbone.layers.0.mixer.in_proj.weight', 'hyena.backbone.layers.0.mixer.in_proj.bias', 'hyena.backbone.layers.0.mixer.out_proj.weight', 'hyena.backbone.layers.0.mixer.out_proj.bias', 'hyena.backbone.layers.0.mixer.short_filter.weight', 'hyena.backbone.layers.0.mixer.short_filter.bias', 'hyena.backbone.layers.0.mixer.filter_fn.bias', 'hyena.backbone.layers.0.mixer.filter_fn.pos_emb.z', 'hyena.backbone.layers.0.mixer.filter_fn.pos_emb.t', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.0.weight', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.0.bias', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.1.freq', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.2.weight', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.2.bias', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.3.freq', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.4.weight', 'hyena.backbone.laye

HyenaDNAForTokenClassification(
  (hyena): HyenaDNAModel(
    (backbone): HyenaLMBackbone(
      (embeddings): HyenaEmbeddings(
        (word_embeddings): Embedding(16, 128)
      )
      (dropout): Dropout(p=0.1, inplace=False)
      (layers): ModuleList(
        (0-1): 2 x HyenaBlock(
          (mixer): HyenaOperator(
            (dropout): Dropout(p=0.0, inplace=False)
            (in_proj): Linear(in_features=128, out_features=384, bias=True)
            (out_proj): Linear(in_features=128, out_features=128, bias=True)
            (short_filter): Conv1d(384, 384, kernel_size=(3,), stride=(1,), padding=(2,), groups=384)
            (filter_fn): HyenaFilter(
              (dropout): Dropout(p=0.0, inplace=False)
              (pos_emb): HyenaPositionalEmbedding()
              (implicit_filter): Sequential(
                (0): Linear(in_features=5, out_features=64, bias=True)
                (1): HyenaSin()
                (2): Linear(in_features=64, out_features=64, bias=True)
     

Check if models have some weights

In [18]:
seq_model.to(device)
for p1, p2 in zip(seq_model.parameters(), token_model.parameters()):
    if p1.data.ne(p2.data).sum() > 0:
        print("Parameters are not equal")
        print("parameter name:", p1.name)
        print("parameter name:", p2.name)
for p1, p2 in zip(seq_sd.keys(), token_sd.keys()):
    if p1 != p2 and p2 != "classifier.weight" and p1 != "score.weight":
        print("Parameters are not equal")
        print("parameter name:", p1)
        print("parameter name:", p2)
for (name1, param1), (name2, param2) in zip(seq_sd.items(), token_sd.items()):
    if name2 != "classifier.weight" and name1 != "score.weight":
        if not torch.equal(param1, param2):
            print("Parameters are not equal")
            print("parameter name in seq_sd:", name1)
            print("parameter name in token_sd:", name2)

In [19]:
seq_sd.keys()

odict_keys(['hyena.backbone.embeddings.word_embeddings.weight', 'hyena.backbone.layers.0.mixer.in_proj.weight', 'hyena.backbone.layers.0.mixer.in_proj.bias', 'hyena.backbone.layers.0.mixer.out_proj.weight', 'hyena.backbone.layers.0.mixer.out_proj.bias', 'hyena.backbone.layers.0.mixer.short_filter.weight', 'hyena.backbone.layers.0.mixer.short_filter.bias', 'hyena.backbone.layers.0.mixer.filter_fn.bias', 'hyena.backbone.layers.0.mixer.filter_fn.pos_emb.z', 'hyena.backbone.layers.0.mixer.filter_fn.pos_emb.t', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.0.weight', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.0.bias', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.1.freq', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.2.weight', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.2.bias', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.3.freq', 'hyena.backbone.layers.0.mixer.filter_fn.implicit_filter.4.weight', 'hyena.backbone.laye

In [None]:
all_intervals = ints_in + ints_out

# 20 % for test
train_val, test_intervals = train_test_split(
    all_intervals,
    test_size=0.2,
    stratify=[f"{label}_{chrom}" for chrom, _, _, label in all_intervals],
    random_state=42,
)

# 80 % for train + val
train_intervals, val_intervals = train_test_split(
    train_val,
    test_size=0.25,  # 0.25 от 80% = 20% 
    stratify=[f"{label}_{chrom}" for chrom, _, _, label in train_val],
    random_state=42,
)

train_ds = DNATokenClassificationDataset(
    chroms=chroms,
    dna_source=DNA,
    labels_source=G4,
    intervals=[(c, s, e) for c, s, e, _ in train_intervals],
    tokenizer=tokenizer,
    max_length=width,
)
val_ds = DNATokenClassificationDataset(
    chroms=chroms,
    dna_source=DNA,
    labels_source=G4,
    intervals=[(c, s, e) for c, s, e, _ in val_intervals],
    tokenizer=tokenizer,
    max_length=width,
)
test_ds = DNATokenClassificationDataset(
    chroms=chroms,
    dna_source=DNA,
    labels_source=G4,
    intervals=[(c, s, e) for c, s, e, _ in test_intervals],
    tokenizer=tokenizer,
    max_length=width,
)

In [21]:
for i in range(len(train_ds)):
    if sum(train_ds[i]['labels'].numpy()) != 0 and sum(train_ds[i]['labels'].numpy()) != -100:
        print(i, train_ds[i]['input_ids'])
        print(train_ds[i]['labels'])
        print(train_ds[i]['attention_mask'])
        break

1 tensor([ 8, 10,  9,  7,  8,  8,  8,  8, 10,  9,  8,  8, 10,  9,  7,  8,  8,  7,
         7,  9,  8,  8,  7, 10,  9, 10,  8, 10,  9,  7,  7,  8,  7,  9,  9,  7,
         9,  9,  8, 10,  8,  7,  7,  9,  8,  8,  8,  8,  7,  9,  9,  9,  9,  9,
         8,  8,  9,  9,  9,  9,  9,  8, 10,  9,  8,  8,  8,  8,  8,  9,  9,  7,
         8,  7, 10,  9,  8, 10,  9,  9,  8,  7,  9,  7,  9,  8,  7,  9,  9, 10,
         9,  9,  7,  9,  8, 10,  9, 10,  9,  9,  1])
tensor([   1,    1,    1,    1,    1,    1,    1,    1,    1,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
           1,    1,    1,    1,    1,    1,    1,    1,    1,    

In [22]:
train_ds[0]['seq']

'CGAGTGCAGGGCTGCTGCTCAGAGGCCCGCCCAGGCGCCCCGCAGGGAGATGGCCCACCACAGAGCGCCAGGGGAACTGTTCTTCCAGCGCCAGGGAACG'

In [23]:
example = train_ds[1]
print("Length of input_ids:", len(example["input_ids"]))
print("Length of labels:", len(example["labels"]))
print("Length of attention_mask:", len(example["attention_mask"]))

Length of input_ids: 101
Length of labels: 101
Length of attention_mask: 101


# Defining function for metrics calculations
Evaluated using accuracy, F1 (binary, macro, micro, weighted), precision, recall, ROC AUC, and Matthews correlation.  
In some cases did not calculate all metrics, but the most important for every case of experiments are F1 binary and MCC

In [24]:
def compute_metrics2(p):
    preds = p.predictions.argmax(-1).flatten()
    labs = p.label_ids.flatten()
    mask = labs != -100
    preds = preds[mask]
    labs = labs[mask]

    return {
        "accuracy": accuracy_score(labs, preds),
        "f1": f1_score(labs, preds, average="binary"),
        "roc_auc": roc_auc_score(labs, preds),
        "matthews": matthews_corrcoef(labs, preds),
        "precision": precision_metric.compute(predictions=preds, references=labs)[
            "precision"
        ],
        "recall": recall_metric.compute(predictions=preds, references=labs)["recall"],
        "f1_macro": f1_metric.compute(
            predictions=preds, references=labs, average="macro"
        )["f1"],
        "f1_micro": f1_metric.compute(
            predictions=preds, references=labs, average="micro"
        )["f1"],
        "f1_weighted": f1_metric.compute(
            predictions=preds, references=labs, average="weighted"
        )["f1"],
    }

# Experiments for finding best params for G4 task without PEFT methods

## Important note before checking code below
- Verify that your environment uses the correct versions of libraries and that file paths match your system. Otherwise, subsequent cells may error out due to missing dependencies.
- It may look dirty, but I specifically titled experiments everywhere for better understanding, in particular, I highlighted the settings from the best training.
- TLDR of exps below.
- In the case of HyenaDNA, LoRa showed that this was useless since the code speed is the same (thanks to the hyena operator), but it was interesting to verify this with a few different experiments
### Overview of Conducted Experiments

I ran four main experiment groups to benchmark training strategies and parameter-efficient fine-tuning:

1. **Head-only warmup (Stage 1)**

   * **Frozen backbone**, train head for 3 epochs
   * Large LR (1e-3) to quickly adapt classification layer

2. **Full fine-tuning (Stage 2)**

   * **Unfrozen backbone and head**, 6 epochs
   * LR = 1e-5 for backbone, 5e-4 for head; weight decay = 0.01

3. **Extended full fine-tuning**

   * 12 epochs at LR = 3e-4 (and variants at 6e-4, 5e-4)
   * Batch sizes 32–64, linear or plateau schedulers, FP16, online data augmentation

4. **LoRA parameter-efficient tuning**

   * Rank r=8, α=32, dropout=0.1
   * **Exp 1:** Full Hyena modules, 3 epochs
   * **Exp 2:** Full Hyena modules, 10 epochs, linear warmup
   * **Exp 3:** Mixer-only modules, 5 epochs

Each experiment logs F1 on validation and saves the best model. 

#### Result
Best result was achieved in the following case. 


*Performed standard training with batch size = 32, sequence length = 100, and 10% warmup.*
*Used class-weighted cross-entropy loss (CE=1 and 8) with two-class weights.*  
*Trained for 12 epochs; achieved F1 score of **0.5788**.*


Also, it can be seen that all experiments ended with results close to each other, ~F1 0.57. This result shows that model is not ready for DNA sub-domain task - classification of G4, so these results will not be interpreted.   

## Experiment : CE 07 and 2 Two-Stage Fine-Tuning (F1 = 0.5779)  
Loss weighting: CE = 0.7 and 2; model trained in two stages:

- **Stage 1 (Frozen Backbone):**  
  Trained only the classification head for 3 epochs using LR = 1e-3.  
  All Hyena backbone layers frozen.

- **Stage 2 (Full Fine-Tuning):**  
  Unfroze the entire model and fine-tuned for 6 more epochs.  
  Used discriminative learning rates: 1e-5 (backbone), 5e-4 (head).  
  Achieved F1 score of **0.5779** on the evaluation set.


In [66]:
# --- STEP 1: Freeze-backbone, train only the head ---
# Freeze all backbone parameters
for param in token_model.hyena.parameters():
    param.requires_grad = False

# Check that the classifier still requires_grad=True
for name, param in token_model.named_parameters():
    if "classifier" in name:
        assert param.requires_grad

# Re-create Trainer with a large LR for the head
training_args_stage1_augdata = TrainingArguments(
    output_dir="./stage1_g4",
    num_train_epochs=3,
    learning_rate=1e-3, # train the head quickly
    per_device_train_batch_size=64,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    save_safetensors=False

)

trainer_stage1_augdata = Trainer(
    model=token_model,
    args=training_args_stage1_augdata,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics2,
)
trainer_stage1_augdata.train()

# After that, trainer_stage1.model stores the weights of the backbone (frozen)
# and trained head (F1 best).

# --- STEP 2: unfreeze-backbone and retrain the whole model ---
# Remove freeze
for param in token_model.hyena.parameters():
    param.requires_grad = True

from torch.optim import AdamW
optimizer = AdamW(
    [
      { "params": token_model.hyena.parameters(), "lr": 1e-5},
      { "params": token_model.classifier.parameters(), "lr": 5e-4},
    ],
    weight_decay=0.01
)

# New TrainingArguments with a lower base LR
training_args_stage2_augdata = TrainingArguments(
    output_dir="./stage2_g4",
    num_train_epochs=6,
    learning_rate=5e-5, # since the backbone is now also learning
    per_device_train_batch_size=64,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    save_safetensors=False

)

# Trainer with custom optimiser
trainer_stage2_augdata = Trainer(
    model=token_model,
    args=training_args_stage2_augdata,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    optimizers=(optimizer, None), 
    compute_metrics=compute_metrics2,
)

trainer_stage2_augdata.can_return_loss = True
trainer_stage2_augdata.train()

  trainer_stage1_augdata = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.2284,0.224746,0.914438,0.576013,0.821901,0.542513,0.48397,0.711288,0.764216,0.914438,0.921661
2,0.2228,0.224792,0.913778,0.575643,0.823547,0.542607,0.481435,0.715689,0.763829,0.913778,0.92126
3,0.2264,0.224746,0.914416,0.576196,0.822218,0.542772,0.483894,0.71201,0.764299,0.914416,0.921661


  trainer_stage2_augdata = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.2262,0.224132,0.915699,0.577818,0.820185,0.543797,0.489024,0.706012,0.765496,0.915699,0.922503
2,0.22,0.224558,0.91455,0.576981,0.822825,0.543683,0.484461,0.713182,0.764728,0.91455,0.921793
3,0.2228,0.225966,0.916667,0.577268,0.816302,0.542323,0.492976,0.696332,0.765523,0.916667,0.923012
4,0.2244,0.225085,0.915894,0.577363,0.818949,0.54305,0.489791,0.703068,0.765332,0.915894,0.922582
5,0.2242,0.22517,0.91566,0.577188,0.81948,0.543013,0.488841,0.704511,0.765173,0.91566,0.922437
6,0.2249,0.225195,0.915331,0.577008,0.820317,0.543052,0.487516,0.706743,0.764983,0.915331,0.922238


TrainOutput(global_step=31494, training_loss=0.22400511576121457, metrics={'train_runtime': 3202.4452, 'train_samples_per_second': 629.365, 'train_steps_per_second': 9.834, 'total_flos': 530457970977792.0, 'train_loss': 0.22400511576121457, 'epoch': 6.0})

In [67]:
test_metrics = trainer_stage2_augdata.evaluate(eval_dataset=test_ds)
print("=== Тестовые метрики ===")
for k, v in test_metrics.items():
    print(f"{k}: {v:.4f}")

=== Тестовые метрики ===
eval_loss: 0.2242
eval_accuracy: 0.9159
eval_f1: 0.5779
eval_roc_auc: 0.8206
eval_matthews: 0.5441
eval_precision: 0.4887
eval_recall: 0.7068
eval_f1_macro: 0.7656
eval_f1_micro: 0.9159
eval_f1_weighted: 0.9227
eval_runtime: 323.4118
eval_samples_per_second: 346.2240
eval_steps_per_second: 43.2790
epoch: 6.0000


In [68]:
save_dir = "./models/hyenadna-small-1k_g4_len100_bs64_freeze_3_epochs_unfreeze_6_epochs_bestmodel"
trainer_stage2_augdata.save_model(save_dir)     
tokenizer.save_pretrained(save_dir)     


('./models/hyenadna-small-1k_g4_len100_bs64_freeze_3_epochs_unfreeze_6_epochs_bestmodel\\tokenizer_config.json',
 './models/hyenadna-small-1k_g4_len100_bs64_freeze_3_epochs_unfreeze_6_epochs_bestmodel\\special_tokens_map.json',
 './models/hyenadna-small-1k_g4_len100_bs64_freeze_3_epochs_unfreeze_6_epochs_bestmodel\\added_tokens.json')

## Experiment: Seq Length 100, Batch 64, CE=1 and 8, F1 = 0.5772  
Performed standard training with batch size = 64, sequence length = 100, and 10% warmup.  
Used class-weighted cross-entropy loss (CE=1 and 8) with two-class weights.  
Trained for 12 epochs; achieved F1 score of **0.5772**.


In [61]:
training_args = TrainingArguments(
    output_dir="./results/hyenadna1k_g4_finetune_len100_batch64_onlinedatagen",
    label_names=["labels"],
    num_train_epochs=12,
    learning_rate=3e-4,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    optim="adamw_torch",
    weight_decay=0.01,
    lr_scheduler_type="reduce_lr_on_plateau",
    warmup_ratio=0.10,
    fp16=True,
    save_total_limit=3,
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    save_safetensors=False,
)

trainer = Trainer(
    model=token_model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics2,
)

trainer.can_return_loss = True
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.2409,0.236566,0.906686,0.565824,0.83264,0.536303,0.456452,0.744128,0.756775,0.906686,0.91652
2,0.2353,0.235566,0.910368,0.5676,0.823633,0.535054,0.468464,0.719955,0.758801,0.910368,0.918755
3,0.2336,0.234375,0.915958,0.573499,0.81372,0.538054,0.489896,0.69151,0.763443,0.915958,0.922345
4,0.2326,0.233451,0.907738,0.5702,0.835423,0.541235,0.460321,0.748983,0.759261,0.907738,0.917425
5,0.2317,0.234475,0.920056,0.575371,0.802892,0.537808,0.508296,0.662839,0.765623,0.920056,0.924783
6,0.2308,0.232428,0.916248,0.576922,0.817219,0.542203,0.491222,0.698846,0.765223,0.916248,0.922751
7,0.2301,0.232675,0.912409,0.573416,0.824977,0.540907,0.476219,0.720465,0.762305,0.912409,0.920325
8,0.2292,0.232428,0.915379,0.576317,0.819252,0.542118,0.487672,0.704347,0.764656,0.915379,0.922216
9,0.2282,0.233581,0.91719,0.576511,0.81362,0.540975,0.495176,0.689817,0.76531,0.91719,0.923254
10,0.2271,0.234361,0.914161,0.573882,0.819979,0.539969,0.482763,0.7074,0.763077,0.914161,0.921354


TrainOutput(global_step=62988, training_loss=0.23078379985634664, metrics={'train_runtime': 5515.0981, 'train_samples_per_second': 730.906, 'train_steps_per_second': 11.421, 'total_flos': 1060915941955584.0, 'train_loss': 0.23078379985634664, 'epoch': 12.0})

In [62]:
test_metrics = trainer.evaluate(eval_dataset=test_ds)
print("=== Тестовые метрики ===")
for k, v in test_metrics.items():
    print(f"{k}: {v:.4f}")

=== Тестовые метрики ===
eval_loss: 0.2322
eval_accuracy: 0.9165
eval_f1: 0.5772
eval_roc_auc: 0.8178
eval_matthews: 0.5427
eval_precision: 0.4911
eval_recall: 0.6999
eval_f1_macro: 0.7654
eval_f1_micro: 0.9165
eval_f1_weighted: 0.9230
eval_runtime: 249.1007
eval_samples_per_second: 449.5090
eval_steps_per_second: 7.0250
epoch: 12.0000


## Experiment: Seq Length 100, Batch 32, CE=1 and 8, F1 =0.5788
Performed standard training with batch size = 32, sequence length = 100, and 10% warmup.  
Used class-weighted cross-entropy loss (CE=1 and 8) with two-class weights.  
Trained for 12 epochs; achieved F1 score of **0.5788**.


In [23]:
training_args = TrainingArguments(
    output_dir="./results/hyenadna1k_g4_finetune_len100_batch64_onlinedatagen",
    label_names=["labels"],
    num_train_epochs=12,
    learning_rate=6e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    optim="adamw_torch",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    warmup_ratio=0.10,
    fp16=True,
    save_total_limit=3,
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    save_safetensors=False,
)

trainer = Trainer(
    model=token_model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics2,
)

trainer.can_return_loss = True
trainer.train()

  trainer = Trainer(
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: romanbokhyan (romanbokhyan-hse-university). Use `wandb login --relogin` to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.2518,0.245746,0.904058,0.553074,0.823184,0.521473,0.446486,0.72651,0.749667,0.904058,0.914133
2,0.2379,0.236336,0.905447,0.56398,0.833899,0.535066,0.45249,0.748373,0.755477,0.905447,0.915679
3,0.235,0.233849,0.912659,0.571039,0.821012,0.53754,0.47691,0.711463,0.761209,0.912659,0.920301
4,0.2336,0.235474,0.908744,0.569142,0.830796,0.538722,0.463316,0.737621,0.759055,0.908744,0.917932
5,0.2323,0.233805,0.920653,0.574863,0.800343,0.536913,0.511266,0.656531,0.765553,0.920653,0.92508
6,0.2311,0.231246,0.911705,0.573945,0.827946,0.542278,0.473778,0.727824,0.762347,0.911705,0.91996
7,0.2302,0.23072,0.912415,0.575271,0.827455,0.543373,0.476413,0.725898,0.763222,0.912415,0.920458
8,0.2292,0.230132,0.915315,0.577769,0.821375,0.544054,0.487491,0.709083,0.765353,0.915315,0.922282
9,0.2282,0.230322,0.916973,0.578766,0.817251,0.544001,0.494301,0.698048,0.766357,0.916973,0.923291
10,0.2269,0.230461,0.915354,0.57778,0.821262,0.544035,0.487645,0.708789,0.765371,0.915354,0.922305


TrainOutput(global_step=125976, training_loss=0.2321651761793913, metrics={'train_runtime': 6972.0694, 'train_samples_per_second': 578.166, 'train_steps_per_second': 18.069, 'total_flos': 1060915941955584.0, 'train_loss': 0.2321651761793913, 'epoch': 12.0})

In [24]:
test_metrics = trainer.evaluate(eval_dataset=test_ds)
print("=== Тестовые метрики ===")
for k, v in test_metrics.items():
    print(f"{k}: {v:.4f}")

=== Тестовые метрики ===
eval_loss: 0.2304
eval_accuracy: 0.9172
eval_f1: 0.5788
eval_roc_auc: 0.8177
eval_matthews: 0.5443
eval_precision: 0.4939
eval_recall: 0.6988
eval_f1_macro: 0.7664
eval_f1_micro: 0.9172
eval_f1_weighted: 0.9235
eval_runtime: 261.7581
eval_samples_per_second: 427.7730
eval_steps_per_second: 13.3710
epoch: 12.0000


## Experiment: Seq Length 100, Batch 64, CE=0.7 and 2, F1 = 0.5761  
Performed standard training with batch size = 64, sequence length = 100, and 10% warmup.  
Used class-weighted cross-entropy loss (CE=0.7 and 2) with two-class weights.  
Trained for 12 epochs; achieved F1 score of **0.5761**.


In [63]:
training_args = TrainingArguments(
    output_dir="./results/hyenadna1k_g4_finetune_len100_batch64_onlinedatagen",            
    label_names=["labels"],
    num_train_epochs=12,                                 
    learning_rate=5e-4,                                  
    per_device_train_batch_size=64,                     
    per_device_eval_batch_size=64,                     
    optim="adamw_torch",
    weight_decay=0.01,                                   
    lr_scheduler_type="linear",                          
    warmup_ratio=0.10,                                    
    fp16=True,                                           

    save_total_limit=3,
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    save_safetensors=False

)

trainer = Trainer(
    model = token_model, 
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,   
    tokenizer=tokenizer,
    compute_metrics=compute_metrics2,
)

trainer.can_return_loss = True
trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.2286,0.232987,0.913603,0.574058,0.822005,0.54068,0.480658,0.712513,0.762992,0.913603,0.92105
2,0.2315,0.23415,0.911258,0.57087,0.825223,0.538589,0.471895,0.722381,0.760691,0.911258,0.919491
3,0.2304,0.232874,0.916301,0.576099,0.815974,0.541108,0.491413,0.69605,0.764832,0.916301,0.922723
4,0.2291,0.233184,0.907832,0.570572,0.835644,0.541648,0.460665,0.749354,0.759474,0.907832,0.917505
5,0.2273,0.235524,0.918647,0.575368,0.807446,0.538587,0.50163,0.674522,0.765191,0.918647,0.923992
6,0.225,0.235654,0.916092,0.574257,0.814264,0.538917,0.490482,0.692544,0.763858,0.916092,0.922475
7,0.2224,0.235506,0.913508,0.572743,0.820565,0.539038,0.480202,0.709466,0.762313,0.913508,0.920904
8,0.2192,0.239553,0.91298,0.570117,0.818785,0.536058,0.478012,0.706189,0.760854,0.91298,0.920419
9,0.2155,0.24479,0.913875,0.566908,0.811824,0.531201,0.481164,0.689837,0.759546,0.913875,0.920702
10,0.2114,0.25065,0.912201,0.563657,0.812809,0.528309,0.474532,0.694002,0.757423,0.912201,0.919524


TrainOutput(global_step=62988, training_loss=0.22091344958253456, metrics={'train_runtime': 5485.1892, 'train_samples_per_second': 734.891, 'train_steps_per_second': 11.483, 'total_flos': 1060915941955584.0, 'train_loss': 0.22091344958253456, 'epoch': 12.0})

In [64]:
test_metrics = trainer.evaluate(eval_dataset=test_ds)
print("=== Тестовые метрики ===")
for k, v in test_metrics.items():
    print(f"{k}: {v:.4f}")

=== Тестовые метрики ===
eval_loss: 0.2327
eval_accuracy: 0.9164
eval_f1: 0.5761
eval_roc_auc: 0.8166
eval_matthews: 0.5414
eval_precision: 0.4908
eval_recall: 0.6974
eval_f1_macro: 0.7649
eval_f1_micro: 0.9164
eval_f1_weighted: 0.9229
eval_runtime: 248.5710
eval_samples_per_second: 450.4670
eval_steps_per_second: 7.0400
epoch: 12.0000


In [65]:
save_dir = "./models/hyenadna-small-1k-g4_len100_bs64_lr5e-4_12epochs_linear_adamw_07_2weights_best2"

trainer.save_model(save_dir)      
tokenizer.save_pretrained(save_dir)    


('./models/hyenadna-small-1k-g4_len100_bs64_lr5e-4_12epochs_linear_adamw_07_2weights_best2\\tokenizer_config.json',
 './models/hyenadna-small-1k-g4_len100_bs64_lr5e-4_12epochs_linear_adamw_07_2weights_best2\\special_tokens_map.json',
 './models/hyenadna-small-1k-g4_len100_bs64_lr5e-4_12epochs_linear_adamw_07_2weights_best2\\added_tokens.json')

In [25]:
full_target_modules = [
    # Mixer layers
    "hyena.backbone.layers.0.mixer.in_proj",
    "hyena.backbone.layers.0.mixer.out_proj",
    "hyena.backbone.layers.1.mixer.in_proj",
    "hyena.backbone.layers.1.mixer.out_proj",
    "hyena.backbone.layers.2.mixer.in_proj",
    "hyena.backbone.layers.2.mixer.out_proj",
    "hyena.backbone.layers.3.mixer.in_proj",
    "hyena.backbone.layers.3.mixer.out_proj",
    
    # MLP (FeedforwardNetwork) layers
    "hyena.backbone.layers.0.mlp.fc1",
    "hyena.backbone.layers.0.mlp.fc2",
    "hyena.backbone.layers.1.mlp.fc1",
    "hyena.backbone.layers.1.mlp.fc2",
    "hyena.backbone.layers.2.mlp.fc1",
    "hyena.backbone.layers.2.mlp.fc2",
    "hyena.backbone.layers.3.mlp.fc1",
    "hyena.backbone.layers.3.mlp.fc2",
]
mixer_only = [
    "hyena.backbone.layers.0.mixer.in_proj",
    "hyena.backbone.layers.0.mixer.out_proj",
    "hyena.backbone.layers.1.mixer.in_proj",
    "hyena.backbone.layers.1.mixer.out_proj",
    "hyena.backbone.layers.2.mixer.in_proj",
    "hyena.backbone.layers.2.mixer.out_proj",
    "hyena.backbone.layers.3.mixer.in_proj",
    "hyena.backbone.layers.3.mixer.out_proj",
]
ffn_only = [
    "hyena.backbone.layers.0.mlp.fc1",
    "hyena.backbone.layers.0.mlp.fc2",
    "hyena.backbone.layers.1.mlp.fc1",
    "hyena.backbone.layers.1.mlp.fc2",
    "hyena.backbone.layers.2.mlp.fc1",
    "hyena.backbone.layers.2.mlp.fc2",
    "hyena.backbone.layers.3.mlp.fc1",
    "hyena.backbone.layers.3.mlp.fc2",
]

## LoRa

In [None]:

base_model = token_model


### Experiment 1: full modules, 3 epochs, BS=64, LR=2e-5

In [27]:
peft_config1 = LoraConfig(
    task_type=TaskType.TOKEN_CLS, inference_mode=False,
    r=8, lora_alpha=32, lora_dropout=0.1,
    target_modules=full_target_modules,
)
model1 = get_peft_model(base_model, peft_config1)
model1.print_trainable_parameters()

training_args1 = TrainingArguments(
    output_dir="./results/exp1_full_3ep_6e-4",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs/exp1_full_3ep_6e-4",
    logging_steps=10,
    learning_rate=5e-4,
    weight_decay=0.01,
    save_safetensors=False

)
trainer1 = Trainer(
    model=model1,
    args=training_args1,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    compute_metrics=compute_metrics2,
)
trainer1.train()
model1 = model1.unload()  

trainable params: 33,024 || all params: 469,376 || trainable%: 7.0357


No label_names provided for model class `PeftModelForTokenClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: romanbokhyan (romanbokhyan-hse-university). Use `wandb login --relogin` to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.2545,0.236807,0.907718,0.565709,0.829297,0.535078,0.459587,0.735556,0.757042,0.907718,0.917106
2,0.2196,0.23334,0.912256,0.572262,0.823919,0.539515,0.475562,0.718325,0.761688,0.912256,0.920157
3,0.2352,0.232391,0.913054,0.574202,0.823958,0.541357,0.478632,0.717457,0.762893,0.913054,0.920747


 [15747/15747 22:07, Epoch 3/3]


### Experiment 2: full modules, 10 epochs, BS=64, LR=5e-4

In [28]:
peft_config2 = LoraConfig(
    task_type=TaskType.TOKEN_CLS, inference_mode=False,
    r=8, lora_alpha=32, lora_dropout=0.1,
    target_modules=full_target_modules,
)
model2 = get_peft_model(base_model, peft_config2)
model2.print_trainable_parameters()

training_args2 = TrainingArguments(
    output_dir="./results/exp2_full_10ep_6e-4",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs/exp2_full_10ep_6e-4",
    logging_steps=10,
    learning_rate=5e-4,
    weight_decay=0.01,
    optim="adamw_torch",
    lr_scheduler_type="linear",
    warmup_ratio=0.10,
    save_safetensors=False

)
trainer2 = Trainer(
    model=model2,
    args=training_args2,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    compute_metrics=compute_metrics2,
)
trainer2.train()
model2 = model2.unload()



trainable params: 33,024 || all params: 469,376 || trainable%: 7.0357


No label_names provided for model class `PeftModelForTokenClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.2583,0.239716,0.900766,0.557883,0.839478,0.531672,0.438621,0.766218,0.750997,0.900766,0.912552
2,0.2205,0.235335,0.907475,0.566469,0.831087,0.536367,0.458951,0.739777,0.75734,0.907475,0.917018
3,0.2387,0.234458,0.913472,0.571816,0.819458,0.537858,0.479994,0.707079,0.761845,0.913472,0.920818
4,0.2507,0.233895,0.907192,0.568583,0.834889,0.539582,0.458413,0.748461,0.758293,0.907192,0.917
5,0.1936,0.234363,0.919804,0.57543,0.803782,0.53801,0.507069,0.665095,0.765575,0.919804,0.924646
6,0.2422,0.2328,0.916466,0.575508,0.814674,0.540231,0.492081,0.692998,0.764591,0.916466,0.922774
7,0.2363,0.232368,0.913214,0.573854,0.822979,0.540754,0.479194,0.715117,0.762771,0.913214,0.920814
8,0.2156,0.231802,0.913676,0.575912,0.824238,0.543054,0.481071,0.71733,0.76393,0.913676,0.921221
9,0.2512,0.231412,0.915003,0.576942,0.821297,0.543241,0.486219,0.709285,0.764849,0.915003,0.922047
10,0.2309,0.231346,0.914013,0.576672,0.824161,0.543753,0.482393,0.716757,0.764409,0.914013,0.921466


 [52490/52490 1:13:39, Epoch 10/10]


### Experiment 3: mixer only, 10 epochs, BS=64, LR=5e-4

In [29]:
peft_config3 = LoraConfig(
    task_type=TaskType.TOKEN_CLS, inference_mode=False,
    r=8, lora_alpha=32, lora_dropout=0.1,
    target_modules=mixer_only,
)
model3 = get_peft_model(base_model, peft_config3)
model3.print_trainable_parameters()

training_args3 = TrainingArguments(
    output_dir="./results/exp3_mixer_only_5ep_6e-4",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs/exp3_mixer_only_5ep_6e-4",
    logging_steps=10,
    learning_rate=5e-4,
    weight_decay=0.01,
    save_safetensors=False

)
trainer3 = Trainer(
    model=model3,
    args=training_args3,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=tokenizer,
    compute_metrics=compute_metrics2,
)
trainer3.train()
model3 = model3.unload()



trainable params: 12,544 || all params: 448,896 || trainable%: 2.7944


No label_names provided for model class `PeftModelForTokenClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Roc Auc,Matthews,Precision,Recall,F1 Macro,F1 Micro,F1 Weighted
1,0.254,0.237459,0.9057,0.563645,0.832662,0.534331,0.453167,0.745356,0.755392,0.9057,0.915802
2,0.2217,0.235106,0.90912,0.568025,0.828097,0.536824,0.464372,0.731246,0.758622,0.90912,0.91807
3,0.2376,0.234449,0.915005,0.572213,0.8151,0.537131,0.485967,0.695678,0.762514,0.915005,0.921716
4,0.2506,0.23447,0.9082,0.568622,0.831787,0.53857,0.461524,0.740446,0.758628,0.9082,0.917583
5,0.1936,0.235796,0.921273,0.573034,0.796131,0.534487,0.514534,0.646542,0.764836,0.921273,0.925294
6,0.2433,0.233094,0.916692,0.575392,0.813795,0.539919,0.493027,0.690798,0.764604,0.916692,0.922894
7,0.2381,0.232779,0.913406,0.574049,0.822625,0.540841,0.47992,0.714111,0.762926,0.913406,0.920937
8,0.2173,0.23229,0.913449,0.574895,0.823614,0.541917,0.480147,0.71623,0.763357,0.913449,0.92102
9,0.2517,0.232074,0.915041,0.57615,0.820127,0.542179,0.486326,0.706671,0.764469,0.915041,0.922013
10,0.234,0.231933,0.913689,0.575553,0.823717,0.542569,0.481092,0.71617,0.763756,0.913689,0.921203


 [52490/52490 1:12:04, Epoch 10/10]
