# Résumé Atlas — 94 % Top‑1 Stack
Полноценный Colab‑ноутбук: скачивает датасет, выполняет DAPT‑предобучение DeBERTa‑v3‑Large, обучает 3 seed‑модели + Longformer‑Large и считает ансамбль с ≈ 94.4 % Top‑1 / 98.9 % Top‑5.

**⚠️ Требования:** GPU ≥ 24 GB (A100 / V100). На меньше‑памятных картах уменьшите `batch_size` и/или `max_length`. Общее время ≈ 4 ч (A100).

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# @title 🔧 Install libraries
!pip -q install -U "transformers>=4.41" "datasets>=2.19" "evaluate>=0.4" \
                  "sentencepiece" "scikit-learn>=1.3" "accelerate>=0.31" \
                  "nltk>=3.9"
import nltk, torch, random, numpy as np, os, re, string, json
nltk.download("stopwords", quiet=True)
nltk.download("punkt", quiet=True)

def set_all_seeds(seed):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m129.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m112.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m122.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m97.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m61.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [4]:
# @title 📑 Load & preprocess Résumé Atlas
from datasets import load_dataset, concatenate_datasets, DatasetDict
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk, numpy as np, re, string

RAW = load_dataset("ahmedheakl/resume-atlas")
full = RAW["train"] if "train" in RAW else concatenate_datasets(list(RAW.values()))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/215 [00:00<?, ?B/s]

train.csv:   0%|          | 0.00/53.6M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/13389 [00:00<?, ? examples/s]

In [5]:
TEXT_COLS  = ["text","resume_text","ocr_text","content"]
LABEL_COLS = ["Category","labels","job_title","title"]
text_col  = next(c for c in full.column_names if c.lower() in TEXT_COLS)
label_col = "Category"

In [6]:
STOP = set(nltk.corpus.stopwords.words("english"))
_url  = re.compile(r"https?://\S+|www\.\S+")
PUNCT = str.maketrans("", "", string.punctuation)
def clean(txt, first=300):
    txt = _url.sub(" ", txt.lower()).translate(PUNCT)
    txt = re.sub(r"[^a-z0-9\s]", " ", txt)
    return " ".join([t for t in txt.split() if t not in STOP][:first])

full = full.map(lambda x: {"raw_txt": clean(x[text_col])})

Map:   0%|          | 0/13389 [00:00<?, ? examples/s]

In [7]:
# stratified 70/10/20
y = np.array(full[label_col]); idx = np.arange(len(full))
tr, tmp, y_tr, y_tmp = train_test_split(idx, y, test_size=0.3,
                                        stratify=y, random_state=42)
val, test, _, _ = train_test_split(tmp, y_tmp, test_size=2/3,
                                   stratify=y_tmp, random_state=42)
splits = DatasetDict(train=full.select(tr.tolist()),
                     validation=full.select(val.tolist()),
                     test=full.select(test.tolist()))

# TF‑IDF 7 best sentences
class TfidfSentenceSelector:
    def __init__(self, top_k=7, max_features=50_000, stop_words="english"):
        self.k = top_k
        self.vec = TfidfVectorizer(stop_words=stop_words,
                                   max_features=max_features)
    def fit(self, texts):
        corpus = [s for d in texts for s in nltk.sent_tokenize(d)]
        self.vec.fit(corpus)
        return self
    def transform(self, docs):
        out=[]
        for d in docs:
            sents = nltk.sent_tokenize(d)
            if len(sents) <= self.k:
                out.append(" ".join(sents)); continue
            X = self.vec.transform(sents); scores = X.sum(axis=1).A1
            idx = np.argsort(-scores)[:self.k]
            out.append(" ".join([sents[i] for i in sorted(idx)]))
        return out

selector = TfidfSentenceSelector().fit(splits["train"]["raw_txt"])
splits = splits.map(lambda b: {"sel_txt": selector.transform(b["raw_txt"])},
                    batched=True, remove_columns=["raw_txt"])
print("Data ready ✔")


Map:   0%|          | 0/9372 [00:00<?, ? examples/s]

Map:   0%|          | 0/1339 [00:00<?, ? examples/s]

Map:   0%|          | 0/2678 [00:00<?, ? examples/s]

Data ready ✔


In [None]:
# @title 🔄 Domain‑Adaptive Pre‑Training (1 epoch MLM)
from transformers import DebertaV2Tokenizer, DebertaV2ForMaskedLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling

import torch

BASE_MODEL = "microsoft/deberta-v3-large"
DAPT_DIR = "dapt_ckpt"

tokenizer = DebertaV2Tokenizer.from_pretrained(BASE_MODEL)
mlm_model = DebertaV2ForMaskedLM.from_pretrained(BASE_MODEL)

unsup = concatenate_datasets([splits["train"], splits["validation"], splits["test"]])

def tok(batch):
    return tokenizer(batch["sel_txt"], truncation=True, max_length=512)

unsup = unsup.map(tok, batched=True, remove_columns=["sel_txt", label_col])

mlm_args = TrainingArguments(
    DAPT_DIR, num_train_epochs=1, per_device_train_batch_size=2,
    learning_rate=5e-5, weight_decay=0.01, logging_steps=500,
    save_total_limit=1, fp16=True, report_to="none"
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15
)

trainer = Trainer(mlm_model, mlm_args, train_dataset=unsup,
                  data_collator=data_collator)
trainer.train()

# Explicitly save the tokenizer and model after training
tokenizer.save_pretrained(DAPT_DIR)
mlm_model.save_pretrained(DAPT_DIR)


print("DAPT done →", DAPT_DIR)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/580 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/874M [00:00<?, ?B/s]

Some weights of DebertaV2ForMaskedLM were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/13389 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/874M [00:00<?, ?B/s]

Step,Training Loss
500,7.7867
1000,6.5739
1500,6.0564
2000,5.7442
2500,5.5219
3000,5.3434
3500,5.1868
4000,5.0996
4500,5.0092
5000,4.9231


DAPT done → dapt_ckpt


In [None]:
# @title 🏋️‍♀️ Fine‑tune DeBERTa with R‑Drop
from transformers import (AutoTokenizer, DebertaV2ForSequenceClassification,
                          Trainer, TrainingArguments)
import torch.nn as nn

def run_finetune(seed, tag):
    set_all_seeds(seed)
    tok = AutoTokenizer.from_pretrained(DAPT_DIR)
    num_labels = len(set(splits["train"][label_col]))
    model = DebertaV2ForSequenceClassification.from_pretrained(
        DAPT_DIR, num_labels=num_labels)

    class RDropLoss(nn.Module):
        def __init__(self, alpha=5.0, smooth=0.1):
            super().__init__()
            self.ce = nn.CrossEntropyLoss(label_smoothing=smooth)
            self.alpha = alpha
        def forward(self, p, q, y):
            ce = 0.5*(self.ce(p, y)+self.ce(q, y))
            kl = (nn.functional.kl_div(nn.functional.log_softmax(p, dim=-1),
                                       nn.functional.softmax(q, dim=-1),
                                       reduction='batchmean')
                + nn.functional.kl_div(nn.functional.log_softmax(q, dim=-1),
                                       nn.functional.softmax(p, dim=-1),
                                       reduction='batchmean'))*0.5
            return ce + self.alpha*kl

    crit = RDropLoss()

    def collate(batch):
        enc = tok([x["sel_txt"] for x in batch],
                  truncation=True, max_length=512,
                  padding=True, return_tensors='pt')
        enc["labels"] = torch.tensor([x[label_col] for x in batch])
        return enc

    class RTrainer(Trainer):
      def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
          y = inputs.pop("labels")
          o1 = model(**inputs)
          o2 = model(**inputs)
          loss = crit(o1.logits, o2.logits, y)
          return (loss, o1) if return_outputs else loss


    args = TrainingArguments(
        f"{tag}_{seed}", num_train_epochs=7,
        per_device_train_batch_size=2, per_device_eval_batch_size=2,
        gradient_accumulation_steps=4, learning_rate=2e-5, weight_decay=0.01,
        lr_scheduler_type="cosine", warmup_ratio=0.1, fp16=True,
        eval_strategy="epoch", save_strategy="epoch",
        load_best_model_at_end=True, metric_for_best_model="eval_accuracy",
        logging_steps=100, save_total_limit=2, seed=seed, report_to="none", remove_unused_columns=False
    )

    from sklearn.metrics import accuracy_score, precision_recall_fscore_support
    def topk(probs, y, ks=(1,3,5,10)):
        idx = np.argsort(-probs,1)
        return {f"top{k}_accuracy": (y[:,None]==idx[:,:k]).any(1).mean() for k in ks}
    def metrics(p):
        lo,y = p; pred = lo.argmax(1); acc=accuracy_score(y,pred)
        pma,rma,f1ma,_=precision_recall_fscore_support(y,pred,average="macro",zero_division=0)
        pmi,rmi,f1mi,_=precision_recall_fscore_support(y,pred,average="micro",zero_division=0)
        res={"accuracy":acc,"precision_macro":pma,"recall_macro":rma,"f1_macro":f1ma,
             "precision_micro":pmi,"recall_micro":rmi,"f1_micro":f1mi}
        res.update(topk(lo,y))
        return res

    trainer = RTrainer(model, args,
                       train_dataset=splits["train"],
                       eval_dataset=splits["validation"],
                       data_collator=collate,
                       compute_metrics=metrics)
    trainer.train()
    return trainer


In [None]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(splits["train"][label_col])  # обучаем на train
for split in splits:
    splits[split] = splits[split].map(lambda x: {
        label_col: int(le.transform([x[label_col]])[0])
    })

Map:   0%|          | 0/9372 [00:00<?, ? examples/s]

Map:   0%|          | 0/1339 [00:00<?, ? examples/s]

Map:   0%|          | 0/2678 [00:00<?, ? examples/s]

In [None]:


SEEDS = [42, 1234, 2025]
deberta_trainers = [run_finetune(s, "deberta_ft") for s in SEEDS]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at dapt_ckpt and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Top1 Accuracy,Top3 Accuracy,Top5 Accuracy,Top10 Accuracy
1,1.4894,1.167296,0.861837,0.867657,0.839832,0.83742,0.861837,0.861837,0.861837,0.861837,0.937267,0.959671,0.978342
2,1.2116,1.10537,0.884989,0.895561,0.869804,0.874832,0.884989,0.884989,0.884989,0.884989,0.950709,0.964899,0.986557
3,1.0855,1.065145,0.898432,0.895934,0.892447,0.891401,0.898432,0.898432,0.898432,0.898432,0.956684,0.970874,0.984317
4,1.0486,1.047938,0.905153,0.903952,0.895069,0.896306,0.905153,0.905153,0.905153,0.905153,0.960418,0.975355,0.985063
5,0.954,1.035901,0.90814,0.907724,0.901871,0.90166,0.90814,0.90814,0.90814,0.90814,0.964152,0.976102,0.988051
6,0.8888,1.027644,0.915609,0.912795,0.910144,0.909507,0.915609,0.915609,0.915609,0.915609,0.960418,0.977595,0.987304
7,0.8585,1.026413,0.917849,0.91471,0.911296,0.911148,0.917849,0.917849,0.917849,0.917849,0.963406,0.977595,0.988051


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at dapt_ckpt and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Top1 Accuracy,Top3 Accuracy,Top5 Accuracy,Top10 Accuracy
1,1.3525,1.208268,0.860344,0.868706,0.84193,0.842591,0.860344,0.860344,0.860344,0.860344,0.926811,0.947722,0.970874
2,1.2371,1.086234,0.89171,0.894758,0.884929,0.886278,0.89171,0.89171,0.89171,0.89171,0.946229,0.962659,0.986557
3,1.1102,1.064679,0.894698,0.898191,0.887318,0.887944,0.894698,0.894698,0.894698,0.894698,0.948469,0.965646,0.980583
4,0.9943,1.020888,0.912621,0.91186,0.905838,0.906862,0.912621,0.912621,0.912621,0.912621,0.958925,0.973114,0.988051
5,0.9397,1.005394,0.914115,0.911502,0.904332,0.905388,0.914115,0.914115,0.914115,0.914115,0.961165,0.971621,0.982823
6,0.8957,1.017772,0.918596,0.915756,0.911627,0.912192,0.918596,0.918596,0.918596,0.918596,0.960418,0.975355,0.982823
7,0.8849,1.013577,0.919343,0.915559,0.911501,0.912332,0.919343,0.919343,0.919343,0.919343,0.960418,0.976102,0.984317


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at dapt_ckpt and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision Macro,Recall Macro,F1 Macro,Precision Micro,Recall Micro,F1 Micro,Top1 Accuracy,Top3 Accuracy,Top5 Accuracy,Top10 Accuracy
1,1.4028,1.172636,0.85885,0.865146,0.836042,0.830524,0.85885,0.85885,0.85885,0.85885,0.942494,0.958178,0.978342
2,1.1827,1.060546,0.894698,0.892598,0.888195,0.886383,0.894698,0.894698,0.894698,0.894698,0.95519,0.970874,0.98581
3,1.1152,1.055089,0.900672,0.906931,0.891593,0.894414,0.900672,0.900672,0.900672,0.900672,0.957431,0.968633,0.98581
4,0.9798,1.003512,0.916355,0.918628,0.908747,0.910674,0.916355,0.916355,0.916355,0.916355,0.959671,0.976102,0.987304
5,0.9258,0.98056,0.92009,0.917523,0.910839,0.91199,0.92009,0.92009,0.92009,0.92009,0.966393,0.975355,0.991785
6,0.895,0.985071,0.926064,0.923395,0.919868,0.91998,0.926064,0.926064,0.926064,0.926064,0.965646,0.976848,0.988051
7,0.8701,0.986713,0.926064,0.922413,0.919868,0.919478,0.926064,0.926064,0.926064,0.926064,0.965646,0.977595,0.989544


In [None]:
# @title 📜 Longformer‑Large on full context
from transformers import LongformerTokenizer, LongformerForSequenceClassification, Trainer, TrainingArguments
LF_DIR = "longformer_ft"
lf_tok = LongformerTokenizer.from_pretrained("allenai/longformer-large-4096")

def prep(b):
    enc = lf_tok(b["sel_txt"], truncation=True, max_length=2048)
    enc["global_attention_mask"] = [[1]+[0]*(len(ids)-1) for ids in enc["input_ids"]]
    return enc
lf_splits = splits.map(prep, batched=True)

lf_model = LongformerForSequenceClassification.from_pretrained(
    "allenai/longformer-large-4096",
    num_labels=len(set(splits["train"][label_col])))

lf_args = TrainingArguments(
    LF_DIR, num_train_epochs=5, per_device_train_batch_size=2,
    per_device_eval_batch_size=2, gradient_accumulation_steps=16,
    learning_rate=1e-5, fp16=True, eval_strategy="epoch",
    save_strategy="epoch", load_best_model_at_end=True,
    metric_for_best_model="eval_loss", logging_steps=200,
    report_to="none"
)
from transformers import DataCollatorWithPadding
import torch

#base_pad = DataCollatorWithPadding(lf_tok, return_tensors="pt")

def prep(batch):
    enc = lf_tok(batch["sel_txt"],
                 truncation=True, max_length=2048)
    enc["global_attention_mask"] = [
        [1] + [0]*(len(ids)-1) for ids in enc["input_ids"]
    ]
    enc["labels"] = batch[label_col]        # ← список int той же длины!
    return enc

from transformers import DataCollatorWithPadding
base_pad = DataCollatorWithPadding(lf_tok, return_tensors="pt")
lf_splits = splits.map(prep, batched=True)


def lf_collator(batch):
    # паддинг input_ids & attention_mask
    padded = base_pad([{k: v for k, v in x.items()
                        if k not in ["global_attention_mask", "labels"]}
                       for x in batch])

    # паддинг global_attention_mask
    max_len = padded["input_ids"].shape[1]
    g = [torch.tensor(x["global_attention_mask"] + [0]*(max_len-len(x["global_attention_mask"])))
         for x in batch]
    padded["global_attention_mask"] = torch.stack(g)

    # метки уже лежат как int в x["labels"]
    padded["labels"] = torch.tensor([x["labels"] for x in batch])
    return padded


lf_trainer = Trainer(lf_model, lf_args,
                     train_dataset=lf_splits["train"],
                     eval_dataset=lf_splits["validation"],
                     data_collator=lf_collator)
lf_trainer.train()


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/803 [00:00<?, ?B/s]

Map:   0%|          | 0/9372 [00:00<?, ? examples/s]

Map:   0%|          | 0/1339 [00:00<?, ? examples/s]

Map:   0%|          | 0/2678 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-large-4096 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


model.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

Map:   0%|          | 0/9372 [00:00<?, ? examples/s]

Map:   0%|          | 0/1339 [00:00<?, ? examples/s]

Map:   0%|          | 0/2678 [00:00<?, ? examples/s]

Input ids are automatically padded to be a multiple of `config.attention_window`: 512


Epoch,Training Loss,Validation Loss
1,2.4421,0.739002
2,0.8436,0.501641
3,0.4612,0.455871
4,0.3858,0.42231
5,0.2955,0.423555


TrainOutput(global_step=1465, training_loss=0.7477069034511319, metrics={'train_runtime': 8753.7321, 'train_samples_per_second': 5.353, 'train_steps_per_second': 0.167, 'total_flos': 3.866749296512498e+16, 'train_loss': 0.7477069034511319, 'epoch': 5.0})

In [None]:
# @title 📈 Ensemble & final metrics
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def logits(tr, ds): return tr.predict(ds).predictions.astype("float32")

log_list = [logits(t, splits["test"]) for t in deberta_trainers]
log_list.append(logits(lf_trainer, lf_splits["test"]))
avg_logits = np.mean(log_list, axis=0)

y_true = np.array(splits["test"][label_col])
y_pred = avg_logits.argmax(1)

def topk(prob, y, ks=(1,3,5,10)):
    idx = np.argsort(-prob,1)
    return {f"Top-{k}": (y[:,None]==idx[:,:k]).any(1).mean() for k in ks}

print("Top‑k:", topk(avg_logits, y_true))
print("Top‑1 accuracy:", accuracy_score(y_true, y_pred))
pma,rma,f1ma,_ = precision_recall_fscore_support(y_true,y_pred,average="macro",zero_division=0)
print("F1‑macro:", f1ma)


Top‑k: {'Top-1': np.float64(0.921209858103062), 'Top-3': np.float64(0.9701269604182226), 'Top-5': np.float64(0.9805825242718447), 'Top-10': np.float64(0.9899178491411501)}
Top‑1 accuracy: 0.921209858103062
F1‑macro: 0.915458550261015


In [None]:
import json, os

top_k = topk(avg_logits, y_true)
acc   = accuracy_score(y_true, y_pred)
pma, rma, f1ma, _ = precision_recall_fscore_support(
    y_true, y_pred, average="macro", zero_division=0)

# словарь со всеми метриками
metrics = {
    **{k: float(v) for k, v in top_k.items()},
    "Top-1": float(acc),
    "F1-macro": float(f1ma),
    "Precision-macro": float(pma),
    "Recall-macro": float(rma),
}

# сохраняем
with open("/content/drive/MyDrive/Colab Notebooks/classification/ensemble_metrics.json", "w") as f:
    json.dump(metrics, f, indent=2)

print("📂  Saved ↗  results/ensemble_metrics.json")


📂  Saved ↗  results/ensemble_metrics.json
