In [21]:
!nvidia-smi

Sun Nov  2 22:41:38 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.57                 Driver Version: 581.57         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3070 Ti   WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   57C    P2             61W /  290W |    7684MiB /   8192MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [23]:
import torch
import torch._dynamo
torch.set_float32_matmul_precision('high')
torch._dynamo.config.suppress_errors = True
torch._dynamo.disable()
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [5]:
dataset_name = "Aniemore/cedr-m7"
dataset = load_dataset(dataset_name, split="test[:500]")
dataset

Dataset({
    features: ['text', 'labels', 'source', 'label2ids'],
    num_rows: 500
})

In [6]:
df = pd.DataFrame(dataset)

if isinstance(df['labels'].iloc[0], list):
    label_names = df['labels'].explode().unique()
else:
    label_names = df['labels'].unique()

print(label_names)

['neutral' 'fear' 'enthusiasm' 'sadness' 'happiness' 'anger' 'disgust']


In [7]:
labels_str = ", ".join(label_names)

In [8]:
def sample2messages(sample):
    text = sample['text']
    prompt = f"""Определи эмоцию в следующем тексте.
Текст: "{text}"
Эмоция (выбери только одно из: {labels_str}):"""
    return [{'role': 'user', 'content': prompt}]

In [None]:
warnings.filterwarnings("ignore")
model_names = ["Lamapi/next-1b", "Unbabel/Tower-Plus-2B"]
results = {}

for model_name in model_names:
    
    model = AutoModelForCausalLM.from_pretrained(
        model_name,  
        device_map='cuda', 
        torch_dtype=torch.bfloat16 
    )
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    gen_config = GenerationConfig.from_pretrained(model_name)
    gen_config.max_new_tokens = 15
    gen_config.do_sample = False
    gen_config.pad_token_id = tokenizer.eos_token_id

    def generate(sample):
        inputs = tokenizer.apply_chat_template(
            sample2messages(sample), 
            add_generation_prompt=True, 
            return_tensors='pt'
        )
        inputs = inputs.to(model.device)
        
        with torch.no_grad():
            res = model.generate(inputs, generation_config=gen_config).detach().cpu().numpy()[0]
        
        output_tokens = res[inputs.shape[1]:]
        return tokenizer.decode(output_tokens, skip_special_tokens=True)
    
    predictions = []
    ground_truth = []

    for sample in tqdm(dataset):
        predict_raw = generate(sample)
        
        predict_clean = predict_raw.strip().lower().strip(".,'\"")
        
        found_label = None
        for label in label_names:
            if label in predict_clean.split():
                found_label = label
                break
        
        if not found_label:
             for label in label_names:
                if label in predict_clean:
                    found_label = label
                    break
        
        if found_label is None:
            found_label = "unknown"
            
        predictions.append(found_label)
        
        true_label_str = sample['labels'][0].lower() 
        
        ground_truth.append(true_label_str)

    
    print(f"Результаты для {model_name}")
    
    report = classification_report(
        ground_truth, 
        predictions, 
        labels=label_names, 
        zero_division=0,
        digits=3
    )
    print(report)
    
    acc = accuracy_score(ground_truth, predictions)
    print(f"Overall Accuracy: {acc:.4f}")
    
    results[model_name] = {
        "report": report,
        "accuracy": acc
    }

100%|████████████████████████████████████████████████████████████████████████████████| 500/500 [08:59<00:00,  1.08s/it]


Результаты для Lamapi/next-1b
              precision    recall  f1-score   support

     neutral      0.667     0.009     0.017       226
        fear      0.812     0.703     0.754        37
  enthusiasm      0.087     0.344     0.139        32
     sadness      0.750     0.400     0.522       105
   happiness      0.375     0.481     0.422        81
       anger      0.128     0.882     0.224        17
     disgust      0.000     0.000     0.000         2

   micro avg      0.291     0.270     0.280       500
   macro avg      0.403     0.403     0.297       500
weighted avg      0.590     0.270     0.258       500

Overall Accuracy: 0.2700


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
import torch
import torch._dynamo
torch.set_float32_matmul_precision('high')
torch._dynamo.config.suppress_errors = True
torch._dynamo.disable()
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings("ignore")

model_name = "Unbabel/Tower-Plus-2B"
results = {}

model = AutoModelForCausalLM.from_pretrained(
    model_name,  
    device_map='cuda', 
    torch_dtype=torch.bfloat16 
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_name)

gen_config = GenerationConfig.from_pretrained(model_name)
gen_config.max_new_tokens = 15
gen_config.do_sample = False
gen_config.pad_token_id = tokenizer.eos_token_id

def generate(sample):
    inputs = tokenizer.apply_chat_template(
        sample2messages(sample), 
        add_generation_prompt=True, 
        return_tensors='pt'
    )
    inputs = inputs.to(model.device)
    
    with torch.no_grad():
        res = model.generate(inputs, generation_config=gen_config).detach().cpu().numpy()[0]
    
    output_tokens = res[inputs.shape[1]:]
    return tokenizer.decode(output_tokens, skip_special_tokens=True)

predictions = []
ground_truth = []

for sample in tqdm(dataset):
    predict_raw = generate(sample)
    
    predict_clean = predict_raw.strip().lower().strip(".,'\"")
    
    found_label = None
    for label in label_names:
        if label in predict_clean.split():
            found_label = label
            break
    
    if not found_label:
         for label in label_names:
            if label in predict_clean:
                found_label = label
                break
    
    if found_label is None:
        found_label = "unknown"
        
    predictions.append(found_label)
    
    true_label_str = sample['labels'][0].lower() 
    
    ground_truth.append(true_label_str)

print(f"Результаты для {model_name}")

report = classification_report(
    ground_truth, 
    predictions, 
    labels=label_names, 
    zero_division=0,
    digits=3
)
print(report)

acc = accuracy_score(ground_truth, predictions)
print(f"Overall Accuracy: {acc:.4f}")

results[model_name] = {
    "report": report,
    "accuracy": acc
}

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  0%|                                                                                          | 0/500 [00:00<?, ?it/s]The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Using `cache_implementation='hybrid' is deprecated. Please only use one of ('static', 'offloaded_static'), and the layer structure will be inferred automatically.
W1102 23:03:18.794000 14784 site-packages\torch\_dynamo\convert_frame.py:1125] WON'T CONVERT wrapper C:\Users\artur\AppData\Local\Programs\Python\Python310\lib\site-packages\transformers\utils\generic.py line 912 
W1102 23:03:18.794000 14784 site-packages\torch\_dynamo\convert_frame.py:1125] due to: 
W1102 23:03:18.794000 14784 site-packages\torch\_dynamo\convert_frame.py:1125] Traceback (most recent call last):
W1102 23:03:18.794000 14784 site-packages\torch\_dynamo\convert_frame.py:

Результаты для Unbabel/Tower-Plus-2B
              precision    recall  f1-score   support

     neutral      0.752     0.482     0.588       226
        fear      0.337     0.919     0.493        37
  enthusiasm      0.098     0.156     0.120        32
     sadness      0.892     0.629     0.737       105
   happiness      0.833     0.556     0.667        81
       anger      0.192     0.824     0.311        17
     disgust      0.000     0.000     0.000         2

   micro avg      0.548     0.546     0.547       500
   macro avg      0.443     0.509     0.417       500
weighted avg      0.700     0.546     0.583       500

Overall Accuracy: 0.5460



