In [1]:
!nvidia-smi

Sun Nov 10 20:25:07 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.4     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off | 00000000:CA:00.0 Off |                    0 |
| N/A   26C    P0              92W / 400W |  28598MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
!pip install accelerate
!pip install transformers==4.45.2
!pip install bitsandbytes
!pip install datasets
!pip install rouge-score
!pip install pymorphy2
!pip install peft
#!pip install flash_attn

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0mLooking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [3]:
!git clone https://github.com/RefalMachine/llmtf_open
%cd llmtf_open
!wget https://raw.githubusercontent.com/dialogue-evaluation/RuOpinionNE-2024/master/train.jsonl

fatal: destination path 'llmtf_open' already exists and is not an empty directory.
/workdir/diploma-llm/hw3/llmtf_open
--2024-11-10 20:25:27--  https://raw.githubusercontent.com/dialogue-evaluation/RuOpinionNE-2024/master/train.jsonl
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1291979 (1.2M) [text/plain]
Saving to: ‘train.jsonl.19’


2024-11-10 20:25:28 (10.6 MB/s) - ‘train.jsonl.19’ saved [1291979/1291979]



In [4]:
import random
from typing import List, Dict

import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer
from tqdm import tqdm

class ChatDataset(Dataset):
    def __init__(
        self,
        original_records: List[Dict],
        tokenizer: AutoTokenizer,
        max_tokens_count: int,
        sample_rate: float = 1.0,
        only_target_loss: bool = True,
        add_global_bos: bool = True,
        add_global_eos: bool = True,
        labels_pad_token_id: int = -100,
    ):
        self.original_records = original_records
        self.sample_rate = sample_rate
        self.tokenizer = tokenizer
        self.max_tokens_count = max_tokens_count
        self.only_target_loss = only_target_loss
        self.labels_pad_token_id = labels_pad_token_id
        self.add_global_bos = add_global_bos
        self.add_global_eos = add_global_eos
        self.is_printed = False

        self.records = []
        for record in tqdm(original_records):
            if random.random() > self.sample_rate:
                continue
            tensors = self.convert_record(record)
            if tensors is None:
                continue
            self.records.append(tensors)

    def __len__(self):
        return len(self.records)

    def __getitem__(self, index):
        return self.records[index]

    def fill_template(self, message: str, inputs: Dict[str, str]) -> str:
        """Заполняет шаблон значениями из inputs."""
        return message.format(**inputs)

    def get_tokens(self, messages):
        tokens = self.tokenizer.apply_chat_template(
            messages,
            add_special_tokens=False,
            tokenize=True,
            add_generation_prompt=False,
        )
        if tokens[0] == self.tokenizer.bos_token_id:
            tokens = tokens[1:]
        return tokens

    def convert_record(self, record):
        input_ids, labels = [], []

        filled_messages = [
            {"content": self.fill_template(msg["content"], record["inputs"]), "role": msg["role"]}
            for msg in record["messages"]
        ]

        for i, message in enumerate(filled_messages):
            if message['role'] == 'bot':
                message['role'] = 'assistant'
                filled_messages[i]['role'] = 'assistant'

            message_input_ids = self.get_tokens([message])
            message_labels = message_input_ids

            if len(input_ids) + len(message_input_ids) > self.max_tokens_count - 2:
                break

            labels_mask = [self.labels_pad_token_id for _ in range(len(message_input_ids))]
            if message["role"] not in ("assistant", "bot", "gpt") and self.only_target_loss:
                message_labels = labels_mask

            input_ids.extend(message_input_ids)
            labels.extend(message_labels)

        if not input_ids:
            return None

        original_input_ids = self.get_tokens(filled_messages)
        if input_ids != original_input_ids[: len(input_ids)]:
            print("Mismatch found:")
            print("Generated input_ids:", input_ids)
            print("Original input_ids:", original_input_ids[: len(input_ids)])
        
        assert input_ids == original_input_ids[: len(input_ids)]

        if self.add_global_bos and input_ids[0] != self.tokenizer.bos_token_id:
            input_ids.insert(0, self.tokenizer.bos_token_id)
            labels.insert(0, self.labels_pad_token_id)

        if input_ids[-2] == self.tokenizer.eos_token_id:
            input_ids = input_ids[:-1]
            labels = labels[:-1]

        if self.add_global_eos and input_ids[-1] != self.tokenizer.eos_token_id:
            input_ids.append(self.tokenizer.eos_token_id)
            labels.append(self.tokenizer.eos_token_id)

        if not self.is_printed:
            print(input_ids)
            print(labels)
            print(
                "Full prompt:" +
                self.tokenizer.decode(input_ids, skip_special_tokens=False)
            )
            assert '\n' in self.tokenizer.decode(input_ids, skip_special_tokens=False)
            self.is_printed = True

        input_ids = torch.LongTensor(input_ids)
        labels = torch.LongTensor(labels)
        attention_mask = input_ids.new_ones(input_ids.size())
        assert (
            input_ids.size(0)
            == labels.size(0)
            == attention_mask.size(0)
            <= self.max_tokens_count
        )
        return {
            "input_ids": input_ids,
            "labels": labels,
            "attention_mask": attention_mask,
        }

In [5]:
from datasets import load_dataset
dataset = load_dataset('kngrg/ru-QAmeleon')
dataset = dataset['test']
dataset = dataset.train_test_split(test_size=0.2)
dataset

DatasetDict({
    train: Dataset({
        features: ['messages', 'inputs', 'outputs'],
        num_rows: 3689
    })
    test: Dataset({
        features: ['messages', 'inputs', 'outputs'],
        num_rows: 923
    })
})

In [6]:
import random
import json
import os

import torch
import numpy as np
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForTokenClassification,
    AutoConfig,
)
from transformers import (
    Trainer,
    TrainingArguments,
    logging,
    TrainerCallback,
    TrainerState,
    TrainerControl,
    BitsAndBytesConfig,
)
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from peft import get_peft_model, LoraConfig
import re
from peft import prepare_model_for_kbit_training
import codecs

os.environ["WANDB_DISABLED"] = "true"

In [7]:
import codecs
import json
import copy
from collections import OrderedDict, defaultdict
import numpy as np
from tqdm import tqdm
import os
from datasets import load_dataset, Dataset
from typing import Dict, List, Tuple
from llmtf.metrics import mean, metric_max_over_ground_truths, f1_macro_score
import transformers.data.metrics.squad_metrics as squad_metrics
import re
from llmtf.base import Task, SimpleFewShotHFTask, LLM
from difflib import SequenceMatcher
import pandas as pd
import string

class QATask(SimpleFewShotHFTask):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.method = 'generate'
        self.dataset_name = 'QAmeleon'
        self._max_new_tokens = 64

    @classmethod
    def name(cls):
        return 'kngrg/ru-QAmeleon'

    def dataset_args(self) -> Dict:
        return {'path': 'kngrg/ru-QAmeleon'}

    def aggregation(self) -> Dict:
        return {
          "f1": mean,
          "em": mean
        }

    def evaluate(self, sample, y_pred) -> Dict:
        y_true = sample['outputs']['segment']
        f1 = metric_max_over_ground_truths(squad_metrics.compute_f1, y_pred, y_true)
        em = metric_max_over_ground_truths(squad_metrics.compute_exact, y_pred, y_true)

        return {
          "f1": f1,
          "em": em
        }

    def test_split_name(self) -> str:
        return 'test'

    def prompt_split_name(self) -> str:
        return 'prompt'

    def create_messages(self, sample, with_answer=None) -> List[Dict]:
        messages = sample['messages']
        inputs = sample['inputs']
        for m in messages:
            m['content'] = m['content'].format(**inputs)
        return messages

In [8]:
task = QATask()

In [9]:
model_name = 'RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4'
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [10]:
only_target_loss = True
max_tokens_count = 512
datasets = []
for records in (dataset['train'], dataset['test']):
    datasets.append(
        ChatDataset(
            records,
            tokenizer,
            max_tokens_count=max_tokens_count,
            sample_rate=1.0,
            only_target_loss=only_target_loss,
            add_global_eos=False,
            add_global_bos=False
        )
    )
train_dataset, val_dataset = datasets

  3%|███████▏                                                                                                                                                                                                             | 125/3689 [00:00<00:05, 657.69it/s]

[147076, 882, 198, 54745, 38438, 9542, 102073, 105116, 71995, 7740, 93747, 102833, 13373, 100594, 25, 101036, 110789, 101728, 23934, 110810, 110646, 106429, 13373, 140652, 102677, 1532, 949, 11712, 35095, 15298, 6735, 4708, 1232, 220, 17, 21, 102085, 220, 16, 24, 16, 19, 100261, 100971, 110789, 101728, 23934, 110810, 110646, 106429, 13373, 140652, 102677, 1532, 11, 126363, 7952, 9706, 44075, 13999, 37013, 39900, 102105, 12769, 129098, 142569, 13289, 107667, 5591, 100449, 13373, 123724, 103417, 100046, 26, 220, 22, 18, 23, 123116, 118833, 105998, 108567, 26, 139544, 100269, 102453, 116244, 627, 147077, 198, 147076, 78191, 198, 60627, 48074, 25, 147077]
[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -10

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3689/3689 [00:04<00:00, 764.38it/s]
  8%|█████████████████▉                                                                                                                                                                                                     | 77/923 [00:00<00:01, 759.17it/s]

[147076, 882, 198, 20976, 112778, 101064, 13373, 105116, 71995, 93747, 102833, 13373, 100594, 25, 101036, 57855, 121589, 102542, 111427, 101997, 5927, 107230, 107092, 949, 11712, 35095, 15298, 6735, 4708, 1232, 57855, 121589, 102542, 111427, 101997, 5927, 107230, 107092, 5927, 220, 16, 24, 23, 23, 100281, 7740, 5524, 100505, 91146, 106189, 100798, 100882, 100778, 115574, 6856, 13373, 100366, 100414, 43896, 107263, 13, 23784, 122622, 107230, 107092, 57855, 121589, 106166, 19175, 123856, 13, 125639, 105508, 4655, 23934, 57855, 20346, 100532, 19175, 104223, 48355, 100356, 101051, 146478, 108150, 627, 147077, 198, 147076, 78191, 198, 60627, 48074, 25, 147077]
[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 923/923 [00:01<00:00, 750.04it/s]


In [11]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=f"cuda:0",
    torch_dtype=torch.float16,
    attn_implementation="sdpa",
)
prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(147097, 2048)
    (layers): ModuleList(
      (0-35): 36 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=True)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=True)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=True)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): Qwen2RotaryEmbedding()
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((2048,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((2048,), eps=1e-0

In [12]:
messages = [{'role': 'user', 'content': 'Напиши что такое LLM.'}]
tokenizer.apply_chat_template(messages, return_tensors='pt', add_special_tokens=True, add_generation_prompt=True)

tensor([[147076,    882,    198,  20807,  19619, 129192,  48489, 101765,    445,
          11237,     13, 147077,    198, 147076,  78191,    198]])

In [13]:
from transformers import GenerationConfig

def generate(messages, model, tokenizer, generation_config):
    input_ids = tokenizer.apply_chat_template(messages, return_tensors='pt', add_special_tokens=True, add_generation_prompt=True)
    input_ids = input_ids.to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            generation_config=generation_config
        )
    outputs = []
    for sample_output_ids, sample_input_ids in zip(output_ids, input_ids):
        sample_output_ids = sample_output_ids[len(sample_input_ids):]
        sample_output = tokenizer.decode(sample_output_ids, skip_special_tokens=True)
        outputs.append(sample_output)

    if len(outputs) == 1:
        outputs = outputs[0]
    return outputs



generation_config = GenerationConfig.from_dict(
    {
        'top_k': 40,
        'top_p': 0.9,
        'temperature': 0.2,
        'repetition_penalty': 1.0,
        'max_new_tokens': 64,
        'do_sample': True,
        'pad_token_id': tokenizer.pad_token_id,
        'bos_token_id': tokenizer.bos_token_id,
        'eos_token_id': tokenizer.eos_token_id
    }
)
generation_config

GenerationConfig {
  "bos_token_id": 147075,
  "do_sample": true,
  "eos_token_id": 147077,
  "max_new_tokens": 64,
  "pad_token_id": 147075,
  "temperature": 0.2,
  "top_k": 40,
  "top_p": 0.9
}

In [14]:
generate(messages, model, tokenizer, generation_config)

'LLM (Law Master of Laws) — это степень высшего образования, присуждаемая юридическим факультетам университетов по окончании трех- или четырехгодичного обучения. ЛLM является специализированным юридическим образованием, которое позволяет студентам углубить свои знания в определенной области права, например, в международном праве, корпоративном праве, интеллекту'

In [15]:
lora_config = {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.0,
    "bias": "none",
    "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj", "lm_head"]
}
lora_config = LoraConfig(**lora_config)
lora_config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=None, inference_mode=False, r=16, target_modules={'q_proj', 'lm_head', 'k_proj', 'o_proj', 'v_proj'}, lora_alpha=16, lora_dropout=0.0, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False))

In [16]:
lora_config.modules_to_save

In [17]:
model = get_peft_model(model, lora_config)
if model.config.tie_word_embeddings and lora_config.modules_to_save is not None and 'lm_head' in lora_config.modules_to_save:
    print('Tie embeddings')
    assert 'embed_tokens' not in lora_config.modules_to_save
    model.base_model.model.model.embed_tokens.weight = model.base_model.model.lm_head.modules_to_save["default"].weight



In [18]:
training_args = {
    "evaluation_strategy": "steps",
    "per_device_train_batch_size": 1,
    "per_device_eval_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "eval_steps": 16,
    "save_steps": 128,
    "logging_steps": 1,
    "learning_rate": 0.00005,
    "num_train_epochs": 1,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 16,
    "bf16": False,
    "fp16": True,
    "optim": "paged_adamw_8bit",
    "save_total_limit": 1,
    "seed": 1337,
    "max_grad_norm": 1.0,
    "weight_decay": 0.05
}
training_args = TrainingArguments(output_dir='./instruct', **training_args)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [19]:
data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)
if len(trainer.label_names) == 0:
    trainer.label_names.append('labels')

In [20]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
16,3.3333,3.110122
32,0.385,0.255117
48,0.0062,0.005532
64,0.0011,0.001242
80,0.0007,0.000723
96,0.0006,0.000544
112,0.0004,0.000444
128,0.0003,0.000376
144,0.0003,0.000327
160,0.0003,0.00029


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=461, training_loss=0.18462595219644398, metrics={'train_runtime': 4762.4959, 'train_samples_per_second': 0.775, 'train_steps_per_second': 0.097, 'total_flos': 7832200427285760.0, 'train_loss': 0.18462595219644398, 'epoch': 0.9997289238275956})

In [21]:
generate(messages, model, tokenizer, generation_config)

  return fn(*args, **kwargs)


'LLM (Law Master of Laws) — это профессиональный диплом, который присуждается студентам после успешного окончания четырех- или пяти-летний программы обучения в области права.'

In [22]:
from transformers import AutoModel, AutoTokenizer
save_directory = "models/ruQwen-lora"

model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)



('models/ruQwen-lora/tokenizer_config.json',
 'models/ruQwen-lora/special_tokens_map.json',
 'models/ruQwen-lora/vocab.json',
 'models/ruQwen-lora/merges.txt',
 'models/ruQwen-lora/added_tokens.json',
 'models/ruQwen-lora/tokenizer.json')

In [23]:
task = QATask()

In [24]:
from llmtf.model import HFModel

model_name_or_path = 'models/ruQwen-lora'
model = HFModel(conversation_template_path='conversation_configs/qwen2.json', device_map='cuda:0', attn_implementation="sdpa")
model.from_pretrained(model_name_or_path)

model.generation_config.max_new_tokens = 200
model.generation_config.repetition_penalty = 1.0
model.generation_config.do_sample = False
model.generation_config.temperature = 0.0
model.generation_config

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

```python
from transformers import AutoModelForCausalLM

# Load original tied model
model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-it", tie_word_embeddings=False)

# Set the randomly initialized lm_head to the previously tied embeddings
model.lm_head.weight.data = model.model.embed_tokens.weight.data.clone()

# Save the untied model
untied_model_dir = "dir/for/untied/model"
model.save_pretrained(untied_model_dir)
model.config.save_pretrained(untied_model_dir)

# Now use the original model but in untied format
model = AutoModelForCausalLM.from_pretrained(untied_model_dir)
```

INFO: 2024-11-10 21:45:24,274: llmtf.base.hfmodel: Set eos_token_id in generation_config to [147077]
INFO: 2024-11-10 21:45:24,275: llmtf.base.hfmodel: Model id: models/ruQwen-lora
INFO: 2024-11-10 21:45:24,276: llmtf.base.hfmodel: Leading space: False


GenerationConfig {
  "bos_token_id": 147075,
  "eos_token_id": [
    147077
  ],
  "max_length": 32768,
  "max_new_tokens": 200,
  "pad_token_id": 147075,
  "stop_strings": [
    "<|im_end|>"
  ],
  "temperature": 0.0,
  "top_k": 40,
  "top_p": 0.9
}

In [25]:
from llmtf.evaluator import Evaluator
evaluator = Evaluator()

evaluator.evaluate_dataset(
    task=task,
    model=model,
    output_dir='./qa_qwenFT',
    max_len=4000,
    few_shot_count=0,
    generation_config=None, 
    batch_size=4,
    max_sample_per_dataset=200
)

INFO: 2024-11-10 21:45:24,285: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [147077]
INFO: 2024-11-10 21:45:24,286: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 1654.80it/s]
INFO: 2024-11-10 21:45:26,762: llmtf.base.kngrg/ru-QAmeleon: Loading Dataset: 2.48s
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:03<00:00, 13.75it/s]
INFO: 2024-11-10 21:45:30,401: llmtf.base.kngrg/ru-QAmeleon: Processing Dataset: 3.64s
INFO: 2024-11-10 21:45:30,402: llmtf.base.kngrg/ru-QAmeleon: Results for kngrg/ru-QAmeleon:
INFO: 2024-11-10 21:4

In [26]:
!cat ./qa_qwenFT/kngrg_ru-QAmeleon_total.jsonl

{
    "task_name": "kngrg/ru-QAmeleon",
    "results": {
        "f1": 0.425,
        "em": 0.425
    },
    "leaderboard_result": 0.425
}


In [27]:
from llmtf.model import HFModel

model_name_or_path = 'RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4'
orig_model = HFModel(conversation_template_path='conversation_configs/qwen2.json', device_map='cuda:0', attn_implementation="sdpa")
orig_model.from_pretrained(model_name_or_path)

orig_model.generation_config.max_new_tokens = 200
orig_model.generation_config.repetition_penalty = 1.0
orig_model.generation_config.do_sample = False
orig_model.generation_config.temperature = 0.0
orig_model.generation_config

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

INFO: 2024-11-10 21:45:34,771: llmtf.base.hfmodel: Set eos_token_id in generation_config to [147077]
INFO: 2024-11-10 21:45:34,772: llmtf.base.hfmodel: Model id: RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4
INFO: 2024-11-10 21:45:34,773: llmtf.base.hfmodel: Leading space: False


GenerationConfig {
  "bos_token_id": 147075,
  "eos_token_id": [
    147077
  ],
  "max_length": 32768,
  "max_new_tokens": 200,
  "pad_token_id": 147075,
  "stop_strings": [
    "<|im_end|>"
  ],
  "temperature": 0.0,
  "top_k": 40,
  "top_p": 0.9,
  "trust_remote_code": false
}

In [28]:
from llmtf.evaluator import Evaluator
evaluator = Evaluator()

evaluator.evaluate_dataset(
    task=task,
    model=orig_model,
    output_dir='./qa_qwen_orig',
    max_len=4000,
    few_shot_count=0,
    generation_config=None, 
    batch_size=4,
    max_sample_per_dataset=200
)

INFO: 2024-11-10 21:45:34,780: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [147077]
INFO: 2024-11-10 21:45:34,781: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 1661.40it/s]
INFO: 2024-11-10 21:45:36,882: llmtf.base.kngrg/ru-QAmeleon: Loading Dataset: 2.10s
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [02:39<00:00,  3.18s/it]
INFO: 2024-11-10 21:48:16,042: llmtf.base.kngrg/ru-QAmeleon: Processing Dataset: 159.16s
INFO: 2024-11-10 21:48:16,043: llmtf.base.kngrg/ru-QAmeleon: Results for kngrg/ru-QAmeleon:
INFO: 2024-11-10 21

In [29]:
!cat ./qa_qwen_orig/kngrg_ru-QAmeleon_total.jsonl

{
    "task_name": "kngrg/ru-QAmeleon",
    "results": {
        "f1": 0.03360182205904116,
        "em": 0.0
    },
    "leaderboard_result": 0.01680091102952058
}
