In [1]:
!nvidia-smi

Sun Oct 27 18:33:01 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   58C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
!pip install accelerate
!pip install transformers==4.45.2
!pip install bitsandbytes
!pip install datasets
!pip install rouge-score
!pip install pymorphy2
!pip install peft
!pip install flash_attn

Collecting flash_attn
  Using cached flash_attn-2.6.3.tar.gz (2.6 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: flash_attn
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for flash_attn (setup.py) ... [?25lerror
[31m  ERROR: Failed building wheel for flash_attn[0m[31m
[0m[?25h  Running setup.py clean for flash_attn
Failed to build flash_attn
[31mERROR: ERROR: Failed to build installable wheels for some pyproject.toml based projects (flash_attn)[0m[31m
[0m

In [5]:
!git clone https://github.com/RefalMachine/llmtf_open
%cd llmtf_open
!wget https://raw.githubusercontent.com/dialogue-evaluation/RuOpinionNE-2024/master/train.jsonl

fatal: destination path 'llmtf_open' already exists and is not an empty directory.
/content/llmtf_open
--2024-10-27 18:47:22--  https://raw.githubusercontent.com/dialogue-evaluation/RuOpinionNE-2024/master/train.jsonl
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1291979 (1.2M) [text/plain]
Saving to: ‘train.jsonl.1’


2024-10-27 18:47:22 (142 MB/s) - ‘train.jsonl.1’ saved [1291979/1291979]



In [6]:
!ls

conversation_configs  examples	 requirements.txt		      todo.txt
Dockerfile	      llmtf	 run_evaluate_multinode_multigpu.py   train.jsonl
eval_grammar.py       qa_qwen	 run_evaluate_multinode_multigpu.sh   train.jsonl.1
evaluate_model.py     README.md  run_evaluate_singlenode_multigpu.sh


In [36]:
dataset = load_dataset("imvladikon/QAmeleon")

def transform_sample(sample):
    if sample['language'] != 'ru':
        return None

    user_prompts = [
        "Опираясь на данный текст ответьте на вопрос: {question} '''Текст''': {passage}\n",
        "Прочитайте данный текст и ответьте на вопрос: {question} '''Текст''': {passage}\n"
    ]
    user_prompt = random.choice(user_prompts)

    return {
        "messages": [
            {"role": "user", "content": user_prompt},
            {"role": "bot", "content": "Ответ:"}
        ],
        "inputs": {
            "question": sample['question'],
            "passage": sample['passage']
        },
        "outputs": {
            "label": "passage",
            "length": len(sample['answer'].split()),
            "segment": sample['answer']
        }
    }

transformed_data = [transform_sample(sample) for sample in dataset['train'] if transform_sample(sample) is not None]

test_size = int(0.83 * len(transformed_data))
random.seed(42)
random.shuffle(transformed_data)
test_data = transformed_data[:test_size]
prompt_data = transformed_data[test_size:]

def create_dataset(data):
    return Dataset.from_dict({
        "messages": [item["messages"] for item in data],
        "inputs": [item["inputs"] for item in data],
        "outputs": [item["outputs"] for item in data]
    })

test_dataset = create_dataset(test_data)
prompt_dataset = create_dataset(prompt_data)

qa_dataset = DatasetDict({
    "test": test_dataset,
    "prompt": prompt_dataset
})


In [None]:
qa_dataset.push_to_hub("kngrg/ru-QAmeleon", private=False)

In [7]:
import codecs
import json
import copy
from collections import OrderedDict, defaultdict
import numpy as np
from tqdm import tqdm
import os
from datasets import load_dataset, Dataset
from typing import Dict, List, Tuple
from llmtf.metrics import mean, metric_max_over_ground_truths, f1_macro_score
import transformers.data.metrics.squad_metrics as squad_metrics
import re
from llmtf.base import Task, SimpleFewShotHFTask, LLM
from difflib import SequenceMatcher
import pandas as pd
import string
from sklearn.metrics import recall_score, precision_score

def metric_max_over_true(metric_fn, prediction, ground_truths):
    scores_for_ground_truths = []
    for ground_truth in ground_truths:
        score = metric_fn(ground_truth, prediction)
        scores_for_ground_truths.append(score)
    return max(scores_for_ground_truths)



class QATask(SimpleFewShotHFTask):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.method = 'generate'
        self.dataset_name = 'QAmeleon'
        self._max_new_tokens = 64

    @classmethod
    def name(cls):
        return 'kngrg/ru-QAmeleon'

    def dataset_args(self) -> Dict:
        return {'path': 'kngrg/ru-QAmeleon'}

    def aggregation(self) -> Dict:
        return {
          "f1": mean,
          "em": mean
        }

    def evaluate(self, sample, y_pred) -> Dict:
        y_true = sample['outputs']['segment']
        f1 = metric_max_over_ground_truths(squad_metrics.compute_f1, y_pred, y_true)
        em = metric_max_over_ground_truths(squad_metrics.compute_exact, y_pred, y_true)

        return {
          "f1": f1,
          "em": em
        }

    def test_split_name(self) -> str:
        return 'test'

    def prompt_split_name(self) -> str:
        return 'prompt'

    def create_messages(self, sample, with_answer=None) -> List[Dict]:
        messages = sample['messages']
        inputs = sample['inputs']
        for m in messages:
            m['content'] = m['content'].format(**inputs)
        return messages


In [8]:
task = QATask()

In [None]:
#api_base = 'http://89.169.128.106:5000' # mistralai/Mistral-Nemo-Instruct-2407
#api_base = 'http://89.169.128.106:5001' # Qwen/Qwen2.5-14B-Instruct
#api_base = 'http://89.169.128.106:5002' # RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4

from llmtf.model import HFModel

model_name_or_path = 'Qwen/Qwen2.5-3B-Instruct'
model = HFModel(conversation_template_path='conversation_configs/qwen2.json', device_map='cuda:0', attn_implementation="sdpa")
model.from_pretrained(model_name_or_path)

model.generation_config.max_new_tokens = 200
model.generation_config.repetition_penalty = 1.0
model.generation_config.do_sample = False
model.generation_config.temperature = 0.0
model.generation_config

In [50]:
from llmtf.evaluator import Evaluator
evaluator = Evaluator()

evaluator.evaluate_dataset(
    task=task,
    model=model,
    output_dir='./qa_qwen',
    max_len=4000,
    few_shot_count=0,
    generation_config=None, # will use model.generation_config by default
    batch_size=4,
    max_sample_per_dataset=200
)

INFO: 2024-10-27 18:08:43,319: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645]
INFO:llmtf.base.hfmodel:Updated generation_config.eos_token_id: [151645]
INFO: 2024-10-27 18:08:43,323: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
INFO:llmtf.base.hfmodel:Updated generation_config.stop_strings: ['<|im_end|>']
100%|██████████| 200/200 [00:00<00:00, 1320.98it/s]
INFO: 2024-10-27 18:08:46,380: llmtf.base.kngrg/ru-QAmeleon: Loading Dataset: 3.05s
INFO:llmtf.base.kngrg/ru-QAmeleon:Loading Dataset: 3.05s
100%|██████████| 50/50 [20:02<00:00, 24.05s/it]
INFO: 2024-10-27 18:28:48,677: llmtf.base.kngrg/ru-QAmeleon: Processing Dataset: 1202.29s
INFO:llmtf.base.kngrg/ru-QAmeleon:Processing Dataset: 1202.29s
INFO: 2024-10-27 18:28:48,680: llmtf.base.kngrg/ru-QAmeleon: Results for kngrg/ru-QAmeleon:
INFO:llmtf.base.kngrg/ru-QAmeleon:Results for kngrg/ru-QAmeleon:
INFO: 2024-10-27 18:28:48,686: llmtf.base.kngrg/ru-QAmeleon: {'f1': 0.028563285427308537, '

In [51]:
!ls ./qa_qwen

kngrg_ru-QAmeleon.jsonl  kngrg_ru-QAmeleon_params.jsonl  kngrg_ru-QAmeleon_total.jsonl


In [52]:
!cat ./qa_qwen/kngrg_ru-QAmeleon_total.jsonl

{
    "task_name": "kngrg/ru-QAmeleon",
    "results": {
        "f1": 0.028563285427308537,
        "em": 0.0
    },
    "leaderboard_result": 0.014281642713654268
}


In [53]:
!cat ./qa_qwen/kngrg_ru-QAmeleon_params.jsonl

{
    "custom_generation_config": null,
    "model_params": {
        "model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
        "generation_config": {
            "eos_token_id": [
                151645
            ],
            "max_length": 32768,
            "max_new_tokens": 200,
            "pad_token_id": 151643,
            "stop_strings": [
                "<|im_end|>"
            ],
            "temperature": 0.0,
            "top_k": 40,
            "top_p": 0.9,
            "transformers_version": "4.45.2",
            "trust_remote_code": false
        },
        "conversation_template": {
            "system_prompt": "",
            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
            "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
            "user_role": "use

In [9]:
#api_base = 'http://89.169.128.106:5000' # mistralai/Mistral-Nemo-Instruct-2407
#api_base = 'http://89.169.128.106:5001' # Qwen/Qwen2.5-14B-Instruct
#api_base = 'http://89.169.128.106:5002' # RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4

from llmtf.model import HFModel

model_name_or_path = 'RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4'
model = HFModel(conversation_template_path='conversation_configs/qwen2.json', device_map='cuda:0', attn_implementation="sdpa")
model.from_pretrained(model_name_or_path)

model.generation_config.max_new_tokens = 200
model.generation_config.repetition_penalty = 1.0
model.generation_config.do_sample = False
model.generation_config.temperature = 0.0
model.generation_config

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.6k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.17k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/3.37M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/2.30M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/12.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/759 [00:00<?, ?B/s]

INFO: 2024-10-27 18:51:12,263: llmtf.base.hfmodel: Set eos_token_id in generation_config to [147077]
INFO:llmtf.base.hfmodel:Set eos_token_id in generation_config to [147077]
INFO: 2024-10-27 18:51:12,270: llmtf.base.hfmodel: Model id: RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4
INFO:llmtf.base.hfmodel:Model id: RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4
INFO: 2024-10-27 18:51:12,273: llmtf.base.hfmodel: Leading space: False
INFO:llmtf.base.hfmodel:Leading space: False


GenerationConfig {
  "bos_token_id": 147075,
  "eos_token_id": [
    147077
  ],
  "max_length": 32768,
  "max_new_tokens": 200,
  "pad_token_id": 147075,
  "stop_strings": [
    "<|im_end|>"
  ],
  "temperature": 0.0,
  "top_k": 40,
  "top_p": 0.9,
  "trust_remote_code": false
}

In [10]:
from llmtf.evaluator import Evaluator
evaluator = Evaluator()

evaluator.evaluate_dataset(
    task=task,
    model=model,
    output_dir='./qa_qwen_adapt',
    max_len=4000,
    few_shot_count=0,
    generation_config=None, # will use model.generation_config by default
    batch_size=4,
    max_sample_per_dataset=200
)

INFO: 2024-10-27 18:51:12,305: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [147077]
INFO:llmtf.base.hfmodel:Updated generation_config.eos_token_id: [147077]
INFO: 2024-10-27 18:51:12,312: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['<|im_end|>']
INFO:llmtf.base.hfmodel:Updated generation_config.stop_strings: ['<|im_end|>']
100%|██████████| 200/200 [00:00<00:00, 920.16it/s] 
INFO: 2024-10-27 18:51:16,847: llmtf.base.kngrg/ru-QAmeleon: Loading Dataset: 4.53s
INFO:llmtf.base.kngrg/ru-QAmeleon:Loading Dataset: 4.53s
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
100%|██████████| 50/50 [09:07<00:00, 10.96s/it]
INFO: 2024-10-27 19:00:24,801: llmtf.base.kngrg/ru-QAmeleon: Processing Dataset: 547.95s
INFO:llmtf.base.kngrg/ru-QAmeleon:Processing Dataset: 547.95s
INFO: 2024-10-27 19:00:24,806: llmtf.base.kngrg/ru-QAmeleon: Results for kngrg/ru-QAmeleon:
INFO:llmtf.base.kngrg

In [11]:
!ls ./qa_qwen_adapt

kngrg_ru-QAmeleon.jsonl  kngrg_ru-QAmeleon_params.jsonl  kngrg_ru-QAmeleon_total.jsonl


In [12]:
!cat ./qa_qwen_adapt/kngrg_ru-QAmeleon_total.jsonl

{
    "task_name": "kngrg/ru-QAmeleon",
    "results": {
        "f1": 0.03285677635076812,
        "em": 0.0
    },
    "leaderboard_result": 0.01642838817538406
}


In [13]:
!cat ./qa_qwen_adapt/kngrg_ru-QAmeleon_params.jsonl

{
    "custom_generation_config": null,
    "model_params": {
        "model_name_or_path": "RefalMachine/ruadapt_qwen2.5_3B_ext_u48_instruct_v4",
        "generation_config": {
            "bos_token_id": 147075,
            "eos_token_id": [
                147077
            ],
            "max_length": 32768,
            "max_new_tokens": 200,
            "pad_token_id": 147075,
            "stop_strings": [
                "<|im_end|>"
            ],
            "temperature": 0.0,
            "top_k": 40,
            "top_p": 0.9,
            "transformers_version": "4.45.2",
            "trust_remote_code": false
        },
        "conversation_template": {
            "system_prompt": "",
            "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
            "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
            "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
            "bot_message_template_incomplete":