# Тонкая настройка предобученной модели

Следующий код написан на основе тьюториала https://deci.ai/blog/fine-tune-llama-2-with-lora-for-question-answering/

In [1]:
%pip install --quiet  peft bitsandbytes transformers trl accelerate

Note: you may need to restart the kernel to use updated packages.


# Подготовка данных для обучения

In [2]:
from datasets import load_dataset, Dataset

raw_datasets = load_dataset("abobster/pushkin_new")

def preprocess(dataset): 
    poems = '\n'.join(dataset['text']).split('</s>')
    poems = ['<s>' + poem.strip() + '</s>' for poem in poems]
    return {'text': poems}

raw_datasets = raw_datasets.map(preprocess, batched=True, batch_size=-1)  # https://huggingface.co/docs/datasets/v2.14.5/en/package_reference/main_classes#datasets.Dataset.map

raw_datasets

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 522
    })
    test: Dataset({
        features: ['text'],
        num_rows: 60
    })
})

# Загрузка и квантизация предобученной модели и токенайзера

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_name = "meta-llama/Llama-2-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"
tokenizer.add_bos_token = False
tokenizer.add_eos_token = False

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quant_config)  # the model has already been set to the correct devices and casted to the correct `dtype`

model_size = sum(t.numel() for t in model.parameters())
print(f"model_name: {model_name}")
print(f"model_size: {model_size/1000**2:.1f}M")
print(tokenizer.special_tokens_map)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

model_name: meta-llama/Llama-2-7b-hf
model_size: 3500.4M
{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<unk>'}


# Дообучение модели

In [4]:
from transformers import TrainingArguments, EarlyStoppingCallback
from peft import LoraConfig
from trl import SFTTrainer

peft_parameters = LoraConfig(  # https://huggingface.co/docs/peft/conceptual_guides/lora
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    bias="none",
    task_type="CAUSAL_LM"
)

batch_size = 4  # Максимум, что влазит в память GPU
max_seq_length = 256  # Только первые слова стихотворения учитываются

train_args = TrainingArguments(  # https://huggingface.co/docs/transformers/v4.34.0/en/main_classes/trainer#transformers.TrainingArguments
    report_to='tensorboard',
    output_dir='.results',

    max_steps=10000,
    load_best_model_at_end=True,

    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy='epoch',
    logging_strategy='epoch',
    save_strategy='epoch',

    optim="paged_adamw_32bit",
    learning_rate=1e-5,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
)

trainer = SFTTrainer(  # https://huggingface.co/docs/trl/v0.7.1/en/trainer#trl.SFTTrainer
    model=model,
    args=train_args,
    peft_config=peft_parameters,
    tokenizer=tokenizer,
    train_dataset=raw_datasets['train'],
    eval_dataset=raw_datasets['test'],
    dataset_text_field="text",
    # packing=True,
    max_seq_length=max_seq_length,
    callbacks=[EarlyStoppingCallback(3)],
    dataset_batch_size=batch_size,
)

trainer.train()

trainer.save_model('.7_fine_tuning')


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss
1,2.9565,2.922296
2,2.8826,2.86768
3,2.8321,2.834849
4,2.7685,2.767774
5,2.7092,2.741626
6,2.6804,2.731407
7,2.6593,2.72374
8,2.6405,2.718449
9,2.621,2.715953
10,2.6062,2.713273


# Генерация текста

In [5]:
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# model = AutoModelForCausalLM.from_pretrained('.7_fine_tuning', local_files_only=True, load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
# tokenizer = AutoTokenizer.from_pretrained('.7_fine_tuning', local_files_only=True)
# tokenizer.add_bos_token = False

In [26]:
prefix = '<s>О сколько нам открытий чудных'

inputs = tokenizer(prefix, return_tensors='pt').to('cuda')

outputs = model.generate(  # https://huggingface.co/docs/transformers/main_classes/text_generation
    **inputs, 
    do_sample=True,
    max_new_tokens=200,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
    )

print(tokenizer.decode(outputs[0]))

<s> О сколько нам открытий чудных!
О сколько сладких снов!
И нам, вдохновенным духом,
Небесный мир приятный.
Среди ветвей, утренней красоты,
Среди одуванчиков,
Среди пышных цветов,
Среди златок и цветов,
Среди снов и песен,
Всем нам предстоит увидеть
Судный день, славный день,
Высшую радость,
Сыновей и дочерей
Отдать покойным душам,
Чувствительным умовам,
И погребение им.
Скажите, кто же сможет
Все это увидеть?
Кто будет в том дне?

