In [1]:
%%capture

%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes 

In [None]:
import os, torch, wandb

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)

from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
from dataclasses import dataclass

## Setup Huggingface 🤗 & Wandb

In [4]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("Proxima_hf")

login(token = hf_token)

wb_token = user_secrets.get_secret("Proxima_wb")

wandb.login(key=wb_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [5]:
@dataclass
class Config:
#     model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#     model_name = "AnatoliiPotapov/T-lite-instruct-0.1"
    model_name = "google/gemma-2-9b-it"
    dataset_name = "/kaggle/input/proxima-data-qa"
#     new_model = "llama-3.1-8b-proxima"
    new_model = "gemma-2-9b-it-proxima"
    torch_dtype = torch.float16
    attn_implementation = "eager"
cfg = Config()

# Loading model and tokenizer

In [6]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    cfg.model_name,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=cfg.attn_implementation
)

config.json:   0%|          | 0.00/857 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/39.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

In [7]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)
model, tokenizer = setup_chat_format(model, tokenizer)
tokenizer.padding_side = 'right'
tokenizer.padding_token = '<|pad|>'

tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

## LoRA adapter

In [8]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

# Data

In [9]:
dataset = load_dataset(cfg.dataset_name, split="all")

Generating train split: 0 examples [00:00, ? examples/s]

In [10]:
dataset

Dataset({
    features: ['Unnamed: 0', 'question', 'content', 'category', 'question_changed', 'content_changed', 'category_changed', 'catalog'],
    num_rows: 1676
})

## Format to chat 

In [11]:
def format_chat_template(row):
    row_json = [{"role": "user", "content": row["question_changed"]},
               {"role": "assistant", "content": row["content_changed"]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

In [12]:
dataset = dataset.map(
    format_chat_template,
    num_proc=4,
)

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/1676 [00:00<?, ? examples/s]

  self.pid = os.fork()


## Select only part

In [13]:
dataset_sh = dataset.shuffle(seed=911).select(range(1676))

In [14]:
dataset_sh

Dataset({
    features: ['Unnamed: 0', 'question', 'content', 'category', 'question_changed', 'content_changed', 'category_changed', 'catalog', 'text'],
    num_rows: 1676
})

In [15]:
dataset_sh = dataset_sh.train_test_split(0.1)
dataset_sh

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'question', 'content', 'category', 'question_changed', 'content_changed', 'category_changed', 'catalog', 'text'],
        num_rows: 1508
    })
    test: Dataset({
        features: ['Unnamed: 0', 'question', 'content', 'category', 'question_changed', 'content_changed', 'category_changed', 'catalog', 'text'],
        num_rows: 168
    })
})

# Train model

In [16]:
training_arguments = TrainingArguments(
    output_dir=cfg.new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
#     num_train_epochs=1,
    max_steps=500,
    eval_strategy="steps",
    eval_steps=500,
    logging_steps=100,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb",
#     run_name="Llama-3.1-proxima",
    run_name="gemma-2-9b-it-proxima",
)

In [17]:
dataset_sh["train"]['text'][0]

'<|im_start|>user\nстатус уволен сотрудник в личный кабинет<|im_end|>\n<|im_start|>assistant\nпо данному вопросу вы можете обратиться в кадровую службу, создав заявку "консультация по hr вопросам"<|im_end|>\n'

In [18]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_sh["train"],
    eval_dataset=dataset_sh["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/1508 [00:00<?, ? examples/s]

Map:   0%|          | 0/168 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [19]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33ms-v-savoskin[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss
500,0.6579,0.594068




TrainOutput(global_step=500, training_loss=0.9965035552978516, metrics={'train_runtime': 923.4595, 'train_samples_per_second': 1.083, 'train_steps_per_second': 0.541, 'total_flos': 5018939197289472.0, 'train_loss': 0.9965035552978516, 'epoch': 0.6631299734748011})

In [20]:
path_to_save = "Llama-finetuned"
trainer.save_model(path_to_save)
model.save_pretrained(path_to_save)
tokenizer.save_pretrained(path_to_save)



('Llama-finetuned/tokenizer_config.json',
 'Llama-finetuned/special_tokens_map.json',
 'Llama-finetuned/tokenizer.model',
 'Llama-finetuned/added_tokens.json',
 'Llama-finetuned/tokenizer.json')

In [21]:
# del model, tokenizer, trainer

# Compare models

## Init casual LLM

In [22]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
casual_model = AutoModelForCausalLM.from_pretrained(
    cfg.model_name,
    quantization_config=bnb_config,
#     device_map="auto",
    attn_implementation=cfg.attn_implementation
)

tokenizer = tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)
tokenizer.padding_side = 'right'
tokenizer.padding_token = '<|pad_token|>'

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [23]:
casual_model, tokenizer = setup_chat_format(casual_model, tokenizer)

## Get answers

In [24]:
def generate_answer(model, prompt):
    chat = [
        { "role": "user", "content": prompt },
    ]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)

    return(tokenizer.decode(outputs[0]))

# Comprasion

In [25]:
q1 = "как взять отгул"
q2 = "не открываается моя карьера у сотрудника"
q3 = "не приходит логин пароль"

In [26]:
generate_answer(model, q1)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


'<|im_start|>user\nкак взять отгул<|im_end|>\n<|im_start|>assistant\nотгул можно взять, если отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник. отпуск по заявлению работник'

In [27]:
generate_answer(model, q2)

'<|im_start|>user\nне открываается моя карьера у сотрудника<|im_end|>\n<|im_start|>assistant\nсоздать заявку на сотрудника можно в разделе "зар – заявки на сотрудни". инструкция доступна по ссылке https://company-x5.ru/cms/z5/100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000'

In [28]:
generate_answer(model, q3)

'<|im_start|>user\nне приходит логин пароль<|im_end|>\n<|im_start|>assistant\nпри проблемах со входом в личный кабинет, прежде чем создавать заявку в поддержку, убедитесь, что заходите в личный кабинет на сайте https://company-x5.ru, указываете актуальные и верные логин и пароль. если вам неизвестен логин, обратитесь к руководителю (дм), он сможет посмотреть ваш логин и сбросить пароль в веб-табеле. для самостоятельного сброса пароля позвоните с вашего мобильного телефона на +7 (xxx) xxx xx xx, наберите добавочный номер 10100, нажмите * и подтвердите сброс пароля, нажав #. обновленный пароль отправьте по sms на +7 ('

In [None]:
print(generate_answer(casual_model, q1))

In [31]:
generate_answer(casual_model, q2)

'<|im_start|>user\nне открываается моя карьера у сотрудника<|im_end|>\n<|im_start|>assistant\n<end_of_turn>\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\

In [32]:
generate_answer(casual_model, 'hi')

'<|im_start|>user\nhi<|im_end|>\n<|im_start|>assistant\n<end_of_turn><end_of_turn><eos><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn><eos><end_of_turn>.<end_of_turn><eos><end_of_turn>.<end_of_turn><eos><end_of_turn>.<end_of_turn><end_of_turn><eos><end_of_turn>.<end_of_turn><eos><end_of_turn>.<end_of_turn><end_of_turn><eos><end_of_turn>.<end_of_turn><end_of_turn><eos><end_of_turn>.<end_of_turn><eos><end_of_turn>.<end_of