In [None]:
# %%capture
# !pip install unsloth
# # Also get the latest nightly Unsloth!
# !pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [2]:
# One must patch the DPO Trainer first!
from unsloth import PatchDPOTrainer
PatchDPOTrainer()

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


Unsloth: Your Flash Attention 2 installation seems to be broken?
A possible explanation is you have a new CUDA version which isn't
yet compatible with FA2? Please file a ticket to Unsloth or FA2.
We shall now use Xformers instead, which does not have any performance hits!
We found this negligible impact by benchmarking on 1x A100.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 4096 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "shared-nvme/finetuned_llm_models/sft_lora_qwen_medical/", # Choose ANY! eg mistralai/Mistral-7B-Instruct-v0.2
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2024.11.9: Fast Qwen2 patching. Transformers = 4.46.3.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.684 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 8.6. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.57s/it]
Unsloth 2024.11.9 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:
import os
import re
from typing import List, Optional
from datasets import DatasetDict, concatenate_datasets, load_dataset, load_from_disk
from datasets.builder import DatasetGenerationError

DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"

def apply_chat_template(example, tokenizer):
    """使用新的 chat template 格式处理数据"""
    chat_template = """Below are some instructions that describe some tasks. Write responses that appropriately complete each request.

### Instruction:
{instruction}

### Response:
{response}"""

    if all(k in example.keys() for k in ("question", "response_chosen", "response_rejected")):
        # 构建 prompt（只包含问题）
        example["question"] = chat_template.replace(
            "{instruction}", example["question"]
        ).replace("{response}", "")
        
        # 构建 chosen 回答
        example["response_chosen"] = chat_template.replace(
            "{instruction}", example["question"]
        ).replace("{response}", example["response_chosen"])
        
        # 构建 rejected 回答
        example["response_rejected"] = chat_template.replace(
            "{instruction}", example["question"]
        ).replace("{response}", example["response_rejected"])
    else:
        raise ValueError(
            f"Missing required columns: question, response_chosen, response_rejected"
        )
    return example

def get_datasets(
    data_config: dict,
    shuffle: bool = True,
) -> DatasetDict:
    """加载数据集"""
    if type(data_config) is not dict:
        raise ValueError(f"Data config must be a dictionary")
    return mix_datasets(data_config, shuffle=shuffle)

def mix_datasets(dataset_mixer: dict, shuffle=True) -> DatasetDict:
    """混合数据集"""
    raw_datasets = DatasetDict()
    raw_train_datasets = []
    fracs = []
    
    for ds, frac in dataset_mixer.items():
        fracs.append(frac)
        try:
            dataset = load_dataset(ds, split="train")
        except DatasetGenerationError:
            dataset = load_from_disk(os.path.join(ds, "train"))
        raw_train_datasets.append(dataset)

    if any(frac < 0 for frac in fracs):
        raise ValueError("Dataset fractions cannot be negative")

    # 处理训练集
    if len(raw_train_datasets) > 0:
        train_subsets = []
        for dataset, frac in zip(raw_train_datasets, fracs):
            train_subset = dataset.select(range(int(frac * len(dataset))))
            train_subsets.append(train_subset)
        raw_datasets["train"] = concatenate_datasets(train_subsets)
        if shuffle:
            raw_datasets["train"] = raw_datasets["train"].shuffle(seed=42)
    else:
        raise ValueError(f"No datasets loaded")

    return raw_datasets

In [5]:
# 加载数据集
dataset = load_from_disk("shared-nvme/datasets/reward_train/")
print(dataset.column_names)
print(dataset)

# 应用模板
dataset = dataset.map(
    apply_chat_template,
    fn_kwargs={"tokenizer": tokenizer},
    num_proc=12,
    desc="Formatting comparisons with prompt template",
    keep_in_memory=True,
    load_from_cache_file=False
)

# 重命名列
dataset = dataset.rename_columns({
    "question": "prompt",
    "response_chosen": "chosen",
    "response_rejected": "rejected"
})


{'train': ['question', 'response_chosen', 'response_rejected']}
DatasetDict({
    train: Dataset({
        features: ['question', 'response_chosen', 'response_rejected'],
        num_rows: 3800
    })
})


Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 3800/3800 [00:01<00:00, 1967.35 examples/s]


<a name="Data"></a>
### Data Prep
We follow Huggingface's [Alignment Handbook](https://github.com/huggingface/alignment-handbook) for [Zephyr](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) and use the [Ultra Feedback dataset](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized), and sample 0.5% of it to speed things up. You can sample the full dataset for a full run.

In [6]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['prompt', 'chosen', 'rejected'],
        num_rows: 3800
    })
})


We shall print a random item from the dataset

In [7]:
import pprint
row = dataset['train'][8]
pprint.pprint(row["prompt"])
pprint.pprint(row["chosen"])
pprint.pprint(row["rejected"])

('Below are some instructions that describe some tasks. Write responses that '
 'appropriately complete each request.\n'
 '\n'
 '### Instruction:\n'
 '左结肠动脉直肠癌的手术治疗有些什么？\n'
 '\n'
 '### Response:\n')
('Below are some instructions that describe some tasks. Write responses that '
 'appropriately complete each request.\n'
 '\n'
 '### Instruction:\n'
 'Below are some instructions that describe some tasks. Write responses that '
 'appropriately complete each request.\n'
 '\n'
 '### Instruction:\n'
 '左结肠动脉直肠癌的手术治疗有些什么？\n'
 '\n'
 '### Response:\n'
 '\n'
 '\n'
 '### Response:\n'
 '根治术')
('Below are some instructions that describe some tasks. Write responses that '
 'appropriately complete each request.\n'
 '\n'
 '### Instruction:\n'
 'Below are some instructions that describe some tasks. Write responses that '
 'appropriately complete each request.\n'
 '\n'
 '### Instruction:\n'
 '左结肠动脉直肠癌的手术治疗有些什么？\n'
 '\n'
 '### Response:\n'
 '\n'
 '\n'
 '### Response:\n'
 '对于左结肠动脉直肠癌患者来说,外科切除是目前最有效的方法。')


We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [8]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 64,
    lora_dropout = 0, # Currently only supports dropout = 0
    bias = "none",    # Currently only supports bias = "none"
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

TypeError: Unsloth: Your model already has LoRA adapters. Your new parameters are different.

<a name="Train"></a>
### Train the DPO model
Now let's use Huggingface TRL's `DPOTrainer`! More docs here: [TRL DPO docs](https://huggingface.co/docs/trl/dpo_trainer). We do 3 epochs on 0.5% of the dataset to speed things up.

In [9]:
# One must patch the DPO Trainer first!
from unsloth import PatchDPOTrainer
PatchDPOTrainer()

In [10]:
from transformers import TrainingArguments
from trl import DPOTrainer, DPOConfig
from unsloth import is_bfloat16_supported

dpo_trainer = DPOTrainer(
    model = model,
    ref_model = None,
    args = DPOConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        num_train_epochs = 3,
        learning_rate = 5e-6,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.0,
        lr_scheduler_type = "linear",
        seed = 42,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
    beta = 0.1,
    train_dataset = dataset["train"],
    # eval_dataset = raw_datasets["test"],
    tokenizer = tokenizer,
    max_length = 1024,
    max_prompt_length = 512,
)

Extracting prompt from train dataset: 100%|██████████| 3800/3800 [00:00<00:00, 17293.17 examples/s]
Applying chat template to train dataset: 100%|██████████| 3800/3800 [00:00<00:00, 22978.11 examples/s]
Tokenizing train dataset: 100%|██████████| 3800/3800 [00:04<00:00, 914.49 examples/s]


In [None]:
dpo_trainer.train()

In [22]:
FastLanguageModel.for_inference(model)
messages = [                    # Change below!
    {"role": "user", "content": "婴幼儿主动脉缩窄的并发症是什么"},
#     {"role": "user", "content": "Continue the fibonacci sequence! Your input is 1, 1, 2, 3, 5, 8,"},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
answer = model.generate(input_ids, 
                        streamer = text_streamer, 
                        max_new_tokens = 2560, 
                        pad_token_id = tokenizer.eos_token_id,
                        repetition_penalty=1.2,
                        no_repeat_ngram_size=2  # 防止连续重复的 2-gram
                       )

generated_text = tokenizer.decode(answer[0], skip_special_tokens=True)
start_index = generated_text.rfind('Response:')+len('Response:')
generated_text = generated_text[start_index:]
print(generated_text)

肾功能不全<|im_end|>

肾功能不全


In [15]:
model.save_pretrained("sft_dpo_lora_qwen_medical") # Local saving
tokenizer.save_pretrained("sft_dpo_lora_qwen_medical")

('sft_dpo_lora_qwen_medical/tokenizer_config.json',
 'sft_dpo_lora_qwen_medical/special_tokens_map.json',
 'sft_dpo_lora_qwen_medical/vocab.json',
 'sft_dpo_lora_qwen_medical/merges.txt',
 'sft_dpo_lora_qwen_medical/added_tokens.json',
 'sft_dpo_lora_qwen_medical/tokenizer.json')