In [9]:
def prepare_hh_rlhf_dataset(example):
    r"""Extract the first Human/Assistant exchange from a ``chosen`` transcript.

    The transcript is expected to use the literal turn markers
    ``'\n\nHuman: '`` and ``'\n\nAssistant: '``.

    Args:
        example: Mapping with a ``'chosen'`` string holding the dialogue.

    Returns:
        ``{"input": <first human turn>, "output": <first assistant reply>}``
        with surrounding whitespace stripped. If the transcript has no human
        marker, or its first human turn has no assistant reply, both values
        are empty strings.
    """
    human_marker = '\n\nHuman: '
    assistant_marker = '\n\nAssistant: '

    turns = example['chosen'].split(human_marker)
    if len(turns) < 2:
        return {"input": "", "output": ""}

    # turns[1] spans from the first human marker up to the next human marker,
    # so it can never itself contain another human marker.
    human_input, found_marker, remainder = turns[1].partition(assistant_marker)
    if not found_marker:
        # No assistant reply in the first exchange: return the same empty
        # sentinel as other malformed rows instead of raising IndexError.
        return {"input": "", "output": ""}

    # Keep only the text up to a second assistant marker, if one exists.
    assistant_output = remainder.split(assistant_marker, 1)[0]
    return {
        "input": human_input.strip(),
        "output": assistant_output.strip()
    }

In [10]:
# Load the "train" split of the dataset identified as "hh-rlhf".
# NOTE(review): the public Hub id is usually "Anthropic/hh-rlhf"; confirm that
# "hh-rlhf" resolves to the intended copy (e.g. a local directory).
hh_rlhf_dataset = load_dataset("hh-rlhf", split="train")

In [11]:
# Convert every row with prepare_hh_rlhf_dataset and drop all original columns,
# leaving only the fields that function returns.
processed_hh_rlhf = hh_rlhf_dataset.map(prepare_hh_rlhf_dataset, remove_columns=hh_rlhf_dataset.column_names)

In [12]:
# Shuffle with a fixed seed so the subset taken from it is repeatable.
shuffled_rlhf = processed_hh_rlhf.shuffle(seed=42)

In [13]:
# Keep the first 10,000 shuffled rows. This rebinds processed_hh_rlhf from the
# full mapped dataset to the subset used by the cells that follow.
processed_hh_rlhf = shuffled_rlhf.select(range(10000))

In [14]:
# Inspect the first example of the subset as a sanity check.
processed_hh_rlhf[0]

{'input': 'Why did cells originally combine together to create life?',
 'output': 'Because their simple components -- chemicals -- interacted in particular ways.  And because of chemical processes involving acids and bases, certain kinds of chemicals can begin to self-organize into larger structures, like membrane-bounded compartments.  And it’s from those compartments that life eventually emerged.'}

In [15]:
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

In [16]:
# 1. Load the tokenizer and the 4-bit quantized model
model_name = "llama3"  # replace with the model you want to fine-tune
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The 4-bit setting goes through `quantization_config`: passing `load_in_4bit`
# directly to `from_pretrained` is deprecated and slated for removal.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    torch_dtype=torch.float16,
    device_map='auto',
)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [17]:
# 2. Prepare the model for QLoRA training
# Rebinds `model` to the value returned by prepare_model_for_kbit_training.
model = prepare_model_for_kbit_training(model)

In [18]:
# 3. Configure LoRA
lora_settings = dict(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],  # modules selected by name for adaptation
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
lora_config = LoraConfig(**lora_settings)
# Wrap the prepared model according to lora_config.
model = get_peft_model(model, lora_config)

In [24]:
def tokenize_function(examples, max_length=512, padding='max_length'):
    """Tokenize a batch of input/output pairs as single concatenated texts.

    Args:
        examples: Batch mapping with parallel ``"input"`` and ``"output"``
            sequences of strings.
        max_length: Forwarded to the tokenizer as ``max_length``; truncation
            is always enabled. Defaults to 512.
        padding: Forwarded to the tokenizer as ``padding``. Defaults to
            ``'max_length'``. NOTE(review): ``padding=False`` would leave
            padding to the data collator -- verify the collator in use pads
            batches before switching.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the joined texts.
    """
    # Join each input with its output, separated by a single space.
    combined_texts = [f"{inp} {out}" for inp, out in zip(examples["input"], examples["output"])]
    # Tokenize the joined texts.
    model_inputs = tokenizer(combined_texts, max_length=max_length, truncation=True, padding=padding)
    return model_inputs

In [25]:
# Tokenize in batches and drop the raw text columns so only the tokenizer's
# output fields remain.
tokenized_dataset = processed_hh_rlhf.map(tokenize_function, batched=True, remove_columns=processed_hh_rlhf.column_names)

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [28]:
# Training hyperparameters, collected in one mapping and unpacked below.
training_kwargs = {
    # where checkpoints go and how often to save / log
    "output_dir": "./results",
    "save_steps": 1000,
    "logging_steps": 100,
    # 4 samples per device x 4 accumulation steps = 16 samples per optimizer step
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 4,
    # optimization schedule
    "num_train_epochs": 3,
    "learning_rate": 2e-4,
    "weight_decay": 0.01,
    "optim": "adamw_torch",
    "fp16": True,
}
training_args = TrainingArguments(**training_kwargs)

In [29]:
# Collator built with masked-language-modeling disabled (mlm=False).
lm_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Assemble the trainer from the model, the arguments, the tokenized data and the collator.
trainer = Trainer(
    data_collator=lm_collator,
    train_dataset=tokenized_dataset,
    args=training_args,
    model=model,
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [30]:
# Run fine-tuning; the value returned by train() is shown as the cell output.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33ms1820587[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
100,2.0324
200,1.927
300,1.8953
400,1.8905
500,1.8911
600,1.8807
700,1.8446
800,1.8306
900,1.8254
1000,1.8094




TrainOutput(global_step=1875, training_loss=1.831287833658854, metrics={'train_runtime': 3853.2414, 'train_samples_per_second': 7.786, 'train_steps_per_second': 0.487, 'total_flos': 6.9228199673856e+17, 'train_loss': 1.831287833658854, 'epoch': 3.0})

In [31]:
# Save the fine-tuned model under the relative path "finetuned_model".
# NOTE(review): the tokenizer is not saved here -- confirm whether it should be
# written alongside the model.
model.save_pretrained("finetuned_model")