In [None]:
from transformers import GPT2Tokenizer, AutoConfig, OPTForCausalLM

model_id = "facebook/opt-6.7b"
# Load the base model with 8-bit quantized weights to reduce GPU memory use.
# The keyword is `load_in_8bit` (no trailing "s"); `device_map="auto"` lets the
# loader place the quantized weights on the available GPU(s).
model = OPTForCausalLM.from_pretrained(
    model_id,
    load_in_8bit=True,
    device_map="auto",
)
# The tokenizer is loaded from the same checkpoint as the model.
tokenizer = GPT2Tokenizer.from_pretrained(model_id)

In [None]:
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

# Prepare the 8-bit model for training before attaching the adapters.
model = prepare_model_for_int8_training(model)


## LoRA Configuration
config = LoraConfig(
    r=8,  # rank of LoRA, which determines the size of the LoRA matrices
    lora_alpha=32,  # scaling factor applied to the LoRA update
    # Modules that receive LoRA adapters: the attention projections and the
    # feed-forward layers. OPT names its feed-forward layers `fc1`/`fc2`;
    # `fc_in`/`fc_out` are GPT-J-style names that match nothing in OPT and
    # would be silently skipped.
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "out_proj",
        "fc1",
        "fc2",
    ],
    lora_dropout=0.05,  # dropout rate used inside the LoRA modules
    bias="none",  # do not train any bias parameters
    task_type="CAUSAL_LM",  # causal (autoregressive) language modeling
)

# Wrap the base model so that only the LoRA adapter weights are trained.
model = get_peft_model(model, config)

In [None]:
from datasets import load_dataset
from transformers import DataCollatorForLanguageModeling

## Data processing
def _tokenize_quotes(batch):
    """Tokenize the "quote" column of a batch of examples."""
    return tokenizer(batch["quote"])


# Fetch the quotes dataset and tokenize every split in batches.
dataset = load_dataset("Abirate/english_quotes")
tokenized_dataset = dataset.map(_tokenize_quotes, batched=True)

## With mlm=False the collator builds batches for causal (autoregressive)
## language modeling: each token is predicted from the tokens before it.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
## Fine-tuning
from transformers import TrainingArguments, Trainer

model_dir = "models"
training_args = TrainingArguments(
    output_dir=f"{model_dir}/{model_id}-lora",  # output directory
    per_device_train_batch_size=4,  # batch size per device during training
    learning_rate=2e-4,
    fp16=True,  # mixed-precision training: faster and uses less memory
    logging_steps=20,  # used for tracking the progress of training
    max_steps=100,  # total number of training steps to run
)

trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset["train"],
    args=training_args,
    data_collator=data_collator,
)
# The cache flag lives on the model config; assigning `model.use_cache` only
# creates an unused attribute. Disable the KV cache while training.
model.config.use_cache = False
trainer.train()

# Save the trained LoRA model.
model_path = f"{model_dir}/{model_id}-lora-int8"
model.save_pretrained(model_path)

## Inference with the fine-tuned model
lora_model = trainer.model
# Re-enable the KV cache and switch to eval mode so LoRA dropout is inactive
# during generation.
lora_model.config.use_cache = True
lora_model.eval()
text = "Two things are infinite:"
inputs = tokenizer(text, return_tensors="pt").to(0)
out = lora_model.generate(**inputs, max_length=48)
print(tokenizer.decode(out[0], skip_special_tokens=True))