In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "bigscience/bloomz-560m"
#model_name="bigscience/bloom-1b1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
foundation_model = AutoModelForCausalLM.from_pretrained(model_name)

In [None]:
!huggingface-cli login --token hf_ZkqzrDxSlmqd

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
The token `shasha` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `shasha`


In [None]:
def build_prompt(record):
    """把 Alpaca 三段字段拼成一条指令 Prompt"""
    if record["input"]:
        return (
            f"### Instruction:\n{record['instruction']}\n\n"
            f"### Input:\n{record['input']}\n\n"
            f"### Response:\n{record['output']}"
        )
    else:  # 没有 input 的条目 _for alpaca
        return (
            f"### Instruction:\n{record['instruction']}\n\n"
            f"### Response:\n{record['output']}"
        )

In [None]:
# 一个简单的推理函数
def get_outputs(model, inputs, max_new_tokens=100):
    outputs = model.generate(
        input_ids=inputs["input_ids"].to(model.device),
        attention_mask=inputs["attention_mask"].to(model.device),
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5, # 避免模型复读，默认值为1.0
        eos_token_id=tokenizer.eos_token_id
    )
    return outputs

In [None]:
# 测试一下这个推理函数
input_sentences = tokenizer("I love this movie because", return_tensors="pt")
foundational_outputs_sentence = get_outputs(foundation_model, input_sentences, max_new_tokens=50)

print(tokenizer.batch_decode(foundational_outputs_sentence, skip_special_tokens=True))

['I love this movie because it is so funny and I am sure that my friends will enjoy too']


In [None]:
cutoff_len = 512        # 你自己决定 —— 通常 256 / 512 / 1024
train_on_inputs = False # 常用设置：只让模型学习回答，不回传问句梯度
add_eos_token  = True

def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=cutoff_len,
        padding=False,
        return_tensors=None,
    )
    # 补 <eos>
    if (result["input_ids"][-1] != tokenizer.eos_token_id
            and len(result["input_ids"]) < cutoff_len
            and add_eos_token):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)

    result["labels"] = result["input_ids"].copy()  # 先默认全部监督
    return result

def generate_and_tokenize_prompt(dp):
    full_prompt = build_prompt(dp)
    tokenized = tokenize(full_prompt)

    if not train_on_inputs:  # 把问句位置 mask 成 -100
        user_prompt = build_prompt({**dp, "output": ""})  # 只 instruction+input
        user_tok = tokenize(user_prompt, add_eos_token=add_eos_token)
        n = len(user_tok["input_ids"]) - (1 if add_eos_token else 0)
        tokenized["labels"] = [-100] * n + tokenized["labels"][n:]

    return tokenized

In [None]:
from datasets import load_dataset
# dataset = "noob123/imdb_review_3000"
alpaca_ds = load_dataset("tatsu-lab/alpaca")["train"]
#Create the Dataset to create prompts.
# data = load_dataset(dataset)
# data = load_dataset(alpaca_ds)

# data = data.map(lambda samples: tokenizer(samples['review']), batched=True)
# train_sample = data["train"].select(range(50))

train_sample = alpaca_ds.map(generate_and_tokenize_prompt)
# train_sample = alpaca_ds.map(generate_and_tokenize_prompt)
# train_sample = data["train"]


# train_sample = train_sample.remove_columns('sentiment')
train_sample = train_sample.remove_columns(
    [col for col in train_sample.column_names if col not in ["input_ids", "attention_mask", "labels"]]
)
display(train_sample)

Map:   0%|          | 0/52002 [00:00<?, ? examples/s]

ValueError: Column name ['sentiment'] not in the dataset. Current columns in the dataset: ['instruction', 'input', 'output', 'text', 'input_ids', 'attention_mask', 'labels']

In [None]:
print(train_sample[:1])

{'review': ["One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is d

In [None]:
import peft
from peft import LoraConfig, get_peft_model, PeftModel

lora_config = LoraConfig(
    r=4, #As bigger the R bigger the parameters to train.
    lora_alpha=1, # a scaling factor that adjusts the magnitude of the weight matrix. Usually set to 1
    target_modules=["query_key_value"], #You can obtain a list of target modules in the URL above.
    lora_dropout=0.05, #Helps to avoid Overfitting.
    bias="lora_only", # this specifies if the bias parameter should be trained.
    task_type="CAUSAL_LM"
)

In [None]:
peft_model = get_peft_model(foundation_model, lora_config)
print(peft_model.print_trainable_parameters())

trainable params: 393,216 || all params: 559,607,808 || trainable%: 0.07026635339584111
None


In [None]:
import os
working_dir = './'

output_directory = os.path.join(working_dir, "peft_lab_outputs")

In [None]:
import transformers
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(
    output_dir=output_directory,
    auto_find_batch_size=True, # Find a correct bvatch size that fits the size of Data.
    learning_rate= 3e-2, # Higher learning rate than full fine-tuning.
    num_train_epochs=2,
    use_cpu=False,
    report_to="wandb",
    run_name="lora-bloomz-peft",
    logging_strategy="steps",       # ✅ 每隔几个 step 记录一次
    logging_steps=10,
)

In [None]:
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=train_sample,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmiaodee-ai[0m ([33mmiaode-ai[0m). Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
10,4.1821
20,6.7233


Step,Training Loss
10,4.7903
20,5.8078
30,9.4623
40,9.9553
50,9.5455
60,9.521
70,9.6338
80,8.9854
90,8.9743
100,9.2321


TrainOutput(global_step=376, training_loss=9.622473858772441, metrics={'train_runtime': 174.4131, 'train_samples_per_second': 34.39, 'train_steps_per_second': 2.156, 'total_flos': 9405951639846912.0, 'train_loss': 9.622473858772441, 'epoch': 2.0})

In [None]:
peft_model_path = os.path.join(output_directory, f"lora_model")

trainer.model.save_pretrained(peft_model_path)

In [None]:
loaded_model = PeftModel.from_pretrained(foundation_model, peft_model_path, is_trainable=False)
input_sentences = tokenizer("I love this movie because", return_tensors="pt")
foundational_outputs_sentence = get_outputs(loaded_model, input_sentences, max_new_tokens=100)

print(tokenizer.batch_decode(foundational_outputs_sentence, skip_special_tokens=True))

["I love this movie because the was his he and that in, left to a is man last itself's on as but of by also their had with since work noticeides w war into her him it your you right following for over be made got will has thought.' more most recently its an not never them court or cost way then first special all attention again aroundplace I y kakak instead rather short one house ta tour much she they there-w stless disc McCarthy at un set my star! chart others while quarter intent-m"]
