In [18]:
from datasets import load_dataset
from transformers import AutoTokenizer,AutoModelForSequenceClassification,TrainingArguments,Trainer



In [19]:
!pip install peft

Looking in indexes: https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple


In [20]:
dataset = load_dataset("ag_news")

# 取每个 split 的前 30 条，并打乱
train_ds = dataset["train"].shuffle(seed=42).select(range(30))
test_ds = dataset["test"].shuffle(seed=42).select(range(30))

print(train_ds)
print(test_ds)


Dataset({
    features: ['text', 'label'],
    num_rows: 30
})
Dataset({
    features: ['text', 'label'],
    num_rows: 30
})


In [21]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "bert-base-uncased"  # 初学者推荐小模型
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
training_args = TrainingArguments(
    output_dir="./p_tuning_demo",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    learning_rate=5e-4,
    logging_steps=10,
    do_eval=True
)



In [23]:
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=128)

tokenized_train = train_ds.map(tokenize_function, batched=True)
tokenized_test = test_ds.map(tokenize_function, batched=True)


Map: 100%|██████████| 30/30 [00:01<00:00, 26.39 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 87.20 examples/s] 


In [24]:


from peft import IA3Config, get_peft_model

# 定义 IA3 配置
peft_config = IA3Config(
    task_type="SEQ_CLS",          # 序列分类
    inference_mode=False,         # 训练模式
    target_modules=["query", "value"]  # 可微调的模块
)

# 将模型改造成可微调的 IA3 模型
model = get_peft_model(model, peft_config)



In [25]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./ia3_news_model",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir="./logs",
    logging_steps=50,
    learning_rate=5e-4,
    do_train=True,
    do_eval=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
)


  trainer = Trainer(


In [26]:
trainer.train()




Step,Training Loss


TrainOutput(global_step=6, training_loss=1.3973124821980794, metrics={'train_runtime': 117.1814, 'train_samples_per_second': 0.768, 'train_steps_per_second': 0.051, 'total_flos': 5921591685120.0, 'train_loss': 1.3973124821980794, 'epoch': 3.0})

In [27]:
results = trainer.evaluate()
print(results)




{'eval_loss': 1.3803499937057495, 'eval_runtime': 10.1401, 'eval_samples_per_second': 2.959, 'eval_steps_per_second': 0.197, 'epoch': 3.0}
