# Using VeRA for sequence classification

In this example, we fine-tune Roberta on a sequence classification task using VeRA.

In [6]:
#合并到主仓库后,改为git clone https://github.com/mindspore-lab/mindnlp
#!git clone https://github.com/MQN-80/mindnlp
#!cd mindnlp && git checkout jinyi
#!cd mindnlp && bash scripts/build_and_reinstall.sh
#!pip install -r requirements.txt
#!pip uninstall transformers && pip install tokenizers==0.19.1
#!pip uninstall gradio imageio mindformers torchvision modelarts mindnlp  -y
!pip uninstall mindnlp -y
#!pip install mindnlp
!pip install mindnlp-0.4.1-py3-none-any.whl -i https://pypi.tuna.tsinghua.edu.cn/simple
#!cd mindnlp && bash scripts/build_and_reinstall.sh

Found existing installation: mindnlp 0.4.0
Uninstalling mindnlp-0.4.0:
  Successfully uninstalled mindnlp-0.4.0
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Processing ./mindnlp-0.4.1-py3-none-any.whl
[33mDEPRECATION: moxing-framework 2.1.16.2ae09d45 has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of moxing-framework or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0mInstalling collected packages: mindnlp
Successfully installed mindnlp-0.4.1


首先设定mindspore运行环境,在这里我们选取Ascend作为运行环境

In [1]:
import mindspore
from mindspore import Tensor, mint,ops
mindspore.set_context(device_target='Ascend')
#output = mint.arange(12, 2, -1, dtype=mindspore.int32)
#print(output)



引入mindnlp所需要的环境

In [8]:
import mindspore
from mindnlp.core.optim import AdamW
from mindnlp.peft import LoraConfig, TaskType, get_peft_model
from mindnlp.peft import VeraConfig,LoKrConfig
import evaluate
from mindnlp.dataset import load_dataset
from mindnlp.transformers import AutoModelForSequenceClassification, AutoConfig,AutoTokenizer
from tqdm import tqdm
from mindspore import Tensor, mint,ops

初始化vera所需要的参数

In [10]:
batch_size = 128
model_name_or_path = "roberta-base"
task = "mrpc"
num_epochs = 5  # for best results, increase this number
rank = 8        # for best results, increase this number
max_length = 128

In [12]:
peft_config = VeraConfig(
    task_type="SEQ_CLS",
    r=2,
    d_initial=0.1,
    target_modules=["query", "value", "intermediate.dense"],
    save_projection=True,
)
head_lr = 1e-2
vera_lr = 2e-2

## Loading data

In [14]:
if any(k in model_name_or_path for k in ("gpt", "opt", "bloom")):
    padding_side = "left"
else:
    padding_side = "right"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side=padding_side)
if getattr(tokenizer, "pad_token_id") is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id



利用mindnlp.dataset加载数据集,包括训练集和验证集两部分

In [15]:
from mindnlp.dataset import BaseMapFunction
class ModifiedMapFunction(BaseMapFunction):
  def __call__(self, sentence1, sentence2,label,idx):
    out = tokenizer(sentence1,sentence2, truncation=True, max_length=max_length,padding="max_length")
    return out['input_ids'], out['attention_mask'] ,label
input_colums=['sentence1', 'sentence2','label','idx']
output_columns=['input_ids', 'attention_mask', 'labels']
map_fn = ModifiedMapFunction(input_colums, output_columns)
datasets = load_dataset("glue", task)
metric = evaluate.load("glue", task)
datasets['train']=datasets['train'].map(map_fn,input_colums, output_columns)
datasets['validation']=datasets['validation'].map(map_fn,input_colums, output_columns)
datasets['train']=datasets['train'].batch(batch_size=batch_size)
datasets['validation']=datasets['validation'].batch(batch_size=batch_size)

加载模型,并将vera注入到model中,并查看可训练的参数量

In [16]:
model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True, max_length=None)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 647,498 || all params: 125,294,668 || trainable%: 0.5167801713637168


定义optimizer和lr_scheduler

In [17]:
from mindnlp.common.optimization import get_linear_schedule_with_warmup
optimizer = AdamW(
    [
        {"params": [p for n, p in model.named_parameters() if "vera_lambda_" in n], "lr": vera_lr},
        {"params": [p for n, p in model.named_parameters() if "classifier" in n], "lr": head_lr},
    ]
)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=(0.06* num_epochs*len(datasets['train'])),
    num_training_steps=(num_epochs*len(datasets['train'])),
)
optimizers=[optimizer,lr_scheduler]

进行训练,在这里我们用mindnlp自带的engine进行训练

In [18]:
from mindnlp.engine import Trainer, TrainingArguments
from mindnlp.engine.utils import EvalPrediction
import numpy as np
metric = evaluate.load("accuracy")
training_args = TrainingArguments(
    num_train_epochs=num_epochs,
    output_dir='./model',
    save_strategy="epoch",
    evaluation_strategy="epoch",
    logging_strategy="steps",
)
def compute_metrics(eval_pred: EvalPrediction):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)
trainer = Trainer(
    model=model,
    train_dataset=datasets['train'],
    eval_dataset=datasets['validation'],
    compute_metrics=compute_metrics,
    optimizers=optimizers,
    args=training_args,
)
trainer.train()

 20%|██        | 29/145 [00:27<01:15,  1.55it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 50%|█████     | 2/4 [00:00<00:00,  8.77it/s][A
100%|██████████| 4/4 [00:00<00:00, 11.01it/s][A

-

                                                
 20%|██        | 29/145 [00:28<01:15,  1.55it/s]
100%|██████████| 4/4 [00:00<00:00, 11.01it/s][A
                                             [A

{'eval_loss': 0.635776162147522, 'eval_accuracy': 0.696078431372549, 'eval_runtime': 1.0803, 'eval_samples_per_second': 3.703, 'eval_steps_per_second': 0.926, 'epoch': 1.0}


 40%|████      | 58/145 [00:47<00:49,  1.76it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 50%|█████     | 2/4 [00:00<00:00,  9.43it/s][A
                                                
 40%|████      | 58/145 [00:48<00:49,  1.76it/s]
100%|██████████| 4/4 [00:00<00:00,  8.56it/s][A
                                             [A

{'eval_loss': 0.442180871963501, 'eval_accuracy': 0.7990196078431373, 'eval_runtime': 0.9763, 'eval_samples_per_second': 4.097, 'eval_steps_per_second': 1.024, 'epoch': 2.0}


 60%|██████    | 87/145 [01:08<00:33,  1.74it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 50%|█████     | 2/4 [00:00<00:00,  9.44it/s][A
                                                
 60%|██████    | 87/145 [01:09<00:33,  1.74it/s]
100%|██████████| 4/4 [00:00<00:00, 12.12it/s][A
                                             [A

{'eval_loss': 0.3532525599002838, 'eval_accuracy': 0.8455882352941176, 'eval_runtime': 0.9291, 'eval_samples_per_second': 4.305, 'eval_steps_per_second': 1.076, 'epoch': 3.0}


 80%|████████  | 116/145 [01:28<00:16,  1.74it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 50%|█████     | 2/4 [00:00<00:00,  9.78it/s][A
                                                 
 80%|████████  | 116/145 [01:29<00:16,  1.74it/s]
100%|██████████| 4/4 [00:00<00:00, 11.86it/s][A
                                             [A

{'eval_loss': 0.30721497535705566, 'eval_accuracy': 0.875, 'eval_runtime': 0.9103, 'eval_samples_per_second': 4.394, 'eval_steps_per_second': 1.099, 'epoch': 4.0}


100%|██████████| 145/145 [01:48<00:00,  1.75it/s]
  0%|          | 0/4 [00:00<?, ?it/s][A
 50%|█████     | 2/4 [00:00<00:00, 10.58it/s][A
                                                 
100%|██████████| 145/145 [01:49<00:00,  1.75it/s]
100%|██████████| 4/4 [00:00<00:00, 13.63it/s][A
                                             [A

{'eval_loss': 0.31004124879837036, 'eval_accuracy': 0.8676470588235294, 'eval_runtime': 0.9246, 'eval_samples_per_second': 4.326, 'eval_steps_per_second': 1.082, 'epoch': 5.0}


100%|██████████| 145/145 [01:50<00:00,  1.31it/s]

{'train_runtime': 110.7374, 'train_samples_per_second': 167.604, 'train_steps_per_second': 1.309, 'train_loss': 0.48570598733836207, 'epoch': 5.0}





TrainOutput(global_step=145, training_loss=0.48570598733836207, metrics={'train_runtime': 110.7374, 'train_samples_per_second': 167.604, 'train_steps_per_second': 1.309, 'train_loss': 0.48570598733836207, 'epoch': 5.0})