In [9]:
import argparse
import os

import mindspore
from mindspore.nn import AdamWeightDecay
from mindnlp import load_dataset, process
from tqdm import tqdm
# from mindnlp.metrics import 

from mindnlp.peft import (
    get_peft_config,
    get_peft_model,
    LoraConfig,
    PeftType,
)
from mindnlp.dataset import MRPC, MRPC_Process

from mindnlp.transforms import RobertaTokenizer
from mindnlp.models import RobertaConfig, RobertaForSequenceClassification


# Hyper Parameter

In [10]:
batch_size = 32
model_name_or_path = "roberta-large"
task = "mrpc"
peft_type = PeftType.LORA
device = "GPU" # "cuda"
num_epochs = 20
lr = 3e-4

## Prepare dataset

In [11]:
# load dataset
mrpc_train, mrpc_test = MRPC()

In [12]:
# take a brief look at the dataset
print("ds col names:", mrpc_train.column_names)
print("length of train ds:", len(mrpc_train), "length of test ds:", len(mrpc_test))

iter = mrpc_train.create_tuple_iterator()
for i, (l, s1, s2) in enumerate(iter):
    if i <= 5:
        print(l, s1, s2)

ds col names: ['label', 'sentence1', 'sentence2']
length of train ds: 4076 length of test ds: 1725
1 Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence . Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .
0 Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion . Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .
1 They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added . On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .
0 Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 . Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .
1 The stock rose $ 2.11 , or about 11 percent , to close F

In [13]:

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
cols = ['sentence1', 'sentence2']
def process_dataset(dataset, tokenizer, column_names, batch_size, max_seq_len=512, shuffle=False):
    # tokenize
    for col in column_names:
        dataset = dataset.map(tokenizer, input_columns=col)

    # 
    return dataset

ds = process_dataset(mrpc_train, tokenizer, column_names=cols, batch_size=batch_size)



## roberta-base model from pretrained

In [16]:

model_config = RobertaConfig(num_labels=2)
model = RobertaForSequenceClassification.from_pretrained('roberta-base', config=model_config )

# model

['roberta.encoder.layer.12.attention.self_attn.query.weight', 'roberta.encoder.layer.12.attention.self_attn.query.bias', 'roberta.encoder.layer.12.attention.self_attn.key.weight', 'roberta.encoder.layer.12.attention.self_attn.key.bias', 'roberta.encoder.layer.12.attention.self_attn.value.weight', 'roberta.encoder.layer.12.attention.self_attn.value.bias', 'roberta.encoder.layer.12.attention.output.dense.weight', 'roberta.encoder.layer.12.attention.output.dense.bias', 'roberta.encoder.layer.12.attention.output.layer_norm.gamma', 'roberta.encoder.layer.12.attention.output.layer_norm.beta', 'roberta.encoder.layer.12.intermediate.dense.weight', 'roberta.encoder.layer.12.intermediate.dense.bias', 'roberta.encoder.layer.12.output.dense.weight', 'roberta.encoder.layer.12.output.dense.bias', 'roberta.encoder.layer.12.output.layer_norm.gamma', 'roberta.encoder.layer.12.output.layer_norm.beta', 'roberta.encoder.layer.13.attention.self_attn.query.weight', 'roberta.encoder.layer.13.attention.self_a

In [17]:
peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1)
peft_model = get_peft_model(model, peft_config)
# model.print_train_parameters()

KeyError: 'model_type'