In [1]:
import os

from mindnlp.transformers import AutoTokenizer, AutoModelForCausalLM
from mindnlp.peft import (
    get_peft_model, LNTuningConfig, TaskType)
from mindnlp.dataset import load_dataset
from mindnlp.engine import Trainer, TrainingArguments

  from .autonotebook import tqdm as notebook_tqdm
Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.243 seconds.
Prefix dict has been built successfully.


In [3]:
classes = ["Unlabeled", "complaint", "no complaint"]
model_name_or_path = "bigscience/bloomz-560m"

dataset_name = "twitter_complaints"
max_length = 64
lr = 1e-4
num_epochs = 25
batch_size = 8

In [4]:
peft_config = LNTuningConfig(task_type=TaskType.CAUSAL_LM)

In [5]:
dataset = load_dataset("ought/raft", dataset_name, trust_remote_code=True)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [6]:
dataset['train'] = dataset['train'].project(['Tweet text', 'Label'])
dataset['test'] = dataset['test'].project(['Tweet text', 'Label'])

In [7]:
dataset["train"].get_col_names()

['Tweet text', 'Label']

In [8]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.eos_token_id, tokenizer.pad_token_id

(2, 3)

In [9]:
target_max_length = max(tokenizer(classes, return_length=True)["length"])
target_max_length

3

In [10]:
def num_label_2_text_label(num_label):
    return classes[num_label]


def concate_text_label(texts, labels):
    labels = f"{str(labels)}</s>"
    text_formatted = f"Tweet text : {texts} Label : {labels}"
    output = tokenizer(
        text_formatted,
        text_target=labels,
        max_length=max_length,
        padding="max_length",
        truncation=True
    )

    output["labels"] = [-100 if x == tokenizer.pad_token_id else x for x in output['labels']]

    return output["input_ids"], output["attention_mask"], output['labels']


def datapipe(data):
    data = data.map(
        num_label_2_text_label,
        input_columns="Label",
        output_columns="labels",
    )
    data = data.map(
        concate_text_label,
        input_columns=["Tweet text", "labels"],
        output_columns=["input_ids", "attention_mask", "labels"],
    )
    return data

In [11]:
dataset["train"] = datapipe(dataset["train"])
dataset['test'] = datapipe(dataset["test"])

In [12]:
dataset["train"].get_col_names()

['input_ids', 'attention_mask', 'labels']

In [13]:
model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 100,352 || all params: 559,314,944 || trainable%: 0.017941948642087417


In [14]:
training_args = TrainingArguments(
    output_dir="./output",
    num_train_epochs=num_epochs,
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    save_strategy="epoch",
)

In [15]:
trainer = Trainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    args=training_args,
)

In [16]:
trainer.train()

                                               
  4%|▍         | 7/175 [00:17<00:50,  3.34it/s]  

{'eval_loss': 4.95366907119751, 'eval_runtime': 15.0169, 'eval_samples_per_second': 28.302, 'eval_steps_per_second': 3.596, 'epoch': 1.0}


                                                
  8%|▊         | 14/175 [00:36<02:19,  1.15it/s] 

{'eval_loss': 4.8654632568359375, 'eval_runtime': 14.9481, 'eval_samples_per_second': 28.432, 'eval_steps_per_second': 3.613, 'epoch': 2.0}


                                                
 12%|█▏        | 21/175 [00:54<02:20,  1.10it/s] 

{'eval_loss': 4.781277656555176, 'eval_runtime': 14.9663, 'eval_samples_per_second': 28.397, 'eval_steps_per_second': 3.608, 'epoch': 3.0}


                                                
 16%|█▌        | 28/175 [01:13<02:15,  1.09it/s] 

{'eval_loss': 4.702119827270508, 'eval_runtime': 14.9927, 'eval_samples_per_second': 28.347, 'eval_steps_per_second': 3.602, 'epoch': 4.0}


                                                
 20%|██        | 35/175 [01:32<02:08,  1.09it/s] 

{'eval_loss': 4.629334926605225, 'eval_runtime': 14.9047, 'eval_samples_per_second': 28.514, 'eval_steps_per_second': 3.623, 'epoch': 5.0}


                                                
 24%|██▍       | 42/175 [01:51<02:01,  1.09it/s] 

{'eval_loss': 4.564324855804443, 'eval_runtime': 15.0315, 'eval_samples_per_second': 28.274, 'eval_steps_per_second': 3.592, 'epoch': 6.0}


                                                
 28%|██▊       | 49/175 [02:09<01:56,  1.08it/s] 

{'eval_loss': 4.508313179016113, 'eval_runtime': 15.0053, 'eval_samples_per_second': 28.323, 'eval_steps_per_second': 3.599, 'epoch': 7.0}


                                                
 32%|███▏      | 56/175 [02:28<01:50,  1.07it/s] 

{'eval_loss': 4.4620137214660645, 'eval_runtime': 15.0438, 'eval_samples_per_second': 28.251, 'eval_steps_per_second': 3.59, 'epoch': 8.0}


                                                
 36%|███▌      | 63/175 [02:48<01:44,  1.08it/s] 

{'eval_loss': 4.425300121307373, 'eval_runtime': 15.2347, 'eval_samples_per_second': 27.897, 'eval_steps_per_second': 3.545, 'epoch': 9.0}


                                                
 40%|████      | 70/175 [03:07<01:38,  1.07it/s] 

{'eval_loss': 4.39694356918335, 'eval_runtime': 15.0218, 'eval_samples_per_second': 28.292, 'eval_steps_per_second': 3.595, 'epoch': 10.0}


                                                
 44%|████▍     | 77/175 [03:25<01:31,  1.07it/s] 

{'eval_loss': 4.3746113777160645, 'eval_runtime': 14.8727, 'eval_samples_per_second': 28.576, 'eval_steps_per_second': 3.631, 'epoch': 11.0}


                                                
 48%|████▊     | 84/175 [03:44<01:24,  1.08it/s] 

{'eval_loss': 4.355413436889648, 'eval_runtime': 14.8845, 'eval_samples_per_second': 28.553, 'eval_steps_per_second': 3.628, 'epoch': 12.0}


                                                
 52%|█████▏    | 91/175 [04:03<01:17,  1.08it/s] 

{'eval_loss': 4.33675479888916, 'eval_runtime': 15.0295, 'eval_samples_per_second': 28.278, 'eval_steps_per_second': 3.593, 'epoch': 13.0}


                                                
 56%|█████▌    | 98/175 [04:22<01:11,  1.08it/s] 

{'eval_loss': 4.317072868347168, 'eval_runtime': 15.1317, 'eval_samples_per_second': 28.087, 'eval_steps_per_second': 3.569, 'epoch': 14.0}


                                                 
 60%|██████    | 105/175 [04:41<01:05,  1.08it/s]

{'eval_loss': 4.296142578125, 'eval_runtime': 15.1123, 'eval_samples_per_second': 28.123, 'eval_steps_per_second': 3.573, 'epoch': 15.0}


                                                 
 64%|██████▍   | 112/175 [05:00<00:58,  1.08it/s]

{'eval_loss': 4.274670124053955, 'eval_runtime': 14.923, 'eval_samples_per_second': 28.479, 'eval_steps_per_second': 3.619, 'epoch': 16.0}


                                                 
 68%|██████▊   | 119/175 [05:18<00:51,  1.08it/s]

{'eval_loss': 4.2537055015563965, 'eval_runtime': 14.7471, 'eval_samples_per_second': 28.819, 'eval_steps_per_second': 3.662, 'epoch': 17.0}


                                                 
 72%|███████▏  | 126/175 [05:38<00:47,  1.04it/s]

{'eval_loss': 4.234174728393555, 'eval_runtime': 15.0733, 'eval_samples_per_second': 28.195, 'eval_steps_per_second': 3.582, 'epoch': 18.0}


                                                 
 76%|███████▌  | 133/175 [05:56<00:39,  1.07it/s]

{'eval_loss': 4.216706275939941, 'eval_runtime': 14.9157, 'eval_samples_per_second': 28.493, 'eval_steps_per_second': 3.62, 'epoch': 19.0}


                                                 
 80%|████████  | 140/175 [06:15<00:32,  1.07it/s]

{'eval_loss': 4.201669692993164, 'eval_runtime': 14.6981, 'eval_samples_per_second': 28.915, 'eval_steps_per_second': 3.674, 'epoch': 20.0}


                                                 
 84%|████████▍ | 147/175 [06:34<00:25,  1.10it/s]

{'eval_loss': 4.189225673675537, 'eval_runtime': 15.2096, 'eval_samples_per_second': 27.943, 'eval_steps_per_second': 3.55, 'epoch': 21.0}


                                                 
 88%|████████▊ | 154/175 [06:53<00:19,  1.07it/s]

{'eval_loss': 4.179466724395752, 'eval_runtime': 14.782, 'eval_samples_per_second': 28.751, 'eval_steps_per_second': 3.653, 'epoch': 22.0}


                                                 
 92%|█████████▏| 161/175 [07:12<00:12,  1.09it/s]

{'eval_loss': 4.172454357147217, 'eval_runtime': 15.5205, 'eval_samples_per_second': 27.383, 'eval_steps_per_second': 3.479, 'epoch': 23.0}


                                                 
 96%|█████████▌| 168/175 [07:31<00:06,  1.03it/s]

{'eval_loss': 4.168280124664307, 'eval_runtime': 14.7836, 'eval_samples_per_second': 28.748, 'eval_steps_per_second': 3.653, 'epoch': 24.0}


                                                 
100%|██████████| 175/175 [07:50<00:00,  1.08it/s]

{'eval_loss': 4.167085647583008, 'eval_runtime': 15.1478, 'eval_samples_per_second': 28.057, 'eval_steps_per_second': 3.565, 'epoch': 25.0}


100%|██████████| 175/175 [07:52<00:00,  2.70s/it]

{'train_runtime': 472.5518, 'train_samples_per_second': 2.963, 'train_steps_per_second': 0.37, 'train_loss': 6.5912158203125, 'epoch': 25.0}





TrainOutput(global_step=175, training_loss=6.5912158203125, metrics={'train_runtime': 472.5518, 'train_samples_per_second': 2.963, 'train_steps_per_second': 0.37, 'train_loss': 6.5912158203125, 'epoch': 25.0})

In [17]:
# 'twitter_complaints_bigscience_bloomz-560m_LN_TUNING_CAUSAL_LM'
peft_model_id = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}".replace(
    "/", "_"
)
model.save_pretrained(peft_model_id)

In [18]:
peft_model_id

'twitter_complaints_bigscience_bloomz-560m_LN_TUNING_CAUSAL_LM'

In [19]:
from mindnlp.peft import PeftModel, PeftConfig

# load the LNTuning config
config = PeftConfig.from_pretrained(peft_model_id)
# load the base LM
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
# merge LNTuning weights into the base LM
model = PeftModel.from_pretrained(model, peft_model_id)

In [20]:
text = "Tweet text : I am very angry with the service provided by the company Label :"

In [23]:
output = tokenizer(text, return_tensors='ms')
output

{'input_ids': Tensor(shape=[1, 16], dtype=Int64, value=
[[227985,   5484,    915 ...  16333,  77658,    915]]), 'attention_mask': Tensor(shape=[1, 16], dtype=Int64, value=
[[1, 1, 1 ... 1, 1, 1]])}

In [31]:
out = model.generate(
        input_ids=output["input_ids"], attention_mask=output["attention_mask"], max_new_tokens=3, eos_token_id=3
    )

In [32]:
tokenizer.decode(out[0])

'Tweet text : I am very angry with the service provided by the company Label : it is not'