271 changes: 51 additions & 220 deletions llm/peft/lora/roberta_sequence_classification.ipynb

Large diffs are not rendered by default.

161 changes: 122 additions & 39 deletions llm/peft/lora/roberta_sequence_classification.py
@@ -1,37 +1,50 @@
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import argparse
import os

import mindtorch
from mindtorch.optim import AdamW
from mindtorch.utils.data import DataLoader
# import mindnlp
import mindspore
from mindspore.common.api import _no_grad

from peft import (
    get_peft_config,
from tqdm import tqdm
from mindnlp.core.optim import AdamW
from mindnlp import evaluate
from mindnlp.dataset import load_dataset
from mindnlp.transformers import AutoModelForSequenceClassification, AutoTokenizer
from mindnlp.transformers.optimization import get_linear_schedule_with_warmup
from mindnlp.peft import (
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    LoraConfig,
    PeftType,
    PrefixTuningConfig,
    PromptEncoderConfig,
    LoraConfig,
)

import evaluate
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
from tqdm import tqdm
import faulthandler
faulthandler.enable()

# mindspore.set_context(pynative_synchronize=True)
# In[2]:

batch_size = 32
model_name_or_path = "roberta-large"
task = "mrpc"
peft_type = PeftType.LORA
device = "npu" # "cuda"
peft_type = PeftType.PROMPT_TUNING
num_epochs = 20


# In[3]:


peft_config = LoraConfig(task_type="SEQ_CLS", inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1)
lr = 3e-4


# In[4]:


if any(k in model_name_or_path for k in ("gpt", "opt", "bloom")):
    padding_side = "left"
else:
@@ -41,38 +54,108 @@
if getattr(tokenizer, "pad_token_id") is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id


# In[5]:


datasets = load_dataset("glue", task)
metric = evaluate.load("glue", task)
print(next(datasets['train'].create_dict_iterator()))


def tokenize_function(examples):
    # max_length=None => use the model max length (it's actually the default)
    outputs = tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, max_length=None)
    return outputs
# In[6]:


tokenized_datasets = datasets.map(
    tokenize_function,
    batched=True,
    remove_columns=["idx", "sentence1", "sentence2"],
)
from mindnlp.dataset import BaseMapFunction

# We also rename the 'label' column to 'labels' which is the expected name for labels by the models of the
# transformers library
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
class MapFunc(BaseMapFunction):
    def __call__(self, sentence1, sentence2, label, idx):
        outputs = tokenizer(str(sentence1), str(sentence2), truncation=True, max_length=None)
        return outputs['input_ids'], outputs['attention_mask'], label


def collate_fn(examples):
    return tokenizer.pad(examples, padding="longest", return_tensors="pt")
def get_dataset(dataset, tokenizer):
    input_colums=['sentence1', 'sentence2', 'label', 'idx']
    output_columns=['input_ids', 'attention_mask', 'labels']
    dataset = dataset.map(MapFunc(input_colums, output_columns),
                          input_colums, output_columns)
    dataset = dataset.padded_batch(batch_size, pad_info={'input_ids': (None, tokenizer.pad_token_id),
                                                         'attention_mask': (None, 0)})
    return dataset

train_dataset = get_dataset(datasets['train'], tokenizer)
eval_dataset = get_dataset(datasets['validation'], tokenizer)

# Instantiate dataloaders.
train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size)
eval_dataloader = DataLoader(
    tokenized_datasets["validation"], shuffle=False, collate_fn=collate_fn, batch_size=batch_size
)

model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True)
# In[7]:


print(next(train_dataset.create_dict_iterator()))


# In[8]:


metric = evaluate.load("glue", task)


# In[9]:

model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True, attn_implementation='eager')
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
model


# In[10]:


optimizer = AdamW(params=tuple(param for param in model.parameters() if param.requires_grad), lr=lr)

# Instantiate scheduler
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0.06 * (len(train_dataset) * num_epochs),
    num_training_steps=(len(train_dataset) * num_epochs),
)


# In[ ]:


from mindnlp.core import value_and_grad
def forward_fn(**batch):
    outputs = model(**batch)
    loss = outputs.loss
    return loss

grad_fn = value_and_grad(forward_fn, tuple(param for param in model.parameters() if param.requires_grad))

for epoch in range(num_epochs):
    model.set_train()
    train_total_size = train_dataset.get_dataset_size()
    for step, batch in enumerate(tqdm(train_dataset.create_dict_iterator(), total=train_total_size)):
        optimizer.zero_grad()
        loss = grad_fn(**batch)
        optimizer.step()
        lr_scheduler.step()

    model.set_train(False)
    eval_total_size = eval_dataset.get_dataset_size()
    for step, batch in enumerate(tqdm(eval_dataset.create_dict_iterator(), total=eval_total_size)):
        with _no_grad():
            outputs = model(**batch)
        predictions = outputs.logits.argmax(axis=-1)
        predictions, references = predictions, batch["labels"]
        metric.add_batch(
            predictions=predictions,
            references=references,
        )

    eval_metric = metric.compute()
    print(f"epoch {epoch}:", eval_metric)


# In[ ]:




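One migration detail worth highlighting in the file above: the torch-style DataLoader with a tokenizer.pad collate function is replaced by MindSpore's padded_batch, which performs the same pad-to-longest-in-batch step declaratively. A minimal sketch of the pattern, reusing the column names and pad values from the diff (tokenizer and batch_size as defined in the script):

# pad_info maps each column to (target shape, pad value); a None dimension
# means "pad this axis to the longest sample in the current batch".
dataset = dataset.padded_batch(
    batch_size,
    pad_info={'input_ids': (None, tokenizer.pad_token_id),  # fill with the pad token id
              'attention_mask': (None, 0)},                 # padded positions are masked out
)
batch = next(dataset.create_dict_iterator())
print(batch['input_ids'].shape)  # (batch_size, longest sequence length in this batch)
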
4 changes: 3 additions & 1 deletion mindnlp/core/__init__.py
@@ -47,7 +47,7 @@
from .amp import autocast, GradScaler

from . import profiler, cuda, optim, amp, compiler, jit, version, __future__, overrides, \
    return_types, linalg, fx, backends, testing
    return_types, linalg, fx, backends, testing, nn

from ._lowrank import svd_lowrank
from .random import get_rng_state, initial_seed, manual_seed, seed, set_rng_state
@@ -95,3 +95,5 @@ def set_autocast_dtype(device_type, dtype):

def get_autocast_dtype(device_type):
    return AUTO_CAST_DTYE[device_type]

__version__ = 'test_version_no_value'
1 change: 1 addition & 0 deletions mindnlp/core/autograd/function.py
@@ -47,6 +47,7 @@ def value_and_grad_f(*args, **kwargs):
    if kwargs:
        run_args = args + tuple(kwargs.values())

    grads = _pynative_executor.check_run(grad_, fn_, params_or_argnums, None, *run_args)
    grads = _pynative_executor.grad(fn_, grad_, params_or_argnums, None, *run_args)
    grads = tuple(mindspore.Tensor(grad) for grad in grads)
    if attach_grads:
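For context on the line added above: value_and_grad is the wrapper the training script uses for its optimization step, so the check_run/grad pair sits on the hot path of every batch. Condensed from the script earlier in this PR (model, optimizer, and train_dataset as defined there):

from mindnlp.core import value_and_grad

def forward_fn(**batch):
    return model(**batch).loss  # forward pass returning the scalar loss

# Differentiate only w.r.t. the trainable (LoRA) parameters.
grad_fn = value_and_grad(forward_fn,
                         tuple(p for p in model.parameters() if p.requires_grad))

for batch in train_dataset.create_dict_iterator():
    optimizer.zero_grad()
    loss = grad_fn(**batch)  # runs forward_fn and attaches gradients to the params
    optimizer.step()
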
17 changes: 10 additions & 7 deletions mindnlp/utils/safetensors_patch.py
@@ -85,13 +85,15 @@ def ndim(self):

def get(self, *args, **kwargs):
    nbytes = int(np.prod(self.shape)) * np.dtype(self.dtype).itemsize
    offset = self.start_offset
    tensor = np.frombuffer(self.buffermmap, dtype=self.dtype, offset=offset,
                           count=nbytes // np.dtype(self.dtype).itemsize)
    buffer = bytearray(nbytes)
    self.bufferfile.seek(self.start_offset)
    self.bufferfile.readinto(buffer)
    tensor = np.frombuffer(buffer, dtype=self.dtype).reshape(self.shape)
    tensor = tensor.reshape(self.shape)
    if not SUPPORT_BF16 and self.info["dtype"] == 'BF16':
        tensor = tensor.astype(np.float16)
    tensor = Tensor.from_numpy(tensor)

    return tensor

@property
@@ -135,17 +137,18 @@ def getSize(fileobject):


def metadata_validate(metadata):
    start = 0
    end = 0
    for key, info in metadata.items():
        s, e = info["data_offsets"]
        if s != start or e < s:
        if e < s:
            raise ValueError(f"SafeTensorError::InvalidOffset({key})")
        start = e
        if e > end:
            end = e
        nelements = np.prod(info["shape"])
        nbytes = nelements * _DTYPE_SIZE[info["dtype"]]
        if (e - s) != nbytes:
            raise ValueError("SafeTensorError::TensorInvalidInfo")
    return start
    return end

def read_metadata(buffer):
    buffer_len = getSize(buffer)
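To make the relaxed offset validation concrete, here is a self-contained sketch of the new metadata_validate logic with a toy header. The _DTYPE_SIZE excerpt is an assumption for illustration only; the real table in safetensors_patch.py covers every supported dtype:

import numpy as np

_DTYPE_SIZE = {"F32": 4, "F16": 2, "BF16": 2}  # illustrative excerpt only

def metadata_validate(metadata):
    end = 0
    for key, info in metadata.items():
        s, e = info["data_offsets"]
        if e < s:  # each entry only has to be internally consistent now
            raise ValueError(f"SafeTensorError::InvalidOffset({key})")
        if e > end:
            end = e  # track the furthest byte touched instead of requiring contiguity
        nbytes = int(np.prod(info["shape"])) * _DTYPE_SIZE[info["dtype"]]
        if (e - s) != nbytes:
            raise ValueError("SafeTensorError::TensorInvalidInfo")
    return end  # previously returned `start`, which assumed sorted, gap-free entries

# Entries listed out of offset order now validate fine; the old
# `s != start` check would have rejected this header at key "b".
meta = {
    "b": {"data_offsets": (16, 24), "shape": [4], "dtype": "F16"},
    "a": {"data_offsets": (0, 16), "shape": [4], "dtype": "F32"},
}
assert metadata_validate(meta) == 24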