In [1]:
import numpy as np
import time
import mindspore
import mindspore.nn as nn
import mindspore.ops as ops
import mindspore.dataset as ds

from mindspore import context, Tensor
from mindspore.communication import init, get_rank

from mindnlp.peft import LoraConfig, LoraModel


# Select graph execution mode and the GPU as device target for the whole notebook.
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
# Communication (NCCL) initialisation is left disabled here.
# init('nccl')

  from tqdm.autonotebook import tqdm


In [2]:
# Build a synthetic test dataset with MindSpore.

# 1000 samples with 20 random features each (fixed seed).
X = mindspore.ops.rand((1000,20), seed=0)
# Binary label: 1 when the sum over a sample's features exceeds 10, otherwise 0.
y = (X.sum(1) > 10).int()

# Show the feature dtype and the label shape.
print(ops.DType()(X))
print(y.shape)

# The first n_train samples are used for training, the rest for evaluation.
n_train = 800
batch_size = 64

class MyIterable:
    """Iterator yielding ``(sample, label)`` pairs from two aligned tensors.

    Calling ``iter()`` on the object rewinds it, so it can be traversed
    repeatedly; ``len()`` reports the number of samples.
    """

    def __init__(self, X: Tensor, y: Tensor):
        self._data = X
        self._label = y
        self._index = 0

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        # Rewind so that each new pass starts from the first sample.
        self._index = 0
        return self

    def __next__(self):
        position = self._index
        # Guard clause: stop once every sample has been handed out.
        if position >= len(self._data):
            raise StopIteration
        pair = (self._data[position], self._label[position])
        self._index = position + 1
        return pair


# Training dataset: the first n_train samples, exposed as columns "data" and "label".
# NOTE(review): MyIterable only implements __iter__/__next__ (no __getitem__) —
# confirm that shuffle=True actually takes effect for such a source.
train_ds = ds.GeneratorDataset(
    source=MyIterable(X[:n_train], y[:n_train]),
    column_names=["data", "label"],
    shuffle=True
)
# Evaluation dataset: the remaining samples, same column layout.
eval_ds = ds.GeneratorDataset(
    source=MyIterable(X[n_train:], y[n_train:]),
    column_names=["data", "label"],
)

# Group samples into batches of batch_size; drop_remainder=True discards an
# incomplete final batch in both the training and the evaluation set.
train_dataloader = train_ds.batch(batch_size=batch_size, drop_remainder=True)
eval_dataloader = eval_ds.batch(batch_size=batch_size, drop_remainder=True)

# Display the dataset objects as the cell output.
train_ds, train_dataloader


Float32
(1000,)


(<mindspore.dataset.engine.datasets_user_defined.GeneratorDataset at 0x7f323c15ddc0>,
 <mindspore.dataset.engine.datasets.BatchDataset at 0x7f31bffae310>)

### Model

In [3]:

def print_net_params(net: nn.Cell):
    """Print name and shape of every parameter of ``net``, then of its trainable ones.

    Each of the two listings is followed by a line stating how many
    parameters were listed.
    """
    every_param = list(net.get_parameters())
    for param in every_param:
        print(param.name, param.data.shape)
    print(f"all parameter numbers: {len(every_param)}")

    # Same listing, restricted to the parameters reported as trainable.
    trainable = net.trainable_params()
    for param in trainable:
        print(param.name, param.data.shape)
    print(f"trainable parameter numbers: {len(trainable)}")


In [4]:
class MLP(nn.Cell):
    """Multi-layer perceptron: 20 inputs -> hidden -> hidden -> 2 outputs.

    The two hidden Dense layers are each followed by ReLU; the final Dense
    layer is followed by LogSoftmax over the last axis.
    """

    def __init__(self, hidden=2000):
        super().__init__()
        # Keep the attribute name `layers`: parameter names are derived from it
        # (e.g. "layers.0.weight").
        stack = [
            nn.Dense(20, hidden),
            nn.ReLU(),
            nn.Dense(hidden, hidden),
            nn.ReLU(),
            nn.Dense(hidden, 2),
            nn.LogSoftmax(axis=-1),
        ]
        self.layers = nn.SequentialCell(stack)

    def construct(self, X):
        """Run ``X`` through the layer stack and return its output."""
        return self.layers(X)
    

In [5]:
# Training hyper-parameters.
lr = 0.002
# NOTE(review): batch_size is also assigned in the data cell, where the batched
# datasets are built; changing it only here presumably has no effect on them.
batch_size = 64
max_epochs = 50

# Baseline model (no PEFT), with a listing of its parameters.
mlp = MLP()
print_net_params(mlp)
# Adam over all trainable parameters of the baseline model.
optimizer = nn.Adam(mlp.trainable_params(), learning_rate=lr)
# NOTE(review): MLP already ends with LogSoftmax and its output is passed to
# CrossEntropyLoss — confirm that this combination is intended.
criterion = nn.CrossEntropyLoss()


layers.0.weight (2000, 20)
layers.0.bias (2000,)
layers.2.weight (2000, 2000)
layers.2.bias (2000,)
layers.4.weight (2, 2000)
layers.4.bias (2,)
all parameter numbers: 6
layers.0.weight (2000, 20)
layers.0.bias (2000,)
layers.2.weight (2000, 2000)
layers.2.bias (2000,)
layers.4.weight (2, 2000)
layers.4.bias (2,)
trainable parameter numbers: 6


In [6]:


def train(model, optimizer, criterion, train_dataloader, eval_dataloader, epochs):
    """Train ``model`` for ``epochs`` epochs, evaluating after every epoch.

    Args:
        model: network called as ``model(data)`` to produce logits.
        optimizer: callable applied to the gradients; its ``parameters``
            attribute lists the weights that are differentiated.
        criterion: loss called as ``criterion(logits, label)``.
        train_dataloader: iterable of ``(data, label)`` batches supporting ``len()``.
        eval_dataloader: iterable of ``(data, label)`` batches supporting ``len()``.
        epochs: number of passes over ``train_dataloader``.

    Returns:
        None. On every even epoch (0, 2, 4, ...) the mean training loss and
        the mean evaluation loss of that epoch are printed.
    """
    def forward_fn(data, label):
        # Returns the loss first; logits are carried along as auxiliary output.
        logits = model(data)
        loss = criterion(logits, label)
        return loss, logits

    # Differentiate the loss with respect to the optimizer's parameters only.
    grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

    def train_one_epoch():
        model.set_train()
        total_loss = 0

        for xb, yb in train_dataloader:
            # forward + compute grad
            (loss, _), grads = grad_fn(xb, yb)
            # update model params
            optimizer(grads)
            total_loss += loss

        return total_loss / len(train_dataloader)

    def eval_one_epoch():
        model.set_train(False)
        total_loss = 0

        for xb, yb in eval_dataloader:
            # Evaluation only needs the loss: run the plain forward pass
            # instead of grad_fn, so no gradients are computed and discarded.
            loss, _ = forward_fn(xb, yb)
            total_loss += loss

        return total_loss / len(eval_dataloader)

    # train start from here
    for epoch in range(epochs):
        train_loss = train_one_epoch()
        eval_loss = eval_one_epoch()

        if epoch % 2 == 0:
            print(f"epoch:{epoch}  train_loss:{train_loss}  eval_loss:{eval_loss}")
    


In [7]:
# Train the baseline MLP for max_epochs epochs and report the wall time.
# NOTE(review): the saved output below stops at epoch 16 although max_epochs
# is 50 — presumably the run was interrupted; confirm and re-run.
%time train(mlp, optimizer, criterion, train_dataloader, eval_dataloader, epochs=max_epochs)

epoch:0  train_loss:0.69207174  eval_loss:0.6788408
epoch:2  train_loss:0.6668038  eval_loss:0.7038066
epoch:4  train_loss:0.42241004  eval_loss:0.37191167
epoch:6  train_loss:0.2422852  eval_loss:0.24100828
epoch:8  train_loss:0.13968705  eval_loss:0.30576023
epoch:10  train_loss:0.19051766  eval_loss:0.3676723
epoch:12  train_loss:0.15267205  eval_loss:0.20878808
epoch:14  train_loss:0.23691426  eval_loss:0.3014338
epoch:16  train_loss:0.5297999  eval_loss:0.6039374


: 

: 

In [None]:
# Sanity check: print the network structure and display its trainable parameters.
trainable_params = mlp.trainable_params()
print(mlp)
trainable_params

MLP<
  (layers): SequentialCell<
    (0): Dense<input_channels=20, output_channels=2000, has_bias=True>
    (1): ReLU<>
    (2): Dense<input_channels=2000, output_channels=2000, has_bias=True>
    (3): ReLU<>
    (4): Dense<input_channels=2000, output_channels=2, has_bias=True>
    (5): LogSoftmax<>
    >
  >


[Parameter (name=layers.0.weight, shape=(2000, 20), dtype=Float32, requires_grad=True),
 Parameter (name=layers.0.bias, shape=(2000,), dtype=Float32, requires_grad=True),
 Parameter (name=layers.2.weight, shape=(2000, 2000), dtype=Float32, requires_grad=True),
 Parameter (name=layers.2.bias, shape=(2000,), dtype=Float32, requires_grad=True),
 Parameter (name=layers.4.weight, shape=(2, 2000), dtype=Float32, requires_grad=True),
 Parameter (name=layers.4.bias, shape=(2,), dtype=Float32, requires_grad=True)]

In [None]:
import mindnlp
import mindnlp.peft as peft


# target_modules: the modules to which PEFT (LoRA) parameters are added.
# modules_to_save: original modules that are kept as they are and not frozen.
config = peft.LoraConfig(
    r=8,
    target_modules=["layers.0", "layers.2"],
    modules_to_save=["layers.4"],
)

In [None]:
# Display the LoRA configuration, including the fields left at their defaults.
config

LoraConfig(base_model_name_or_path=None, peft_type=<PeftType.LORA: 'LORA'>, task_type=None, inference_mode=False, r=8, target_modules=['layers.0', 'layers.2'], lora_alpha=None, lora_dropout=None, fan_in_fan_out=False, bias='none', modules_to_save=['layers.4'], init_lora_weights=True, layers_to_transform=None, layers_pattern=None)

In [None]:
# Create a fresh MLP and wrap it with the LoRA configuration.
# NOTE(review): in the recorded run this cell raised
#   AttributeError: 'LoraConfig' object has no attribute 'is_prompt_learning'
# — presumably a mindnlp version issue; confirm before relying on this cell.
mlp = MLP()
peft_mlp = peft.get_peft_model(mlp, peft_config=config)

AttributeError: 'LoraConfig' object has no attribute 'is_prompt_learning'