In [8]:
import numpy as np
import time
import mindspore
import mindspore.nn as nn
import mindspore.ops as ops
import mindspore.dataset as ds

from mindspore import context, Tensor
from mindspore.communication import init, get_rank

# Run MindSpore in graph mode on the GPU backend.
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
# Communication initialisation is currently disabled; presumably it is only
# needed for distributed (NCCL) runs -- TODO confirm before re-enabling.
# init('nccl')

In [9]:
# Hyper-parameters shared by the dataset pipeline and the training cells.
n_train, batch_size = 800, 64
lr, max_epochs = 1e-4, 100

# Build a synthetic test dataset with MindSpore: 1000 random rows of 20
# features each; a row is labelled 1 when its feature sum exceeds 10, else 0.
X = mindspore.ops.rand((1000, 20), seed=0)
y = (X.sum(axis=1) > 10).int()

class MyIterable:
    """Iterator over paired (data, label) rows, used as a dataset source.

    `X` and `y` are walked in lock-step along their first axis. Calling
    `iter()` on the object rewinds it to the first row, so the same instance
    can be consumed more than once.
    """

    def __init__(self, X: Tensor, y: Tensor):
        self._data = X
        self._label = y
        # Position of the next row that __next__ will return.
        self._index = 0

    def __len__(self):
        """Return the number of rows, i.e. the length of the data container."""
        return len(self._data)

    def __iter__(self):
        """Rewind to the first row and return this object as the iterator."""
        self._index = 0
        return self

    def __next__(self):
        """Return the next (data, label) pair, or raise StopIteration at the end."""
        cursor = self._index
        if cursor >= len(self._data):
            raise StopIteration
        pair = (self._data[cursor], self._label[cursor])
        # Advance only after both lookups succeeded.
        self._index = cursor + 1
        return pair


# Wrap the train / eval splits as MindSpore datasets with named columns.
# The source only implements the iterator protocol (it has no `__getitem__`),
# and GeneratorDataset documents its `shuffle` argument as requiring a
# random-accessible source. Shuffling is therefore done explicitly on the
# pipeline below instead of through the constructor flag.
train_ds = ds.GeneratorDataset(
    source=MyIterable(X[:n_train], y[:n_train]),
    column_names=["data", "label"],
)
eval_ds = ds.GeneratorDataset(
    source=MyIterable(X[n_train:], y[n_train:]),
    column_names=["data", "label"],
)

# A shuffle buffer as large as the training split gives a full shuffle of the
# training samples before they are grouped into batches.
train_dataloader = train_ds.shuffle(buffer_size=n_train).batch(
    batch_size=batch_size, drop_remainder=True
)
# NOTE(review): drop_remainder=True also discards the last partial eval batch
# (200 % 64 = 8 samples with the current settings) -- confirm this is intended.
eval_dataloader = eval_ds.batch(batch_size=batch_size, drop_remainder=True)

train_ds, train_dataloader


Float32
(1000,)


(<mindspore.dataset.engine.datasets_user_defined.GeneratorDataset at 0x7f3fe4891df0>,
 <mindspore.dataset.engine.datasets.BatchDataset at 0x7f3fe4891eb0>)

### Model

In [10]:

def print_net_params(net: nn.Cell):
    """Print name and shape of every parameter of `net`, then of the trainable ones.

    Each listing is followed by a line with the number of parameters printed.
    """
    def show(param):
        print(param.name, param.data.shape)

    # Every parameter, counted while iterating.
    total = 0
    for param in net.get_parameters():
        total += 1
        show(param)
    print(f"all parameter numbers: {total}")

    # Only the trainable parameters.
    trainable = net.trainable_params()
    for param in trainable:
        show(param)
    print(f"trainable parameter numbers: {len(trainable)}")


In [11]:
class MLP(nn.Cell):
    """Multi-layer perceptron with two hidden layers and a log-softmax output.

    Args:
        hidden: Width of both hidden Dense layers.
        in_features: Number of features per input sample. Defaults to 20,
            the feature count of the synthetic dataset in this notebook.
        num_classes: Number of output classes. Defaults to 2.

    The sub-layers are stored in `self.layers`, so the Dense layers keep the
    positions 0, 2 and 4 inside the sequential container.
    """

    def __init__(self, hidden=2000, in_features=20, num_classes=2):
        super().__init__()
        self.layers = nn.SequentialCell(
            nn.Dense(in_features, hidden),
            nn.ReLU(),
            nn.Dense(hidden, hidden),
            nn.ReLU(),
            nn.Dense(hidden, num_classes),
            # Log-softmax over the last (class) axis.
            nn.LogSoftmax(axis=-1)
        )

    def construct(self, X):
        """Apply the stacked layers to `X` and return the log-softmax output."""
        return self.layers(X)
    

In [12]:


# Instantiate the baseline model and list its parameters.
mlp = MLP()
print_net_params(mlp)

# Loss function and optimizer over all trainable parameters of the model.
# NOTE(review): MLP already ends in LogSoftmax, while CrossEntropyLoss is
# normally fed raw scores -- confirm the extra normalisation is intended.
criterion = nn.CrossEntropyLoss()
optimizer = nn.Adam(mlp.trainable_params(), learning_rate=lr)


layers.0.weight (2000, 20)
layers.0.bias (2000,)
layers.2.weight (2000, 2000)
layers.2.bias (2000,)
layers.4.weight (2, 2000)
layers.4.bias (2,)
all parameter numbers: 6
layers.0.weight (2000, 20)
layers.0.bias (2000,)
layers.2.weight (2000, 2000)
layers.2.bias (2000,)
layers.4.weight (2, 2000)
layers.4.bias (2,)
trainable parameter numbers: 6


In [13]:


def train(model, optimizer, criterion, train_dataloader, eval_dataloader, epochs):
    """Train `model` and print the mean train / eval loss every second epoch.

    Args:
        model: Network, called as `model(data)`.
        optimizer: Called with the gradients to update the model; the
            gradients are taken with respect to `optimizer.parameters`.
        criterion: Loss, called as `criterion(logits, label)`.
        train_dataloader: Iterable of `(data, label)` batches used for updates.
        eval_dataloader: Iterable of `(data, label)` batches used for
            evaluation only (no parameter update).
        epochs: Number of train + eval passes to run.

    Returns:
        None. Progress is reported through `print`.
    """
    def forward_fn(data, label):
        logits = model(data)
        loss = criterion(logits, label)
        return loss, logits

    # has_aux=True: only the first output (the loss) is differentiated, the
    # logits are passed through as an auxiliary output.
    grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)

    def train_one_epoch():
        model.set_train()
        total_loss = 0

        for xb, yb in train_dataloader:
            # forward + compute grad
            (loss, _), grads = grad_fn(xb, yb)
            # update model params
            optimizer(grads)
            total_loss += loss

        return total_loss / len(train_dataloader)

    def eval_one_epoch():
        model.set_train(False)
        total_loss = 0

        for xb, yb in eval_dataloader:
            # Evaluation only needs the loss, so run the plain forward pass
            # instead of grad_fn and avoid computing unused gradients.
            loss, _ = forward_fn(xb, yb)
            total_loss += loss

        return total_loss / len(eval_dataloader)

    # train start from here
    for epoch in range(epochs):
        train_loss = train_one_epoch()
        eval_loss = eval_one_epoch()

        if epoch % 2 == 0:
            print(f"epoch:{epoch}  train_loss:{train_loss}  eval_loss:{eval_loss}")
    


In [14]:
# Train the baseline MLP for `max_epochs` epochs and time the whole run.
%time train(mlp, optimizer, criterion, train_dataloader, eval_dataloader, epochs=max_epochs)

epoch:0  train_loss:0.69208336  eval_loss:0.6873576
epoch:2  train_loss:0.6827678  eval_loss:0.67769283
epoch:4  train_loss:0.6582739  eval_loss:0.6506631
epoch:6  train_loss:0.6083377  eval_loss:0.5987927
epoch:8  train_loss:0.53343755  eval_loss:0.52710015
epoch:10  train_loss:0.44243312  eval_loss:0.43978286
epoch:12  train_loss:0.3544993  eval_loss:0.36559346
epoch:14  train_loss:0.2854956  eval_loss:0.31712672
epoch:16  train_loss:0.23512037  eval_loss:0.28558853
epoch:18  train_loss:0.1979201  eval_loss:0.2649181
epoch:20  train_loss:0.16955899  eval_loss:0.25108624
epoch:22  train_loss:0.14691924  eval_loss:0.24156027
epoch:24  train_loss:0.12857452  eval_loss:0.23532297
epoch:26  train_loss:0.113222696  eval_loss:0.2311116
epoch:28  train_loss:0.10017309  eval_loss:0.22861288
epoch:30  train_loss:0.08903113  eval_loss:0.2275504
epoch:32  train_loss:0.07936326  eval_loss:0.22722642
epoch:34  train_loss:0.07092509  eval_loss:0.22780854
epoch:36  train_loss:0.06348407  eval_loss:0

In [None]:
# NOTE(review): identical call to the previous cell with the same `mlp` and
# `optimizer` objects, so running it continues training from the current
# weights instead of starting over -- confirm this is intended.
%time train(mlp, optimizer, criterion, train_dataloader, eval_dataloader, epochs=max_epochs)


In [None]:
# check: print the network structure, then display its trainable parameters
# as the cell output.
trainable_params = mlp.trainable_params()
print(mlp)
trainable_params

In [None]:
import mindnlp


import mindnlp.peft as peft

# LoRA configuration.
# target_modules: the modules that receive the additional PEFT parameters.
# modules_to_save: original modules that are kept as they are and not frozen.
config = peft.LoraConfig(
    r=8,  # presumably the LoRA rank -- confirm against the mindnlp.peft docs
    target_modules=["layers.0", "layers.2"],
    modules_to_save=["layers.4"],
)

In [None]:
# Display the LoRA configuration object as the cell output.
config

In [None]:
# Wrap a freshly initialised MLP according to the LoRA `config`.
# Note: this rebinds `mlp`, so the previously trained network is no longer
# reachable under that name.
mlp = MLP()
peft_mlp = peft.get_peft_model(mlp, peft_config=config)

In [None]:
# Report the trainable parameters of the PEFT-wrapped model.
peft_mlp.print_trainable_parameters()


In [None]:
import mindspore
from mindspore import nn, ops

class TEMP:
    """Plain helper base class that gives every instance its own empty `r` dict."""

    def __init__(self):
        self.r = dict()

class TestModel(nn.Dense, TEMP):
    """Experiment: a Dense layer combined with the plain-Python TEMP base.

    Both base initialisers are invoked explicitly, and `construct` is
    overridden to print the input shape and return the input unchanged.
    """
    def __init__(self, ):
        # Initialise each base by hand: Dense(10, 20) without bias first,
        # then TEMP, which sets `self.r`.
        nn.Dense.__init__(self, 10, 20, has_bias=False)
        TEMP.__init__(self,)

    
    def construct(self, x: mindspore.Tensor):
        """Print the shape of `x` and return `x` as-is."""
        # NOTE(review): the Dense layer set up in __init__ is not applied here.
        print(x.shape)
        return x

# Call the experimental model on a random (1000, 10) input; the result of the
# call is displayed as the cell output.
x = ops.rand((1000,10), seed=0)
model = TestModel()
model(x)
