27,194 changes: 41 additions & 27,153 deletions examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.ipynb

Large diffs are not rendered by default.

215 changes: 215 additions & 0 deletions examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.py
@@ -0,0 +1,215 @@
#!/usr/bin/env python
# coding: utf-8

# # Import the environment

# In[ ]:


import mindnlp
import mindspore

# mindspore.set_context(pynative_synchronize=True)
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig



# Load the JSON file into a Hugging Face Dataset
df = pd.read_json('/home/lvyufeng/lvyufeng/mindnlp/examples/transformers/peft/lora/huanhuan.json')
ds = Dataset.from_pandas(df)


# In[ ]:


ds[:3]


# # Process the dataset

# In[ ]:


tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-7B-Instruct', use_fast=False, trust_remote_code=True)
tokenizer


# In[ ]:


def process_func(example):
    MAX_LENGTH = 384    # the tokenizer may split a single Chinese character into several tokens, so leave headroom in the max length to keep samples intact
    input_ids, attention_mask, labels = [], [], []
    instruction = tokenizer(f"<|im_start|>system\n现在你要扮演皇帝身边的女人--甄嬛<|im_end|>\n<|im_start|>user\n{example['instruction'] + example['input']}<|im_end|>\n<|im_start|>assistant\n", add_special_tokens=False)  # add_special_tokens=False: do not prepend special tokens
    response = tokenizer(f"{example['output']}", add_special_tokens=False)
    input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.pad_token_id]
    attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]  # the eos token also needs attention, so append 1
    labels = [-100] * len(instruction["input_ids"]) + response["input_ids"] + [tokenizer.pad_token_id]
    if len(input_ids) > MAX_LENGTH:  # truncate
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }


# In[ ]:


tokenized_id = ds.map(process_func, remove_columns=ds.column_names)

print(len(tokenized_id))

# In[ ]:


tokenizer.decode(tokenized_id[0]['input_ids'])


# In[ ]:


tokenizer.decode(list(filter(lambda x: x != -100, tokenized_id[1]["labels"])))
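# Editorial sanity check (not part of this PR): make the labeling convention explicit --
# every prompt token is masked with -100, so the loss only covers the response plus the
# trailing pad token that stands in for EOS.
sample = tokenized_id[0]
num_masked = sum(1 for t in sample["labels"] if t == -100)
num_supervised = len(sample["labels"]) - num_masked
print(f"{num_masked} prompt tokens masked, {num_supervised} response tokens supervised")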


# # Create the model

# In[ ]:


import torch

model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, attn_implementation='eager')
# model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16)
model = model.npu()


# In[ ]:


model.enable_input_require_grads()  # required when gradient checkpointing is enabled
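# Editorial note: this call pairs with gradient checkpointing -- it keeps the input embeddings'
# requires_grad set so checkpointed blocks still receive gradients. If you uncomment
# gradient_checkpointing=True in the TrainingArguments below, a hedged equivalent using the
# standard Hugging Face API is:
# model.gradient_checkpointing_enable()  # enable activation checkpointing on the base model
# model.enable_input_require_grads()     # then make inputs require grad, as above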


# In[ ]:


model.dtype


# # LoRA

# In[ ]:


from peft import LoraConfig, TaskType, get_peft_model

config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    inference_mode=False,  # training mode
    r=8,  # LoRA rank
    lora_alpha=32,  # LoRA alpha; see the LoRA paper for how the scaling works
    lora_dropout=0.1  # dropout ratio
)
config


# In[ ]:


model = get_peft_model(model, config)
config


# In[ ]:


model.print_trainable_parameters()
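# Editorial sketch with assumed dimensions (not output from this run): LoRA with rank r adds
# r * (d_in + d_out) parameters per adapted linear layer, which is why the fraction printed
# above stays well below one percent of the 7B base model.
d_in = d_out = 3584                 # assumed hidden size for Qwen2.5-7B; adjust if the config differs
r = 8                               # matches the LoraConfig above
lora_params = r * (d_in + d_out)    # A is (r, d_in), B is (d_out, r)
full_params = d_in * d_out
print(f"per q_proj-sized layer: {lora_params} LoRA params vs {full_params} frozen ({lora_params / full_params:.3%})")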


# In[ ]:


# # Configure the training arguments

# In[ ]:


args = TrainingArguments(
    output_dir="./output/Qwen2.5_instruct_lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=3,
    save_steps=100,
    learning_rate=1e-4,
    save_on_each_node=True,
    # fp16=True,
    # gradient_checkpointing=True
)
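# Editorial note: with per_device_train_batch_size=4 and gradient_accumulation_steps=4, each
# optimizer step sees an effective batch of 16 samples, so 3 epochs give roughly
# 3 * ceil(len(tokenized_id) / 16) optimizer steps -- consistent with a final checkpoint such as
# checkpoint-702 below, assuming the huanhuan dataset holds about 3.7k samples.
import math
effective_batch = 4 * 4
steps_per_epoch = math.ceil(len(tokenized_id) / effective_batch)
print(f"effective batch: {effective_batch}, total optimizer steps: {3 * steps_per_epoch}")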


# In[ ]:


trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_id,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)
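# Editorial note: DataCollatorForSeq2Seq pads input_ids/attention_mask with the tokenizer's pad
# token and pads labels with -100 by default (label_pad_token_id=-100), so process_func can
# return unpadded, variable-length lists and the padded positions stay out of the loss.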


# In[ ]:


trainer.accelerator.state


# In[ ]:


trainer.train()


# # Merge and load the model

# In[ ]:


from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel

mode_path = 'Qwen/Qwen2.5-7B-Instruct'
lora_path = './output/Qwen2.5_instruct_lora/checkpoint-702'  # change this to the checkpoint path of your LoRA output

# load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)

# load the base model
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True).eval()

# load the LoRA weights
model = PeftModel.from_pretrained(model, model_id=lora_path)

prompt = "你是谁?"
inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "假设你是皇帝身边的女人--甄嬛。"}, {"role": "user", "content": prompt}],
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
    return_dict=True
).to('cuda')


gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
with torch.no_grad():
    outputs = model.generate(**inputs, **gen_kwargs)
    outputs = outputs[:, inputs['input_ids'].shape[1]:]
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
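# Editorial note: the section is titled "Merge and load the model", but the code above only
# attaches the adapter at inference time. A hedged follow-up sketch, if you also want a
# standalone merged checkpoint (the output directory name here is hypothetical;
# merge_and_unload() is the standard PEFT call for folding the low-rank updates into the base weights):
merged_model = model.merge_and_unload()
merged_model.save_pretrained('./output/Qwen2.5_instruct_lora_merged')
tokenizer.save_pretrained('./output/Qwen2.5_instruct_lora_merged')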

6 changes: 2 additions & 4 deletions mindnlp/__init__.py
@@ -50,12 +50,10 @@

# set mindnlp.core to torch
from .utils.torch_proxy import initialize_torch_proxy, setup_metadata_patch
from .utils.safetensors_patch import setup_safetensors_patch
from .core._tensor import enable_mindspore_patch

enable_mindspore_patch()
initialize_torch_proxy()
setup_metadata_patch()

from .utils.safetensors_patch import setup_safetensors_patch
setup_safetensors_patch()

from . import transformers
15 changes: 15 additions & 0 deletions mindnlp/core/_C/_ConvBackend.py
@@ -0,0 +1,15 @@
Slow2d = None
SlowTranspose2d = None
SlowDilated2d = None
Slow3d = None
SlowDilated3d = None
Empty = None
CudaDepthwise2d = None
CudaDepthwise3d = None
Cudnn = None
CudnnTranspose = None
Miopen = None
MiopenTranspose = None
MiopenDepthwise = None
Mkldnn = None
MkldnnEmpty = None
47 changes: 29 additions & 18 deletions mindnlp/core/__init__.py
@@ -41,27 +41,16 @@
inf = float("inf")
nan = float("nan")

from ._dtype import *
from ._tensor import Tensor, tensor, is_tensor, \
    LongTensor, FloatTensor, BoolTensor, HalfTensor, BFloat16Tensor, IntTensor


from ._C import *
from ._C.size import Size
from .autograd import *
from ._dtype import *
from .ops import *
from .serialization import load, save
from ._bind import get_default_dtype, set_default_dtype, get_default_device, is_autocast_enabled, set_autocast_enabled, \
    set_autocast_dtype, get_autocast_dtype

from .amp import autocast, GradScaler
from .func import vmap
from .configs import set_pyboost

from . import _dynamo
from . import profiler, cuda, amp, compiler, jit, version, __future__, overrides, \
    return_types, linalg, fx, backends, nn, fft, _jit_internal, utils, optim, testing
from ._lowrank import svd_lowrank
from .random import get_rng_state, initial_seed, manual_seed, seed, set_rng_state

from ._tensor import Tensor, tensor, is_tensor, \
    LongTensor, FloatTensor, BoolTensor, HalfTensor, BFloat16Tensor, IntTensor
from ._tensor import enable_mindspore_patch
enable_mindspore_patch()

def _has_compatible_shallow_copy_type(tensor, other):
"""
Expand Down Expand Up @@ -137,4 +126,26 @@ def typename(obj: _Any, /) -> str:
return f"{module}.{qualname}"


def _nnpack_available():
    return False


from .autograd import *
from .serialization import load, save
from ._bind import get_default_dtype, set_default_dtype, get_default_device, is_autocast_enabled, set_autocast_enabled, \
    set_autocast_dtype, get_autocast_dtype

from .amp import autocast, GradScaler
from .func import vmap
from .configs import set_pyboost
from .storage import UntypedStorage, Storage, TypedStorage

from . import _dynamo
from . import profiler, cuda, amp, compiler, jit, version, __future__, overrides, \
    return_types, linalg, fx, backends, nn, fft, _jit_internal, utils, optim, testing
from ._lowrank import svd_lowrank
from .random import get_rng_state, initial_seed, manual_seed, seed, set_rng_state



__version__ = 'test_version_no_value'
4 changes: 2 additions & 2 deletions mindnlp/core/_dtype.py
@@ -49,8 +49,8 @@ def __gt__(self, other):
float8_e4m3fnuz = None
float8_e5m2fnuz = None
complex32 = None
cfloat = complex32
cdouble = complex64
cfloat = complex64
cdouble = complex128

uint1 = None
uint2 = None
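Editorial note: the new aliases match PyTorch, where torch.cfloat is complex64 and torch.cdouble is complex128. A minimal check (sketch, assuming mindnlp.core re-exports the aliases via the `from ._dtype import *` shown in the core/__init__.py diff above):

from mindnlp import core
assert core.cfloat is core.complex64
assert core.cdouble is core.complex128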
File renamed without changes.
1 change: 1 addition & 0 deletions mindnlp/core/_prims/ascend.py
@@ -238,3 +238,4 @@ def one_hot_ext(tensor, num_classes):
    return pyboost_inner_prim.one_hot_ext_impl(tensor, num_classes, on_value, off_value, -1)

__all__.append('one_hot_ext')

15 changes: 15 additions & 0 deletions mindnlp/core/_prims/cpu.py
@@ -194,3 +194,18 @@ def bitwise_right_shift(input, other):
    return bitwise_right_shift_op(input, other)

__all__.append('bitwise_right_shift')

embedding_op = ops.Gather().set_device('CPU')
def embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq):
    return embedding_op(weight, input, 0)

__all__.append('embedding')


def randn(size, seed, offset, dtype):
    rand_op = ops.StandardNormal()
    output = rand_op(size)
    return output

__all__.append('randn')

4 changes: 4 additions & 0 deletions mindnlp/core/_tensor.py
@@ -16,6 +16,7 @@ class StubTensor: pass
except:
    from mindspore._c_expression import Tensor as Tensor_

from mindnlp import core
from . import ops, _dtype
from ._bind import get_device_in_context, device_, get_default_dtype
from ._utils import _rebuild_tensor_v2
@@ -2509,6 +2510,9 @@ def backward(self):
    def log_softmax(self, dim):
        return ops.log_softmax(self, dim)

    def char(self):
        return self.to(core.int8)

    @property
    def is_nested(self):
        return False
3 changes: 2 additions & 1 deletion mindnlp/core/amp/autocast_mode.py
@@ -6,7 +6,7 @@

from mindnlp import core

from mindspore._c_expression.amp import pop_amp_strategy, push_amp_strategy, AmpLevel
from mindspore._c_expression.amp import pop_amp_strategy, push_amp_strategy, AmpLevel, create_amp_strategy
from mindspore.common.dtype import TensorType as _dtype, float32
from mindspore.train.amp import AMP_AUTO_BLACK_LIST, AMP_AUTO_WHITE_LIST, AMP_PRIM_ARG_TABLE

@@ -74,6 +74,7 @@ def __enter__(self):
        core.set_autocast_dtype(self.device_type, self.dtype)
        white_list = [(prim.__name__, AMP_PRIM_ARG_TABLE[prim]) for prim in AMP_AUTO_WHITE_LIST]
        black_list = [(prim.__name__, AMP_PRIM_ARG_TABLE[prim]) for prim in AMP_AUTO_BLACK_LIST]
        amp_strategy = create_amp_strategy(self.amp_level, self.dtype, white_list, black_list)
        push_amp_strategy(self.amp_level, self.dtype, white_list, black_list)

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any):  # type: ignore[override]