27 changes: 5 additions & 22 deletions examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.ipynb
@@ -18,9 +18,6 @@
"outputs": [],
"source": [
"import mindnlp\n",
"import mindspore\n",
"\n",
"# mindspore.set_context(pynative_synchronize=True)\n",
"from datasets import Dataset\n",
"import pandas as pd\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig"
@@ -167,8 +164,7 @@
"source": [
"import torch\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16)\n",
"model = model.npu()"
"model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, device_map='auto')"
]
},
{
@@ -250,22 +246,6 @@
"model.print_trainable_parameters()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b6aebd2",
"metadata": {},
"outputs": [],
"source": [
"# 待训练的lora参数需转成fp32\n",
"print_flag = True\n",
"for param in filter(lambda p: p.requires_grad, model.parameters()):\n",
" if print_flag:\n",
" print(param.data.dtype)\n",
" print_flag = False\n",
" param.data = param.data.to(torch.float32)"
]
},
{
"cell_type": "markdown",
"id": "ca055683-837f-4865-9c57-9164ba60c00f",
@@ -362,11 +342,14 @@
"tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)\n",
"\n",
"# 加载模型\n",
"model = AutoModelForCausalLM.from_pretrained(mode_path, device_map=\"auto\",torch_dtype=torch.bfloat16, trust_remote_code=True).eval()\n",
"model = AutoModelForCausalLM.from_pretrained(mode_path, torch_dtype=torch.float16, trust_remote_code=True).eval()\n",
"\n",
"# 加载lora权重\n",
"model = PeftModel.from_pretrained(model, model_id=lora_path)\n",
"\n",
"# host to device\n",
"model = model.npu()\n",
"\n",
"prompt = \"你是谁?\"\n",
"inputs = tokenizer.apply_chat_template([{\"role\": \"user\", \"content\": \"假设你是皇帝身边的女人--甄嬛。\"},{\"role\": \"user\", \"content\": prompt}],\n",
" add_generation_prompt=True,\n",
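Taken together, these notebook changes load the training model with device_map='auto' instead of an explicit model.npu() call, while inference loads the base model in fp16 on host, attaches the LoRA adapter, and only then does a single host-to-device move. A minimal sketch of the resulting inference path (the checkpoint directory and max_new_tokens value are placeholders, not taken from the PR):

import mindnlp  # patches transformers to run on MindSpore / Ascend NPU
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

mode_path = 'Qwen/Qwen2.5-7B-Instruct'
lora_path = './output/Qwen2.5_instruct_lora/checkpoint-100'  # hypothetical checkpoint

tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)

# load the base model in fp16 on host, attach the adapter, then move once
model = AutoModelForCausalLM.from_pretrained(
    mode_path, torch_dtype=torch.float16, trust_remote_code=True).eval()
model = PeftModel.from_pretrained(model, model_id=lora_path)
model = model.npu()

inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": "你是谁?"}],
    add_generation_prompt=True, tokenize=True,
    return_tensors="pt", return_dict=True).to(model.device)

with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))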
120 changes: 11 additions & 109 deletions examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.py
@@ -1,11 +1,7 @@
#!/usr/bin/env python
# !/usr/bin/env python
# coding: utf-8

# # Import dependencies

# In[ ]:


# Import dependencies
import mindnlp
import mindspore

@@ -14,30 +10,12 @@
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig



# Convert the JSON file into a Dataset
df = pd.read_json('/home/lvyufeng/lvyufeng/mindnlp/examples/transformers/peft/lora/huanhuan.json')
ds = Dataset.from_pandas(df)


# In[ ]:


ds[:3]


# # Process the dataset

# In[ ]:


# Process the dataset
tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-7B-Instruct', use_fast=False, trust_remote_code=True)
tokenizer


# In[ ]:


def process_func(example):
MAX_LENGTH = 384 # The Llama tokenizer splits a single Chinese character into multiple tokens, so relax the max length to keep samples intact
@@ -57,55 +35,14 @@ def process_func(example):
"labels": labels
}


# In[ ]:


tokenized_id = ds.map(process_func, remove_columns=ds.column_names)

print(len(tokenized_id))

# In[ ]:


tokenizer.decode(tokenized_id[0]['input_ids'])


# In[ ]:


tokenizer.decode(list(filter(lambda x: x != -100, tokenized_id[1]["labels"])))


# # Create the model

# In[ ]:


import torch

model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, attn_implementation='eager')
# model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16)
model = model.npu()


# In[ ]:


model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, device_map=0)
model.enable_input_require_grads() # this must be called when gradient checkpointing is enabled


# In[ ]:


model.dtype


# # LoRA

# In[ ]:


# LoRA
from peft import LoraConfig, TaskType, get_peft_model

config = LoraConfig(
@@ -116,30 +53,11 @@ def process_func(example):
lora_alpha=32, # LoRA alpha; see the LoRA paper for its exact role
lora_dropout=0.1 # dropout ratio
)
config


# In[ ]:


model = get_peft_model(model, config)
config


# In[ ]:


model.print_trainable_parameters()


# In[ ]:


# # Configure training arguments

# In[ ]:


# Configure training arguments
args = TrainingArguments(
output_dir="./output/Qwen2.5_instruct_lora",
per_device_train_batch_size=4,
@@ -149,39 +67,21 @@ def process_func(example):
save_steps=100,
learning_rate=1e-4,
save_on_each_node=True,
# fp16=True,
# gradient_checkpointing=True
)


# In[ ]:


trainer = Trainer(
model=model,
args=args,
train_dataset=tokenized_id,
data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)


# In[ ]:


trainer.accelerator.state


# In[ ]:


trainer.train()

# Load the merged model

# # Load the merged model

# In[ ]:


import mindnlp
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
@@ -193,11 +93,13 @@ def process_func(example):
tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)

# Load the model
model = AutoModelForCausalLM.from_pretrained(mode_path, device_map="auto",torch_dtype=torch.bfloat16, trust_remote_code=True).eval()
model = AutoModelForCausalLM.from_pretrained(mode_path, torch_dtype=torch.float16, trust_remote_code=True).eval()

# Load the LoRA weights
model = PeftModel.from_pretrained(model, model_id=lora_path)

model = model.npu()

prompt = "你是谁?"
inputs = tokenizer.apply_chat_template([{"role": "user", "content": "假设你是皇帝身边的女人--甄嬛。"},{"role": "user", "content": prompt}],
add_generation_prompt=True,
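One detail worth noting from the script: process_func masks prompt tokens in labels with -100, the default ignore_index of torch.nn.CrossEntropyLoss, so the loss is computed only on the response tokens; that is why the labels are decoded above by filtering out -100. A self-contained sketch of the convention, using made-up token ids:

IGNORE_INDEX = -100  # default ignore_index of torch.nn.CrossEntropyLoss

prompt_ids = [11, 12, 13]    # illustrative prompt token ids
response_ids = [21, 22, 2]   # illustrative response token ids

input_ids = prompt_ids + response_ids
labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids  # supervise only the response

# mirrors tokenizer.decode(list(filter(lambda x: x != -100, labels))) above
supervised = [t for t in labels if t != IGNORE_INDEX]
assert supervised == response_ids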
88 changes: 85 additions & 3 deletions mindnlp/core/_C/__init__.py
@@ -1,6 +1,6 @@
from typing import Any
from mindspore import Generator as msGenerator
import mindspore
from mindspore.ops.operations._inner_ops import Generator as GeneratorOp

from mindnlp import core
from . import _nn
@@ -105,19 +105,101 @@ def __exit__(self, type: Any, value: Any, traceback: Any):

device_ = device

class Generator(msGenerator):
STEP = 0
SEED = 1
GET_STATE = 2
SET_STATE = 3
MANUAL_SEED = 4
INITIAL_SEED = 5

class Generator:
def __init__(self, device='cpu'):
super().__init__()
if device == 'cuda' and DEVICE_TARGET == 'Ascend':
device = 'npu'
self._device = device_(device) if isinstance(device, str) else device

self._seed = mindspore.Tensor(0)
self._offset = mindspore.Tensor(0)
self._generator = GeneratorOp().set_device("CPU")
self._generator.add_prim_attr("manual_seed", False)


@property
def device(self):
if hasattr(self, '_device'):
return self._device
return device('cpu')

def set_state(self, state):
"""
Sets the generator state.

Args:
state (tensor): target state of the generator.
"""
self._generator(SET_STATE, (self._seed, self._offset, state))

def get_state(self):
"""
Get the generator state.

Returns:
Tensor, generator state.
"""
return self._generator(GET_STATE, (self._seed, self._offset))[2]

def seed(self): # pylint: disable=redefined-outer-name
"""
Seed generator with random number.

Returns:
Randomly generated seeds, the type is int.
"""
current_seed = self._generator(
SEED, (self._seed, self._offset))[0]
return current_seed.item()

def manual_seed(self, seed): # pylint: disable=redefined-outer-name
"""
Set the generator seed.

Args:
seed (int): Set the generator seed.

Returns:
Generator, the generator instance.
"""
if not isinstance(seed, int):
raise TypeError("Seed must be an integer.")
seed = mindspore.Tensor(seed, mindspore.int64)
self._generator(MANUAL_SEED, (self._seed, self._offset, seed))
self._generator.add_prim_attr("manual_seed", True)
return self

def initial_seed(self):
"""
Return the initial seed of generator.

Returns:
The initial seed of generator.
"""
current_seed = self._generator(
INITIAL_SEED, (self._seed, self._offset))[0]
return current_seed.item()


def _step(self, step):
"""
Return current seed and offset, and update offset for the next call.

Args:
step (Tensor): Update offset by step.

Returns:
Current seed and offset.
"""
return self._generator(STEP, (self._seed, self._offset, step,))[:2]

default_generator = Generator()

class Tag: pass
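For orientation, a short usage sketch of the rewritten Generator, using only the methods shown above and assuming they follow torch.Generator semantics (state held in the seed/offset tensors and driven by the inner GeneratorOp):

from mindnlp.core._C import Generator, default_generator

g = Generator()            # defaults to the 'cpu' device
g.manual_seed(42)          # fixed seed for reproducibility
assert g.initial_seed() == 42

state = g.get_state()      # snapshot the generator state tensor
g.seed()                   # reseed randomly; the state diverges
g.set_state(state)         # restore the earlier snapshot

print(g.device, default_generator.device)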
6 changes: 3 additions & 3 deletions mindnlp/core/__init__.py
@@ -43,12 +43,12 @@



from ._C import *
from ._C.size import Size
from ._dtype import *
from .ops import *
from ._tensor import Tensor, tensor, is_tensor, \
LongTensor, FloatTensor, BoolTensor, HalfTensor, BFloat16Tensor, IntTensor
from ._C import *
from ._C.size import Size
from .ops import *
from ._tensor import enable_mindspore_patch
enable_mindspore_patch()

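The reordering above defers from ._C import * and from .ops import * until after ._dtype and ._tensor are bound, plausibly because the new _C.Generator code touches mindnlp.core while the package is still initializing. A hypothetical two-file sketch (not mindnlp's actual modules) of the circular-import failure such an ordering avoids:

# pkg/_C.py -- reaches back into the parent package at import time
from pkg import Tensor            # works only if pkg has already bound Tensor

def default_tensor():
    return Tensor()

# pkg/__init__.py -- order matters:
# from ._C import default_tensor  # too early: ImportError, cannot import name 'Tensor'
from ._tensor import Tensor       # bind Tensor into the package first...
from ._C import default_tensor    # ...now _C's 'from pkg import Tensor' succeeds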