10 changes: 5 additions & 5 deletions examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.ipynb
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@
"metadata": {},
"outputs": [],
"source": [
"!wget https://openi.pcl.ac.cn/mindnlp/self-llm/raw/branch/master/dataset/huanhuan.json -O huanhuan.json --no-check-certificate"
"!wget \"https://gh-proxy.com/https://raw.githubusercontent.com/datawhalechina/self-llm/refs/heads/master/dataset/huanhuan.json\" -O huanhuan.json --no-check-certificate"
]
},
{
@@ -164,7 +164,7 @@
"source": [
"import torch\n",
"\n",
"model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, device_map='auto')"
"model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, device_map=0)"
]
},
{
@@ -265,8 +265,8 @@
"source": [
"args = TrainingArguments(\n",
" output_dir=\"./output/Qwen2.5_instruct_lora\",\n",
" per_device_train_batch_size=4,\n",
" gradient_accumulation_steps=4,\n",
" per_device_train_batch_size=3,\n",
" gradient_accumulation_steps=5,\n",
" logging_steps=10,\n",
" num_train_epochs=3,\n",
" save_steps=100, \n",
@@ -336,7 +336,7 @@
"from peft import PeftModel\n",
"\n",
"mode_path = 'Qwen/Qwen2.5-7B-Instruct'\n",
"lora_path = './output/Qwen2.5_instruct_lora/checkpoint-702' # 这里改称你的 lora 输出对应 checkpoint 地址\n",
"lora_path = './output/Qwen2.5_instruct_lora/checkpoint-747' # 这里改称你的 lora 输出对应 checkpoint 地址\n",
"\n",
"# 加载tokenizer\n",
"tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)\n",
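The batch-size change trades a smaller per-device batch for one more accumulation step, which roughly preserves the effective batch size while lowering peak memory per device. A minimal sketch of the arithmetic (single-device run assumed); the checkpoint renumbering in the last hunk is consistent with the same numbers.

# Effective batch = per_device_train_batch_size * gradient_accumulation_steps * n_devices
old_effective = 4 * 4 * 1   # previous settings: 16 samples per optimizer step
new_effective = 3 * 5 * 1   # updated settings:  15 samples per optimizer step

# With roughly 3729 training samples (an assumption inferred from the checkpoint
# numbers, not stated in this diff) and 3 epochs, the final optimizer step moves
# from 3 * ceil(3729 / 16) = 702 to 3 * ceil(3729 / 15) = 747, matching the
# checkpoint path change above.
print(old_effective, new_effective)  # 16 15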
14 changes: 9 additions & 5 deletions examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.py
@@ -5,13 +5,17 @@
import mindnlp
import mindspore

# mindspore.set_context(pynative_synchronize=True)
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig

# download file
"""
wget "https://gh-proxy.com/https://raw.githubusercontent.com/datawhalechina/self-llm/refs/heads/master/dataset/huanhuan.json"
"""

# Load the JSON file and convert it into a Dataset
df = pd.read_json('/home/lvyufeng/lvyufeng/mindnlp/examples/transformers/peft/lora/huanhuan.json')
df = pd.read_json('./huanhuan.json')
ds = Dataset.from_pandas(df)

# Process the dataset
@@ -60,8 +64,8 @@ def process_func(example):
# Configure the training arguments
args = TrainingArguments(
output_dir="./output/Qwen2.5_instruct_lora",
per_device_train_batch_size=4,
gradient_accumulation_steps=4,
per_device_train_batch_size=3,
gradient_accumulation_steps=5,
logging_steps=10,
num_train_epochs=3,
save_steps=100,
@@ -87,7 +91,7 @@ def process_func(example):
from peft import PeftModel

mode_path = 'Qwen/Qwen2.5-7B-Instruct'
lora_path = './output/Qwen2.5_instruct_lora/checkpoint-702' # change this to the checkpoint path of your LoRA output
lora_path = './output/Qwen2.5_instruct_lora/checkpoint-747' # change this to the checkpoint path of your LoRA output

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)
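For context, a hedged sketch of how the adapter saved above is typically attached for inference. Paths follow the script; the final checkpoint number depends on your own run, and mindnlp is imported first as in the script (assumed to route the HF stack to the MindSpore backend).

# Inference sketch (assumption: standard transformers/PEFT usage mirroring the script above).
import mindnlp  # noqa: F401  # as in the script, import before the HF libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

mode_path = 'Qwen/Qwen2.5-7B-Instruct'
lora_path = './output/Qwen2.5_instruct_lora/checkpoint-747'  # adjust to your run

tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(mode_path, torch_dtype=torch.float16, device_map=0)
model = PeftModel.from_pretrained(model, lora_path)  # attach the LoRA weights

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))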
6 changes: 3 additions & 3 deletions mindnlp/core/distributed/device_mesh.py
@@ -8,8 +8,8 @@
from typing import Dict, List, Optional, Tuple, TYPE_CHECKING, Union

from mindnlp import core
from core.distributed import is_available
from core.utils._typing_utils import not_none
from mindnlp.core.distributed import is_available
from mindnlp.core.utils._typing_utils import not_none


__all__ = ["init_device_mesh", "DeviceMesh"]
@@ -37,7 +37,7 @@ def _init_device_mesh_stub():

else:
from .c10d import Backend as C10dBackend
from core.distributed.distributed_c10d import (
from mindnlp.core.distributed.distributed_c10d import (
_find_pg_by_ranks_and_tag,
_get_default_group,
_get_group_tag,
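Both hunks replace imports that assume a top-level `core` package with fully qualified `mindnlp.core` paths. A small sketch of the failure mode the fix addresses, assuming no standalone `core` package is installed:

# With only mindnlp installed, the old spelling fails as soon as device_mesh.py is loaded.
try:
    from core.distributed import is_available  # old, unresolvable form
except ModuleNotFoundError as err:
    print(err)  # No module named 'core'

# The fully qualified form resolves against the installed mindnlp package.
from mindnlp.core.distributed import is_available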
4 changes: 2 additions & 2 deletions mindnlp/core/nn/functional.py
@@ -1197,9 +1197,9 @@ def scaled_dot_product_attention(query, key, value, attn_mask=None, dropout_p=0.

attn_weight = query.float() @ key.transpose(-2, -1).float() * scale_factor
attn_weight += attn_bias.float()
attn_weight = softmax(attn_weight, dim=-1)
attn_weight = softmax(attn_weight, dim=-1, dtype=core.float32).to(query.dtype)
attn_weight = dropout(attn_weight, dropout_p, training=True)
return (attn_weight @ value.float()).to(query.dtype)
return attn_weight @ value


def _mha_shape_check(query, key, value, key_padding_mask, attn_mask, num_heads):
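The updated reference keeps the softmax in float32 for numerical stability but casts the probabilities back to the query dtype, so the final matmul with `value` no longer upcasts to float32. A standalone sketch of the same pattern (attention bias omitted for brevity), written against plain PyTorch and assumed to mirror the `mindnlp.core` ops used in the diff:

# Half-precision attention: softmax in float32, value matmul in the input dtype.
import math
import torch
import torch.nn.functional as F

def sdpa_reference(query, key, value, dropout_p=0.0):
    scale_factor = 1 / math.sqrt(query.size(-1))
    attn_weight = query.float() @ key.transpose(-2, -1).float() * scale_factor
    attn_weight = F.softmax(attn_weight, dim=-1, dtype=torch.float32).to(query.dtype)
    attn_weight = F.dropout(attn_weight, dropout_p, training=True)
    return attn_weight @ value  # stays in query.dtype instead of upcasting value

q = k = v = torch.randn(1, 8, 16, 64, dtype=torch.float16)
print(sdpa_reference(q, k, v).dtype)  # torch.float16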
1 change: 1 addition & 0 deletions mindnlp/core/npu/__init__.py
@@ -16,6 +16,7 @@
from mindnlp import core
from mindnlp.core.executor import execute
from ..configs import SUPPORT_BF16, ON_A1
from . import random

FloatTensor = core.FloatTensor
HalfTensor = core.FloatTensor
2 changes: 2 additions & 0 deletions setup.py
@@ -159,7 +159,9 @@ def run(self):
'mindspore>=2.5.0',
'tqdm',
'requests',
'accelerate', # hf dependency
'transformers>=4.55.0', # hf dependency
'peft', # hf dependency
'datasets', # hf dependency
'evaluate', # hf dependency
'tokenizers', # hf dependency
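With `accelerate` and `peft` added to install_requires alongside the existing transformers>=4.55.0 pin, a fresh install is expected to carry the full HF fine-tuning stack. A quick check of what actually got resolved (package names taken from the list above):

# Print the resolved versions of the HF-side dependencies declared in setup.py.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("accelerate", "transformers", "peft", "datasets", "evaluate", "tokenizers"):
    try:
        print(pkg, version(pkg))
    except PackageNotFoundError:
        print(pkg, "not installed")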