diff --git a/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.ipynb b/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.ipynb index 5ffed5b32..359f7e9d4 100644 --- a/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.ipynb +++ b/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "!wget https://openi.pcl.ac.cn/mindnlp/self-llm/raw/branch/master/dataset/huanhuan.json -O huanhuan.json --no-check-certificate" + "!wget \"https://gh-proxy.com/https://raw.githubusercontent.com/datawhalechina/self-llm/refs/heads/master/dataset/huanhuan.json\" -O huanhuan.json --no-check-certificate" ] }, { @@ -164,7 +164,7 @@ "source": [ "import torch\n", "\n", - "model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, device_map='auto')" + "model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-7B-Instruct', torch_dtype=torch.float16, device_map=0)" ] }, { @@ -265,8 +265,8 @@ "source": [ "args = TrainingArguments(\n", " output_dir=\"./output/Qwen2.5_instruct_lora\",\n", - " per_device_train_batch_size=4,\n", - " gradient_accumulation_steps=4,\n", + " per_device_train_batch_size=3,\n", + " gradient_accumulation_steps=5,\n", " logging_steps=10,\n", " num_train_epochs=3,\n", " save_steps=100, \n", @@ -336,7 +336,7 @@ "from peft import PeftModel\n", "\n", "mode_path = 'Qwen/Qwen2.5-7B-Instruct'\n", - "lora_path = './output/Qwen2.5_instruct_lora/checkpoint-702' # 这里改称你的 lora 输出对应 checkpoint 地址\n", + "lora_path = './output/Qwen2.5_instruct_lora/checkpoint-747' # 这里改称你的 lora 输出对应 checkpoint 地址\n", "\n", "# 加载tokenizer\n", "tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True)\n", diff --git a/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.py b/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.py index a3d53f09f..614aaff41 100644 --- a/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.py +++ b/examples/transformers/peft/lora/Qwen2.5-7B-Instruct-Lora.py @@ -5,13 +5,17 @@ import mindnlp import mindspore -# mindspore.set_context(pynative_synchronize=True) from datasets import Dataset import pandas as pd from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig +# download file +""" +wget "https://gh-proxy.com/https://raw.githubusercontent.com/datawhalechina/self-llm/refs/heads/master/dataset/huanhuan.json" +""" + # 将JSON文件转换为CSV文件 -df = pd.read_json('/home/lvyufeng/lvyufeng/mindnlp/examples/transformers/peft/lora/huanhuan.json') +df = pd.read_json('./huanhuan.json') ds = Dataset.from_pandas(df) # 处理数据集 @@ -60,8 +64,8 @@ def process_func(example): # 配置训练参数 args = TrainingArguments( output_dir="./output/Qwen2.5_instruct_lora", - per_device_train_batch_size=4, - gradient_accumulation_steps=4, + per_device_train_batch_size=3, + gradient_accumulation_steps=5, logging_steps=10, num_train_epochs=3, save_steps=100, @@ -87,7 +91,7 @@ def process_func(example): from peft import PeftModel mode_path = 'Qwen/Qwen2.5-7B-Instruct' -lora_path = './output/Qwen2.5_instruct_lora/checkpoint-702' # 这里改称你的 lora 输出对应 checkpoint 地址 +lora_path = './output/Qwen2.5_instruct_lora/checkpoint-747' # 这里改称你的 lora 输出对应 checkpoint 地址 # 加载tokenizer tokenizer = AutoTokenizer.from_pretrained(mode_path, trust_remote_code=True) diff --git a/mindnlp/core/distributed/device_mesh.py b/mindnlp/core/distributed/device_mesh.py index 607c7182a..1361cbc94 100644 --- a/mindnlp/core/distributed/device_mesh.py +++ b/mindnlp/core/distributed/device_mesh.py @@ -8,8 +8,8 @@ from typing import Dict, List, Optional, Tuple, TYPE_CHECKING, Union from mindnlp import core -from core.distributed import is_available -from core.utils._typing_utils import not_none +from mindnlp.core.distributed import is_available +from mindnlp.core.utils._typing_utils import not_none __all__ = ["init_device_mesh", "DeviceMesh"] @@ -37,7 +37,7 @@ def _init_device_mesh_stub(): else: from .c10d import Backend as C10dBackend - from core.distributed.distributed_c10d import ( + from mindnlp.core.distributed.distributed_c10d import ( _find_pg_by_ranks_and_tag, _get_default_group, _get_group_tag, diff --git a/mindnlp/core/nn/functional.py b/mindnlp/core/nn/functional.py index 88309432d..eaa23fad0 100644 --- a/mindnlp/core/nn/functional.py +++ b/mindnlp/core/nn/functional.py @@ -1197,9 +1197,9 @@ def scaled_dot_product_attention(query, key, value, attn_mask=None, dropout_p=0. attn_weight = query.float() @ key.transpose(-2, -1).float() * scale_factor attn_weight += attn_bias.float() - attn_weight = softmax(attn_weight, dim=-1) + attn_weight = softmax(attn_weight, dim=-1, dtype=core.float32).to(query.dtype) attn_weight = dropout(attn_weight, dropout_p, training=True) - return (attn_weight @ value.float()).to(query.dtype) + return attn_weight @ value def _mha_shape_check(query, key, value, key_padding_mask, attn_mask, num_heads): diff --git a/mindnlp/core/npu/__init__.py b/mindnlp/core/npu/__init__.py index 2d7af63ab..04a1715ca 100644 --- a/mindnlp/core/npu/__init__.py +++ b/mindnlp/core/npu/__init__.py @@ -16,6 +16,7 @@ from mindnlp import core from mindnlp.core.executor import execute from ..configs import SUPPORT_BF16, ON_A1 +from . import random FloatTensor = core.FloatTensor HalfTensor = core.FloatTensor diff --git a/setup.py b/setup.py index 6c6b44b62..c4e2a8fe8 100644 --- a/setup.py +++ b/setup.py @@ -159,7 +159,9 @@ def run(self): 'mindspore>=2.5.0', 'tqdm', 'requests', + 'accelerate', # hf dependency 'transformers>=4.55.0', # hf dependency + 'peft', # hf dependency 'datasets', # hf dependency 'evaluate', # hf dependency 'tokenizers', # hf dependency