In [1]:
# All imports for this cell live together at the top so its dependencies are
# visible at a glance.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_name = "./model/Qwen3-0.6B"
lora_path = "./lora_output/checkpoint-702"

# Prefer the GPU when one is available; otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load the tokenizer and the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(base_model_name).to(device)

# 2. Wrap the base model with the trained LoRA adapter.
lora_model = PeftModel.from_pretrained(base_model, lora_path).to(device)

# 3. Fold the LoRA weights into the base weights.
# merge_and_unload() returns the merged model; rebind `base_model` to that
# return value explicitly instead of relying on the wrapped model having been
# mutated in place, so every later cell is guaranteed to use the merged weights.
base_model = lora_model.merge_and_unload()

# Display the merged architecture as the cell output.
base_model


  from .autonotebook import tqdm as notebook_tqdm
  import pynvml  # type: ignore[import]


Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 1024)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=1024, out_features=2048, bias=False)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (up_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (down_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
        (post_attention_layernorm): Qwe

In [10]:
# Smoke-test the merged model with a prompt in the "指令 / 回答" layout.
prompt = "指令: 你爸爸是谁？\n回答:"
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Sampling is stochastic (do_sample=True); fix the seed so that re-running this
# cell reproduces the same completion.
torch.manual_seed(42)

# Inference only: no gradients are needed.
with torch.no_grad():
    output = base_model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
    )

# `output[0]` is the single generated sequence; special tokens are stripped
# when decoding.
text = tokenizer.decode(output[0], skip_special_tokens=True)
print(text)

指令: 你爸爸是谁？
回答: 我是甄嬛，我父亲是大理寺少卿甄远道。


In [11]:
# Directory that receives the merged model and its tokenizer files.
merged_model_dir = "./qwen0.6B_lora_merged"

# Write the model first, then the tokenizer, into the same directory.
base_model.save_pretrained(save_directory=merged_model_dir)
# Last expression: the tokenizer call's return value is shown as the cell output.
tokenizer.save_pretrained(save_directory=merged_model_dir)

('./qwen0.6B_lora_merged/tokenizer_config.json',
 './qwen0.6B_lora_merged/special_tokens_map.json',
 './qwen0.6B_lora_merged/chat_template.jinja',
 './qwen0.6B_lora_merged/vocab.json',
 './qwen0.6B_lora_merged/merges.txt',
 './qwen0.6B_lora_merged/added_tokens.json',
 './qwen0.6B_lora_merged/tokenizer.json')

In [None]:
# ! <ModelScope access token redacted — never commit tokens; revoke this one and load it from an environment variable>

In [12]:
import os
from getpass import getpass

from modelscope.hub.api import HubApi

# Never hard-code credentials in a notebook: read the access token from the
# MODELSCOPE_API_TOKEN environment variable, and fall back to an interactive
# prompt (input is not echoed) when the variable is unset or empty.
YOUR_ACCESS_TOKEN = os.environ.get("MODELSCOPE_API_TOKEN") or getpass(
    "ModelScope access token: "
)

api = HubApi()

# login() returns a token string together with a cookie jar. Bind the result to
# a name so that it is not echoed into the saved cell output.
login_result = api.login(YOUR_ACCESS_TOKEN)

(output cleared: the echoed login result contained an account token and session cookies; treat those credentials as compromised and revoke them)

In [14]:
from modelscope.hub.constants import Licenses, ModelVisibility

# Repository coordinates on ModelScope, combined as "<owner>/<name>".
owner_name, model_name = 'nev8rz', 'huanhuanchat_base_qwen3_0_6B'
model_id = "/".join((owner_name, model_name))

# Repository settings: public visibility, Apache-2.0 license, Chinese display name.
repo_options = dict(
    visibility=ModelVisibility.PUBLIC,
    license=Licenses.APACHE_V2,
    chinese_name="嬛嬛",
)

# Last expression: the value returned by create_model is shown as the cell output.
api.create_model(model_id, **repo_options)

'https://www.modelscope.cn/models/nev8rz/huanhuanchat_base_qwen3_0_6B'

In [15]:
# Upload exactly the directory the merged model was saved to, into the
# repository identified by `model_id`. Reusing `merged_model_dir` and
# `model_id` (both defined in earlier cells) instead of repeating the literals
# keeps the upload from drifting away from what was saved and created.
api.upload_folder(
    repo_id=model_id,
    folder_path=merged_model_dir,
    commit_message='嬛嬛 来啦',
)

2025-09-25 23:11:36,778 - modelscope - INFO - Preparing files to upload ...
2025-09-25 23:11:36,782 - modelscope - INFO - Prepared 10 files for upload.
2025-09-25 23:11:36,782 - modelscope - INFO - Checking 10 files to upload ...
[Validating Hash for model.safetensors]: 100%|██████████| 2.38G/2.38G [00:32<00:00, 72.5MB/s]
[Uploading model.safetensors]: 100%|██████████| 2.38G/2.38G [00:30<00:00, 79.5MB/s]
Processing 10 items: 100%|██████████| 10.0/10.0 [01:03<00:00, 6.32s/it]

Committing 10 files in 1 batch(es) of size 512.



[Committing batches] :   0%|          | 0/1 [00:00<?, ?it/s]2025-09-25 23:12:41,445 - modelscope - INFO - Commit succeeded: https://www.modelscope.cn/api/v1/repos/models/nev8rz/huanhuanchat_base_qwen3_0_6B/commit/master
[Committing batches] : 100%|██████████| 1/1 [00:01<00:00,  1.28s/it]


CommitInfo(commit_url='https://www.modelscope.cn/api/v1/repos/models/nev8rz/huanhuanchat_base_qwen3_0_6B/commit/master', commit_message='嬛嬛 来啦 (batch 1/1)', commit_description='Uploading files', oid='')