In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import os

def save_model(
    base_model_name: str = "Qwen/Qwen3-4B-Instruct-2507",
    adapter_dir: str = "../model/archive/",
    offload_dir: str = "../offload/",
    output_dir: str = "/kaggle/working/merged_model",
) -> None:
    """Merge a LoRA adapter into its base model and save the merged weights.

    Loads the base causal-LM in float16, attaches the LoRA adapter, folds the
    adapter weights into the base weights, and writes the merged model and
    the base model's tokenizer to ``output_dir`` in safetensors format.

    Args:
        base_model_name: Identifier or path passed to ``from_pretrained`` for
            both the base model and the tokenizer.
        adapter_dir: Location of the LoRA adapter passed to
            ``PeftModel.from_pretrained``.
        offload_dir: Directory handed to ``offload_folder``; created if
            missing.
        output_dir: Destination directory for the merged model and tokenizer;
            created if missing.

    Raises:
        ValueError: If ``adapter_dir`` is an existing local directory that
            does not contain ``adapter_config.json``.
    """
    # Fail fast: validate the adapter directory *before* loading the base
    # model, so a wrong path does not cost a full checkpoint load first.
    # Only existing local directories are checked; anything else is left to
    # PeftModel.from_pretrained to resolve.
    if os.path.isdir(adapter_dir):
        adapter_config_path = os.path.join(adapter_dir, "adapter_config.json")
        if not os.path.isfile(adapter_config_path):
            raise ValueError(
                f"Can't find 'adapter_config.json' at '{adapter_dir}'"
            )

    # Create offload directory if it doesn't exist
    os.makedirs(offload_dir, exist_ok=True)

    print(f"Loading base model: {base_model_name}")
    # NOTE(review): with device_map="auto" some weights may be offloaded to
    # disk and appear on the meta device; confirm that merge_and_unload and
    # save_pretrained handle offloaded weights correctly in the installed
    # peft/transformers versions, or load fully on CPU for merging.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        offload_folder=offload_dir,
        low_cpu_mem_usage=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    print(f"Loading LoRA adapter from: {adapter_dir}")
    model = PeftModel.from_pretrained(base_model, adapter_dir)

    # Fold the adapter weights into the base weights and drop the PEFT wrapper
    print("Merging LoRA weights into base model...")
    model = model.merge_and_unload()

    # Save merged model together with the tokenizer
    os.makedirs(output_dir, exist_ok=True)

    print("Saving merged model...")
    model.save_pretrained(output_dir, safe_serialization=True)
    tokenizer.save_pretrained(output_dir)
    print(f"Merged model saved at: {output_dir}")

# Run the merge when this cell executes: loads the base model, applies the
# LoRA adapter, and writes the merged model and tokenizer to disk.
save_model()

Loading base model: Qwen/Qwen3-4B-Instruct-2507


Loading checkpoint shards: 100%|██████████| 3/3 [00:10<00:00,  3.45s/it]
Some parameters are on the meta device because they were offloaded to the disk.


Loading LoRA adapter from: ../model/


ValueError: Can't find 'adapter_config.json' at '../model/'