In [1]:
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from peft import PeftModel

# 4-bit quantization settings: NF4 quantization type, double quantization
# enabled, bfloat16 as the compute dtype.
# NOTE(review): the only visible use of bnb_config is the commented-out
# quantization_config argument in the base-model load further down.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# 1. Base model and LoRA adapter: identifiers used by the loading code below.
# Base checkpoint the adapter is applied to.
base_model_name = "Yeongi/gemma-3-4b-it-bnb-4bit-lora"
# Local directory where the LoRA adapter checkpoint was saved.
adapter_path = "./outputs/Yeongi/gemma-3-4b-it-bnb-4bit-lora/checkpoint-500"
# Repository name the merged model is uploaded to on the Hub.
merged_model_name = "Yeongi/gemma-3-4b-it-4bit-SFT-GRPO-500"

# Processor loaded from the same repository as the base model;
# use_fast=True asks for the fast variant.
processor = AutoProcessor.from_pretrained(base_model_name, use_fast=True)

# Base checkpoint, loaded with torch_dtype=torch.bfloat16.
# The quantization_config argument is left disabled, so bnb_config is not
# applied to this load.
base_model = Gemma3ForConditionalGeneration.from_pretrained(
    base_model_name,
    # quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)

# Wrap the base model with the LoRA adapter stored at adapter_path.
model = PeftModel.from_pretrained(model=base_model, model_id=adapter_path)

In [2]:
# Alternative loading path via Unsloth (disabled).
# from unsloth import FastModel

# # 1. Load the base model and the LoRA adapter
# base_model_name = "google/gemma-3-12b-it"
# adapter_path = "./outputs/checkpoint-228"  # path where the LoRA adapter is saved
# merged_model_name = "Yeongi/gemma-3-12b-it-lora-sft-instruct-bespoke-stratos-3.6ak"  # model name to upload to the Hub

# model, tokenizer = FastModel.from_pretrained(adapter_path)

In [3]:
# 2. Merge the LoRA adapter into the base model.
# merge_and_unload() hands back the model with the adapter weights folded in,
# and the result is rebound to `model`, so `model` changes kind in this cell.
# The hasattr guard makes the cell safe to re-run: without it, a second
# execution would call merge_and_unload() on the already-merged model
# (presumably a plain transformers model without that method) and fail.
# hasattr is used rather than an isinstance check so the cell does not depend
# on which loading cell imported what.
if hasattr(model, "merge_and_unload"):
    model = model.merge_and_unload()



In [4]:
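# # 3. Save the merged model locally (optional)
# # NOTE: `tokenizer` is only defined by the commented-out Unsloth cell; the
# # active cells define `processor` instead, so save that when using them.
# save_path = "./merged_model"
# model.save_pretrained(save_path)
# tokenizer.save_pretrained(save_path)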

In [5]:
# 4. Upload to the Hugging Face Hub.
# Authenticate beforehand with `huggingface-cli login`, or supply a token below.
from huggingface_hub import HfApi

# NOTE(review): `api` is not referenced by the uploads in this cell; they go
# through the model's and the processor's own push_to_hub() methods.
api = HfApi()

# Alternative: take the token from the HUGGINGFACE_TOKEN environment variable.
# NOTE(review): presumably setting api.token does not affect the push_to_hub()
# calls below, since they are not made through `api` - confirm before relying on it.
# import os
# api.token = os.getenv("HUGGINGFACE_TOKEN")

# Push the model first, then the processor, into the same repository.
model.push_to_hub(repo_id=merged_model_name)
# tokenizer.push_to_hub(merged_model_name)
processor.push_to_hub(repo_id=merged_model_name)

model.safetensors:   0%|          | 0.00/3.23G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Yeongi/gemma-3-4b-it-4bit-SFT-GRPO-500/commit/e7053bc57acf82b61f778ad0790fd61204c44638', commit_message='Upload processor', commit_description='', oid='e7053bc57acf82b61f778ad0790fd61204c44638', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Yeongi/gemma-3-4b-it-4bit-SFT-GRPO-500', endpoint='https://huggingface.co', repo_type='model', repo_id='Yeongi/gemma-3-4b-it-4bit-SFT-GRPO-500'), pr_revision=None, pr_num=None)