# Imports

In [None]:
import os
import torch
import shutil
from pathlib import Path

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import GPTQModifier

# Settings

In [None]:
# Checkpoint directory passed to from_pretrained for both tokenizer and model.
MODEL_ID = "./base_model"
# Directory that receives the quantized model and is later zipped.
OUT_DIR = "./model"

# Calibration data source and the split it is read from.
DATASET_ID = "LGAI-EXAONE/MANTA-1M"
DATASET_SPLIT = "train"

# Number of rows sliced from the split and handed to oneshot for calibration.
NUM_CALIBRATION_SAMPLES = 1024
# Forwarded to oneshot as max_seq_length.
MAX_SEQUENCE_LENGTH = 256

# Quantization
# Preset scheme name passed to every GPTQModifier (presumably 4-bit weights /
# 16-bit activations — confirm against the llmcompressor documentation).
SCHEME = "W4A16"
# NOTE(review): TARGETS is not referenced by the cells visible here; the recipe
# builds explicit per-layer target lists instead. Kept for compatibility.
TARGETS = ["Linear"]
# Module names excluded from quantization via GPTQModifier(ignore=...).
IGNORE = ["embed_tokens", "lm_head"]

# Model Loading

In [None]:
print("[INFO] 모델 로드 중...")

# Tokenizer and weights are both read from the same checkpoint directory.
# NOTE(review): only the tokenizer opts in to trust_remote_code; the model load
# below does not — confirm this asymmetry is intentional.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Weights are materialized in bfloat16.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)

print("[INFO] 모델/토크나이저 로드 완료")

# Dataset Loading & Preprocessing

In [None]:
print("[INFO] 캘리브레이션 데이터 로드 중...")

# The slice suffix on the split name restricts the load to the leading
# NUM_CALIBRATION_SAMPLES rows of the split.
ds = load_dataset(DATASET_ID, split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES}]")


def preprocess(example):
    """Render one dataset row into chat-template text.

    Args:
        example: A dataset row; its ``"conversations"`` field is passed to the
            tokenizer's chat template as-is.

    Returns:
        A dict with a single ``"text"`` key holding the untokenized rendering.
    """
    # NOTE(review): add_generation_prompt=True is applied to every row —
    # confirm that is desired for calibration text.
    rendered = tokenizer.apply_chat_template(
        example["conversations"],
        tokenize=False,
        add_generation_prompt=True,
    )
    return {"text": rendered}


# Adds the "text" column produced by preprocess to every row.
ds = ds.map(preprocess)

print("[INFO] 데이터 전처리 완료")

# GPTQ Quantization

In [None]:
print(f"[INFO] GPTQ 시작 (scheme={SCHEME}, samples={NUM_CALIBRATION_SAMPLES}, max_len={MAX_SEQUENCE_LENGTH})...")

# Split the decoder stack into three contiguous bands so each band can use its
# own GPTQ dampening factor. The depth is read from the model config instead of
# being hard-coded, so no trailing layers are silently left out of the recipe;
# it falls back to 30 (the previously hard-coded depth) when the config does
# not expose num_hidden_layers. For a 30-layer model the bands are exactly
# 0-9, 10-19 and 20-29.
num_layers = getattr(model.config, "num_hidden_layers", 30)
first_cut = num_layers // 3
second_cut = (2 * num_layers) // 3

early_range = range(0, first_cut)
mid_range = range(first_cut, second_cut)
late_range = range(second_cut, num_layers)

# Linear projections targeted inside every decoder layer.
# NOTE(review): assumes modules are named model.layers.<i>.<sub> — confirm
# against the loaded architecture.
sub_modules = ["self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.o_proj",
               "mlp.gate_proj", "mlp.up_proj", "mlp.down_proj"]


def _band_targets(layer_indices):
    """Return the fully qualified module names for the given layer indices."""
    return [f"model.layers.{i}.{sub}" for i in layer_indices for sub in sub_modules]


# Build the final target lists.
EARLY_TARGETS = _band_targets(early_range)
MID_TARGETS = _band_targets(mid_range)
LATE_TARGETS = _band_targets(late_range)

# (targets, dampening_frac) per band.
band_settings = [
    # Band 1: early layers (typically 0.01-0.1).
    (EARLY_TARGETS, 0.1),
    # Band 2: middle layers (the 0.3 level used so far).
    (MID_TARGETS, 0.3),
    # Band 3: late layers (raised to 0.6 to limit error accumulation).
    (LATE_TARGETS, 0.6),
]

# One GPTQModifier per band; a band that ends up empty (fewer than three
# layers) is skipped rather than producing a modifier with no targets.
recipe = [
    GPTQModifier(
        scheme=SCHEME,
        targets=band_targets,
        ignore=IGNORE,
        dampening_frac=dampening,
    )
    for band_targets, dampening in band_settings
    if band_targets
]

oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
)

print("[INFO] GPTQ 완료")

# Model Save

In [None]:
# Make sure the output directory exists (no error if it is already there).
Path(OUT_DIR).mkdir(parents=True, exist_ok=True)

# Write the model with save_compressed=True, then the tokenizer files into the
# same directory.
model.save_pretrained(OUT_DIR, save_compressed=True)
tokenizer.save_pretrained(OUT_DIR)

print(f"[INFO] 모델 저장 완료: {OUT_DIR}")

# Submission

In [None]:
# Base name of the submission archive (".zip" is appended by make_archive).
zip_name = "102425603"
print(f"[INFO] {zip_name}.zip 생성 중...")

# Archive OUT_DIR, resolved relative to the current directory.
shutil.make_archive(zip_name, "zip", root_dir=".", base_dir=OUT_DIR)

print(f"[INFO] 생성 완료: {zip_name}.zip")