In [None]:
import os, glob, random, numpy as np, tensorflow as tf
from tensorflow.keras import layers, models

# ======= Hyperparameters =======
# Input images are IMG_SIZE x IMG_SIZE with a single channel.
IMG_SIZE     = 150
NUM_CLASSES  = 68                 # [initial 19 | medial 21 | final 28]
BATCH_SIZE   = 128
EPOCHS       = 300
INIT_LR      = 3e-4
# Fraction of shard files (not samples) assigned to the training split.
TRAIN_RATIO  = 0.8
ALPHA        = 0.75               # MobileNetV2 width multiplier
AUG_ON       = True               # strong augmentation on/off
MODE         = "stem"             # "stem" (recommended) | "native"
OUT_DIR      = "./runs_MobileNetV2"
SHARD_DIR    = r"/home/voxfish_ljy$/virtual/ValData"   # <<< change this

AUTOTUNE     = tf.data.AUTOTUNE

# ======= L/V/T slice indices =======
# Column ranges of the 68-wide label/prediction vector:
# L = initial consonant, V = medial vowel, T = final consonant.
L_SLICE = slice(0, 19)
V_SLICE = slice(19, 40)
T_SLICE = slice(40, 68)

# ---------------------------------------------------------
# 증강
# ---------------------------------------------------------
def hangul_ocr_augment():
    """Return the random geometric augmentation pipeline for glyph images.

    The pipeline applies, in order: rotation, translation, zoom and, when the
    installed Keras provides them, random height and width changes.

    Returns:
        A ``tf.keras.Sequential`` named ``"hangul_aug"``.
    """
    # NOTE(review): the RandomRotation factor is a fraction of 2*pi, so 0.2
    # allows roughly +/-72 degrees -- confirm this is intended for characters.
    aug_layers = [
        layers.RandomRotation(0.2),
        layers.RandomTranslation(0.15, 0.15),
        layers.RandomZoom(0.20),
    ]

    # RandomHeight / RandomWidth are not present in every Keras release
    # (they appear to be absent from the Keras 3 layer set -- TODO confirm).
    # Referencing them unconditionally raises AttributeError there, so they
    # are added only when available; the order matches the original pipeline.
    for layer_name in ("RandomHeight", "RandomWidth"):
        layer_cls = getattr(layers, layer_name, None)
        if layer_cls is not None:
            aug_layers.append(layer_cls(0.10))

    return tf.keras.Sequential(aug_layers, name="hangul_aug")

# ---------------------------------------------------------
# 모델
# ---------------------------------------------------------
def build_mobilenetv2(mode="stem", alpha=0.75, dropout=0.25, aug=True):
    """Build a randomly initialised MobileNetV2 classifier for 1-channel glyphs.

    Args:
        mode: ``"native"`` feeds the 1-channel tensor straight into a
            MobileNetV2 built with a 1-channel ``input_shape``. Any other
            value (``"stem"`` by convention) first projects 1 -> 3 channels
            with a bias-free 1x1 convolution.
        alpha: MobileNetV2 width multiplier.
        dropout: Dropout rate applied to the pooled features before the head.
        aug: When true, ``hangul_ocr_augment()`` is inserted after rescaling.

    Returns:
        A ``tf.keras.Model`` mapping ``(IMG_SIZE, IMG_SIZE, 1)`` images to
        ``NUM_CLASSES`` raw logits (float32). Take the argmax (or a softmax)
        separately over ``L_SLICE``, ``V_SLICE`` and ``T_SLICE`` to decode.
    """
    inp = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 1), name="image")
    # Computes 2*x - 1, i.e. maps inputs in [0, 1] to [-1, 1].
    x = layers.Rescaling(2.0, offset=-1.0, name="to_minus1_1")(inp)
    if aug:
        x = hangul_ocr_augment()(x)

    if mode == "native":
        base = tf.keras.applications.MobileNetV2(
            input_shape=(IMG_SIZE, IMG_SIZE, 1),
            include_top=False, weights=None, alpha=alpha, pooling="avg",
        )
        h = base(x)
    else:
        # 1-channel -> 3-channel projection (better compatibility).
        x3 = layers.Conv2D(3, 1, padding="same", use_bias=False, name="stem_1x1")(x)
        base = tf.keras.applications.MobileNetV2(
            input_tensor=x3, include_top=False, weights=None, alpha=alpha, pooling="avg",
        )
        h = base.output

    h = layers.Dropout(dropout, name="head_dropout")(h)
    # The head is linear on purpose: the training loss applies
    # softmax_cross_entropy_with_logits to each L/V/T slice, so a softmax here
    # would be applied twice and would also couple the three groups through a
    # single 68-way normalisation. The per-slice argmax is unaffected.
    # dtype="float32" keeps the logits in full precision when a mixed_float16
    # global policy is active.
    out = layers.Dense(
        NUM_CLASSES, activation=None, dtype="float32", name="char68"
    )(h)
    return models.Model(inp, out, name=f"mobilenetv2_{mode}")

# ---------------------------------------------------------
# 커스텀 메트릭(학습 로그에 바로 표기)
# ---------------------------------------------------------
def acc_first(y_true, y_pred):
    """Batch accuracy of the initial-consonant group (columns ``L_SLICE``).

    The argmax over the ``L_SLICE`` columns of ``y_true`` is compared with the
    argmax over the same columns of ``y_pred``; the mean match rate is
    returned as a float32 scalar tensor.
    """
    target_idx = tf.argmax(y_true[:, L_SLICE], -1)
    predicted_idx = tf.argmax(y_pred[:, L_SLICE], -1)
    hits = tf.cast(tf.equal(target_idx, predicted_idx), tf.float32)
    return tf.reduce_mean(hits)

def acc_middle(y_true, y_pred):
    """Batch accuracy of the medial-vowel group (columns ``V_SLICE``).

    The argmax over the ``V_SLICE`` columns of ``y_true`` is compared with the
    argmax over the same columns of ``y_pred``; the mean match rate is
    returned as a float32 scalar tensor.
    """
    target_idx = tf.argmax(y_true[:, V_SLICE], -1)
    predicted_idx = tf.argmax(y_pred[:, V_SLICE], -1)
    hits = tf.cast(tf.equal(target_idx, predicted_idx), tf.float32)
    return tf.reduce_mean(hits)

def acc_last(y_true, y_pred):
    """Batch accuracy of the final-consonant group (columns ``T_SLICE``).

    The argmax over the ``T_SLICE`` columns of ``y_true`` is compared with the
    argmax over the same columns of ``y_pred``; the mean match rate is
    returned as a float32 scalar tensor.
    """
    target_idx = tf.argmax(y_true[:, T_SLICE], -1)
    predicted_idx = tf.argmax(y_pred[:, T_SLICE], -1)
    hits = tf.cast(tf.equal(target_idx, predicted_idx), tf.float32)
    return tf.reduce_mean(hits)

def acc_joint(y_true, y_pred):
    """Batch accuracy counting a sample only if all three groups are right.

    For each of ``L_SLICE``, ``V_SLICE`` and ``T_SLICE`` the per-slice argmax
    of ``y_true`` and ``y_pred`` are compared; a sample is a hit when every
    comparison matches. Returns the mean hit rate as a float32 scalar tensor.
    """
    all_match = None
    for part in (L_SLICE, V_SLICE, T_SLICE):
        part_match = tf.equal(
            tf.argmax(y_true[:, part], -1),
            tf.argmax(y_pred[:, part], -1),
        )
        if all_match is None:
            all_match = part_match
        else:
            all_match = tf.logical_and(all_match, part_match)
    return tf.reduce_mean(tf.cast(all_match, tf.float32))

# ---------------------------------------------------------
# 샤드 → tf.data
# ---------------------------------------------------------
def _per_shard_dataset(x_path, y_path, shuffle_inside=True):
    """Build an unbatched ``tf.data`` pipeline over one (X, Y) shard pair.

    Both ``.npy`` files are opened with ``mmap_mode="r"`` so rows are read
    from disk on demand instead of loading the whole shard into RAM. The
    pipeline streams row indices and fetches each row through
    ``tf.py_function``.

    Args:
        x_path: Path to the image shard. Rows are declared as ``uint8`` with
            shape ``(IMG_SIZE, IMG_SIZE, 1)``.
        y_path: Path to the label shard. Rows are declared as ``float32`` with
            shape ``(NUM_CLASSES,)``.
        shuffle_inside: When true, indices pass through a shuffle buffer of
            8192 elements.

    Returns:
        A dataset of ``(image, label)`` pairs where ``image`` is float32
        (the stored value divided by 255) and ``label`` is passed through.
    """
    images = np.load(x_path, mmap_mode="r")
    labels = np.load(y_path, mmap_mode="r")

    def _fetch_row(idx):
        # Runs eagerly inside py_function; idx arrives as a scalar tensor.
        row = int(idx)
        return images[row], labels[row]

    def _to_example(idx):
        raw_image, raw_label = tf.py_function(
            _fetch_row, [idx], [tf.uint8, tf.float32]
        )
        # py_function drops static shapes; restore them for graph inference.
        raw_image.set_shape((IMG_SIZE, IMG_SIZE, 1))
        raw_label.set_shape((NUM_CLASSES,))
        return tf.cast(raw_image, tf.float32) / 255.0, raw_label

    index_ds = tf.data.Dataset.from_tensor_slices(
        tf.range(images.shape[0], dtype=tf.int64)
    )
    if shuffle_inside:
        index_ds = index_ds.shuffle(8192)

    return index_ds.map(_to_example, num_parallel_calls=AUTOTUNE)

def build_ds_from_numpy_shards(
    shard_dir: str,
    batch_size: int = 256,
    train_ratio: float = 0.9,
    interleave_across_shards: bool = True,
    repeat: bool = False,
    shuffle_files: bool = True,
    seed: "int | None" = None,
):
    """Create batched train/validation datasets from paired ``.npy`` shards.

    Shards are discovered as ``X_shard_*.npy`` / ``Y_shard_*.npy`` inside
    ``shard_dir`` and split *by file* into train and validation sets.

    Args:
        shard_dir: Directory containing the shard files.
        batch_size: Batch size applied to both datasets.
        train_ratio: Fraction of shard files assigned to training. At least
            one shard always goes to training.
        interleave_across_shards: When true and a split has more than one
            shard, samples are drawn with
            ``tf.data.Dataset.sample_from_datasets``; otherwise shards are
            concatenated in order.
        repeat: When true, each dataset repeats indefinitely before batching.
        shuffle_files: When true, the shard order is shuffled before the
            split.
        seed: Optional seed for the shard shuffle so the train/val split is
            reproducible across runs. ``None`` uses the global ``random``
            state.

    Returns:
        ``(train_ds, val_ds, n_train_shards, n_val_shards)``; ``val_ds`` is
        ``None`` when no shard is left for validation.

    Raises:
        ValueError: If no shards are found, the X/Y counts differ, or an
            X/Y pair does not share the same file-name suffix.
    """
    x_files = sorted(glob.glob(os.path.join(shard_dir, "X_shard_*.npy")))
    y_files = sorted(glob.glob(os.path.join(shard_dir, "Y_shard_*.npy")))

    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if not x_files or len(x_files) != len(y_files):
        raise ValueError("샤드 파일이 없거나 개수가 불일치합니다.")

    # Pairing relies on the two sorted lists lining up; verify it, because a
    # silent mismatch would attach labels to the wrong images.
    for x_file, y_file in zip(x_files, y_files):
        if os.path.basename(x_file)[1:] != os.path.basename(y_file)[1:]:
            raise ValueError(
                f"Shard pair mismatch: {x_file!r} does not correspond to {y_file!r}"
            )

    order = list(range(len(x_files)))
    if shuffle_files:
        if seed is None:
            random.shuffle(order)
        else:
            random.Random(seed).shuffle(order)

    # Keep at least one training shard; otherwise a small shard count
    # (e.g. 1 shard with ratio 0.8) yields an empty train split and crashes.
    split = max(1, int(len(order) * train_ratio))
    train_idx, val_idx = order[:split], order[split:]

    def make_combo(shard_ids, shuffle_inside=True):
        shards = [
            _per_shard_dataset(x_files[i], y_files[i], shuffle_inside=shuffle_inside)
            for i in shard_ids
        ]
        if interleave_across_shards and len(shards) > 1:
            ds = tf.data.Dataset.sample_from_datasets(shards)
        else:
            ds = shards[0]
            for extra in shards[1:]:
                ds = ds.concatenate(extra)
        if repeat:
            ds = ds.repeat()
        return ds.batch(batch_size).prefetch(AUTOTUNE)

    train_ds = make_combo(train_idx, shuffle_inside=True)
    val_ds = make_combo(val_idx, shuffle_inside=False) if val_idx else None
    return train_ds, val_ds, len(train_idx), len(val_idx)

# ---------------------------------------------------------
# 오프라인 평가 (원하는 시점에 호출)
# ---------------------------------------------------------
def eval_lvt_metrics(model, dataset, max_batches=None):
    """Compute per-group and joint accuracy of ``model`` over ``dataset``.

    Args:
        model: Keras model whose output has one score per column of the
            label vector.
        dataset: Iterable of ``(X, Y)`` batches.
        max_batches: Optional cap on the number of batches evaluated. The
            check runs after a batch is processed, so at least one batch is
            evaluated whenever the dataset is non-empty.

    Returns:
        A dict with ``acc_first``, ``acc_mid``, ``acc_last``, ``acc_joint``
        (floats) and ``samples`` (int). ``acc_middle`` is provided as an alias
        of ``acc_mid`` so the keys also match the training metric names.
        Accuracies are ``nan`` when no sample was evaluated.
    """
    l_right = v_right = t_right = joint_right = 0
    total = 0
    for b, (X, Y) in enumerate(dataset):
        # predict_on_batch avoids the per-call overhead of Model.predict,
        # which is not meant to be called inside a Python loop.
        P = np.asarray(model.predict_on_batch(X))  # (B, 68)
        Y = np.asarray(Y)

        l_ok = np.argmax(Y[:, L_SLICE], axis=1) == np.argmax(P[:, L_SLICE], axis=1)
        v_ok = np.argmax(Y[:, V_SLICE], axis=1) == np.argmax(P[:, V_SLICE], axis=1)
        t_ok = np.argmax(Y[:, T_SLICE], axis=1) == np.argmax(P[:, T_SLICE], axis=1)

        l_right += int(np.sum(l_ok))
        v_right += int(np.sum(v_ok))
        t_right += int(np.sum(t_ok))
        joint_right += int(np.sum(l_ok & v_ok & t_ok))
        total += int(Y.shape[0])

        if max_batches is not None and (b + 1) >= max_batches:
            break

    def _ratio(count):
        # Guard against an empty dataset instead of dividing by zero.
        return count / total if total else float("nan")

    acc_mid = _ratio(v_right)
    return {
        "acc_first": _ratio(l_right),
        "acc_mid": acc_mid,
        "acc_middle": acc_mid,
        "acc_last": _ratio(t_right),
        "acc_joint": _ratio(joint_right),
        "samples": total,
    }

# ---------------------------------------------------------
# 학습 루틴
# ---------------------------------------------------------
def compile_and_train():
    """Build the datasets and model, train, save, and report validation metrics.

    Steps: create ``OUT_DIR``; build train/val datasets from ``SHARD_DIR``;
    compile the MobileNetV2 model with a three-way cross-entropy loss; train
    with checkpoint / LR-schedule / early-stopping / logging callbacks; save
    the final model to ``OUT_DIR/final.keras``; print L/V/T/joint accuracy on
    the validation set when one exists.

    Returns:
        ``(model, history, (train_ds, val_ds))``.
    """
    os.makedirs(OUT_DIR, exist_ok=True)

    # 1) Data
    train_ds, val_ds, n_tr, n_va = build_ds_from_numpy_shards(
        shard_dir=SHARD_DIR,
        batch_size=BATCH_SIZE,
        train_ratio=TRAIN_RATIO,
        interleave_across_shards=True,
        repeat=False,
        shuffle_files=True,
    )
    print(f"train shards: {n_tr}, val shards: {n_va}")

    def hangul_loss(y_true, logits):
        """Sum of three softmax cross-entropies, one per L/V/T column group.

        ``logits`` must be raw (pre-softmax) scores: each slice is normalised
        independently by ``softmax_cross_entropy_with_logits``.
        """
        cho = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_true[:, L_SLICE],
            logits=logits[:, L_SLICE],
        )
        jung = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_true[:, V_SLICE],
            logits=logits[:, V_SLICE],
        )
        jong = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_true[:, T_SLICE],
            logits=logits[:, T_SLICE],
        )
        return cho + jung + jong

    # 2) Model
    model = build_mobilenetv2(mode=MODE, alpha=ALPHA, dropout=0.25, aug=AUG_ON)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(INIT_LR),
        loss=hangul_loss,
        metrics=[acc_first, acc_middle, acc_last, acc_joint],
    )
    model.summary()

    # 3) Callbacks
    # Monitor validation quantities only when a validation set exists;
    # otherwise fall back to the training ones so the callbacks stay active.
    prefix = "val_" if val_ds is not None else ""
    ckpt_path = os.path.join(OUT_DIR, "ckpt_best.keras")
    callbacks = [
        # "val_accuracy" is never logged because no "accuracy" metric is
        # compiled, so the best checkpoint would never be written. Track the
        # joint accuracy instead, with an explicit direction.
        tf.keras.callbacks.ModelCheckpoint(
            ckpt_path, monitor=f"{prefix}acc_joint", mode="max",
            save_best_only=True, save_weights_only=False
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor=f"{prefix}loss", mode="min",
            factor=0.5, patience=5, min_lr=3e-6, verbose=1
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor=f"{prefix}loss", mode="min",
            patience=10, restore_best_weights=True, verbose=1
        ),
        tf.keras.callbacks.CSVLogger(os.path.join(OUT_DIR, "train.csv")),
        tf.keras.callbacks.TensorBoard(log_dir=os.path.join(OUT_DIR, "tb")),
    ]

    # 4) Training
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1,
    )

    # 5) Final save
    model.save(os.path.join(OUT_DIR, "final.keras"))

    # 6) Print L/V/T/joint metrics on the validation set
    if val_ds is not None:
        metrics = eval_lvt_metrics(model, val_ds)
        print("\n========== L/V/T/Joint metrics on VAL ==========")
        print(f"Samples   : {metrics['samples']}")
        print(f"초성 정확도 : {metrics['acc_first']:.4f}")
        # eval_lvt_metrics reports the medial accuracy under "acc_mid";
        # reading "acc_middle" raised KeyError after training had finished.
        print(f"중성 정확도 : {metrics['acc_mid']:.4f}")
        print(f"종성 정확도 : {metrics['acc_last']:.4f}")
        print(f"총 정확도   : {metrics['acc_joint']:.4f}")
        print("===============================================\n")

    return model, history, (train_ds, val_ds)

# ---------------------------------------------------------
# 실행
# ---------------------------------------------------------
if __name__ == "__main__":
    # Mixed precision (optional): this line is currently active, so the
    # "mixed_float16" policy is set before the model is built. Comment it out
    # on hardware without float16 support.
    tf.keras.mixed_precision.set_global_policy("mixed_float16")
    compile_and_train()

train shards: 17, val shards: 5


Epoch 1/300


2025-11-05 14:15:36.164094: E tensorflow/core/util/util.cc:131] oneDNN supports DT_HALF only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.
2025-11-05 14:15:37.702953: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91400


   3985/Unknown [1m2667s[0m 662ms/step - acc_first: 0.3373 - acc_joint: 0.1718 - acc_last: 0.4295 - acc_middle: 0.3461 - accuracy: 0.1083 - loss: 9.3715

2025-11-05 14:59:48.816778: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2025-11-05 14:59:48.817381: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 6272963306562431588
2025-11-05 14:59:48.817391: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]
2025-11-05 14:59:48.817398: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 15492697771443998863
2025-11-05 14:59:48.817400: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 5677773322457126105
2025-11-05 14:59:48.817402: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 456673798470145519
2025-11-05 

[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2824s[0m 701ms/step - acc_first: 0.6201 - acc_joint: 0.4312 - acc_last: 0.6884 - acc_middle: 0.6186 - accuracy: 0.2167 - loss: 6.9254 - val_acc_first: 0.0691 - val_acc_joint: 3.8110e-04 - val_acc_last: 0.0910 - val_acc_middle: 0.0897 - val_accuracy: 0.0000e+00 - val_loss: 12.5761 - learning_rate: 3.0000e-04
Epoch 2/300
[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 386ms/step - acc_first: 0.9370 - acc_joint: 0.8397 - acc_last: 0.9472 - acc_middle: 0.9331 - accuracy: 0.3313 - loss: 4.1299

2025-11-05 15:28:03.889761: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 6272963306562431588
2025-11-05 15:28:03.889982: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 15492697771443998863
2025-11-05 15:28:03.889988: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 5677773322457126105
2025-11-05 15:28:03.889990: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 456673798470145519
2025-11-05 15:28:03.889994: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv item cancelled. Key hash: 2697484732090457703
2025-11-05 15:28:03.889995: I tensorflow/core/framework/local_rendezvous.cc:430] Local rendezvous send item cancelled. Key hash: 1445589767044540121
2025-11-05 15:28:03.889999: I tensorflow/core/framework/local_rendezvous.cc:426] Local rendezvous recv ite

[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1695s[0m 425ms/step - acc_first: 0.9465 - acc_joint: 0.8638 - acc_last: 0.9550 - acc_middle: 0.9443 - accuracy: 0.3336 - loss: 3.9847 - val_acc_first: 0.9591 - val_acc_joint: 0.8870 - val_acc_last: 0.9646 - val_acc_middle: 0.9487 - val_accuracy: 0.4048 - val_loss: 3.8040 - learning_rate: 3.0000e-04
Epoch 3/300
[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1661s[0m 417ms/step - acc_first: 0.9655 - acc_joint: 0.9125 - acc_last: 0.9717 - acc_middle: 0.9658 - accuracy: 0.3403 - loss: 3.6852 - val_acc_first: 0.9784 - val_acc_joint: 0.9386 - val_acc_last: 0.9838 - val_acc_middle: 0.9685 - val_accuracy: 0.2790 - val_loss: 3.5680 - learning_rate: 3.0000e-04
Epoch 4/300
[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1688s[0m 423ms/step - acc_first: 0.9723 - acc_joint: 0.9315 - acc_last: 0.9782 - acc_middle: 0.9739 - accuracy: 0.3403 - loss: 3.5844 - val_acc_first: 0.9767 - val_acc_joint: 0.9440 - val_ac

2025-11-05 16:26:30.271229: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1666s[0m 418ms/step - acc_first: 0.9767 - acc_joint: 0.9421 - acc_last: 0.9818 - acc_middle: 0.9783 - accuracy: 0.3404 - loss: 3.5331 - val_acc_first: 0.9388 - val_acc_joint: 0.8356 - val_acc_last: 0.9519 - val_acc_middle: 0.9092 - val_accuracy: 0.3279 - val_loss: 4.0390 - learning_rate: 3.0000e-04
Epoch 6/300
[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1658s[0m 416ms/step - acc_first: 0.9793 - acc_joint: 0.9487 - acc_last: 0.9838 - acc_middle: 0.9810 - accuracy: 0.3413 - loss: 3.5023 - val_acc_first: 0.2193 - val_acc_joint: 0.0152 - val_acc_last: 0.1513 - val_acc_middle: 0.2643 - val_accuracy: 0.1207 - val_loss: 20.4377 - learning_rate: 3.0000e-04
Epoch 7/300
[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1668s[0m 419ms/step - acc_first: 0.9810 - acc_joint: 0.9534 - acc_last: 0.9853 - acc_middle: 0.9829 - accuracy: 0.3428 - loss: 3.4825 - val_acc_first: 0.9332 - val_acc_joint: 0.8256 - val_a

2025-11-05 18:18:08.110124: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[IteratorGetNext/_2]]


[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 387ms/step - acc_first: 0.9841 - acc_joint: 0.9600 - acc_last: 0.9877 - acc_middle: 0.9854 - accuracy: 0.3430 - loss: 3.4568
Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0001500000071246177.
[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1698s[0m 426ms/step - acc_first: 0.9842 - acc_joint: 0.9607 - acc_last: 0.9877 - acc_middle: 0.9855 - accuracy: 0.3435 - loss: 3.4533 - val_acc_first: 0.1057 - val_acc_joint: 0.0022 - val_acc_last: 0.3361 - val_acc_middle: 0.1461 - val_accuracy: 0.0014 - val_loss: 40.7322 - learning_rate: 3.0000e-04
Epoch 10/300
[1m3985/3985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1692s[0m 424ms/step - acc_first: 0.9878 - acc_joint: 0.9699 - acc_last: 0.9902 - acc_middle: 0.9894 - accuracy: 0.3454 - loss: 3.4130 - val_acc_first: 0.4699 - val_acc_joint: 0.1187 - val_acc_last: 0.8104 - val_acc_middle: 0.3677 - val_accuracy: 0.0605 - val_loss: 28.5079 - learning_rate: 1.5

KeyError: 'acc_middle'