## Tensorflow EfficientNetV2B2 1번째 시도 
### tensorflow.keras.applications

In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetV2B2
from tensorflow.keras import layers, models, optimizers, callbacks

# 설정
train_dir = './open/train'
batch_size = 16
img_size = (224, 224)
checkpoint_dir = './checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
epoch_tracker_path = os.path.join(checkpoint_dir, 'epoch_tracker.txt')


In [2]:
# 라벨 인코딩
image_paths = list(Path(train_dir).rglob('*.jpg'))
df = pd.DataFrame({'image': [str(p) for p in image_paths]})
df['label'] = df['image'].apply(lambda x: Path(x).parent.name)
label2idx = {label: idx for idx, label in enumerate(sorted(df['label'].unique()))}
idx2label = {idx: label for label, idx in label2idx.items()}
df['label_idx'] = df['label'].map(label2idx)

# 폴더당 15,000장 균형 샘플링
df_balanced = df.groupby('label').apply(lambda x: x.sample(n=15000, random_state=42)).reset_index(drop=True)
train_df, val_df = train_test_split(df_balanced, test_size=0.3, stratify=df_balanced['label_idx'], random_state=42)


  df_balanced = df.groupby('label').apply(lambda x: x.sample(n=15000, random_state=42)).reset_index(drop=True)


In [3]:
# 문자열로 변환
train_df['label_idx'] = train_df['label_idx'].astype(str)
val_df['label_idx'] = val_df['label_idx'].astype(str)

# 이미지 제너레이터
train_gen = ImageDataGenerator(rescale=1./255, horizontal_flip=True, zoom_range=0.2)
val_gen = ImageDataGenerator(rescale=1./255)

train_generator = train_gen.flow_from_dataframe(
    train_df, x_col='image', y_col='label_idx',
    target_size=img_size, batch_size=batch_size, class_mode='sparse')

val_generator = val_gen.flow_from_dataframe(
    val_df, x_col='image', y_col='label_idx',
    target_size=img_size, batch_size=batch_size, class_mode='sparse')


Found 73500 validated image filenames belonging to 7 classes.
Found 31500 validated image filenames belonging to 7 classes.


In [4]:
# 마지막 학습 에포크 불러오기
def get_last_epoch():
    if os.path.exists(epoch_tracker_path):
        with open(epoch_tracker_path, 'r') as f:
            return int(f.read().strip()) + 1  # 다음 에포크부터 시작
    return 0

# 모델 생성
def build_model(num_classes):
    base_model = EfficientNetV2B2(include_top=False, input_shape=(224, 224, 3), pooling='avg', weights='imagenet')
    base_model.trainable = True

    model = models.Sequential([
        base_model,
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=optimizers.Adam(1e-4),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

model = build_model(len(label2idx))


In [5]:
# 마지막 체크포인트 로드
last_epoch = get_last_epoch()
# ckpt_path = os.path.join(checkpoint_dir, f'model_epoch_{last_epoch-1}.h5')
ckpt_path = os.path.join(checkpoint_dir, f'model_epoch_{last_epoch-1}.weights.h5')
if os.path.exists(ckpt_path):
    print(f"✅ 이전 체크포인트 {ckpt_path} 불러옵니다")
    model.load_weights(ckpt_path)
else:
    print("🚀 새로 학습을 시작합니다")

# 콜백: 에포크별 저장 + 현재 에포크 기록
class CustomCheckpoint(callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # epoch_path = os.path.join(checkpoint_dir, f'model_epoch_{epoch}.h5')
        epoch_path = os.path.join(checkpoint_dir, f'model_epoch_{epoch}.weights.h5')
        self.model.save_weights(epoch_path)
        with open(epoch_tracker_path, 'w') as f:
            f.write(str(epoch))
        print(f"📦 에포크 {epoch} 모델 저장 완료")


✅ 이전 체크포인트 ./checkpoints\model_epoch_7.weights.h5 불러옵니다


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    initial_epoch=last_epoch,
    callbacks=[CustomCheckpoint()]
)


  self._warn_if_super_not_called()


Epoch 9/20


In [None]:
# 테스트셋 로드 및 예측
test_df = pd.read_csv('./open/test.csv')
test_df['image'] = test_df['img_path'].apply(lambda x: os.path.join('./open', x))

test_gen = ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_dataframe(
    test_df, x_col='image', y_col=None,
    target_size=img_size, batch_size=batch_size,
    class_mode=None, shuffle=False)

# 마지막 체크포인트 모델 로드
model.load_weights(os.path.join(checkpoint_dir, f'model_epoch_{get_last_epoch()-1}.h5'))

# 예측
preds = model.predict(test_generator)
pred_labels = np.argmax(preds, axis=1)

# 라벨 복원 및 제출 파일 저장
submission = pd.read_csv('./open/sample_submission.csv')
submission['rock_type'] = [idx2label[p] for p in pred_labels]
submission.to_csv('submission_tensorflow.csv', index=False)
print("🎉 제출 파일 저장 완료!")


# 

## ✅ 전체 코드

# 