### Tensorflow + EfficientNetV2B0 사용1코드
이미지 증강, Dropout 조정, EarlyStopping, ReduceLROnPlateau, Padding 등을 반영한 코드

In [1]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetV2B0  
from tensorflow.keras import layers, models, optimizers, callbacks
from sklearn.model_selection import train_test_split  # 수정됨

# 설정
train_dir = './open/train'
batch_size = 16
img_size = (224, 224)
checkpoint_dir = './checkpoints2'
os.makedirs(checkpoint_dir, exist_ok=True)
epoch_tracker_path = os.path.join(checkpoint_dir, 'epoch_tracker.txt')


In [2]:
# 라벨 인코딩
image_paths = list(Path(train_dir).rglob('*.jpg'))
df = pd.DataFrame({'image': [str(p) for p in image_paths]})
df['label'] = df['image'].apply(lambda x: Path(x).parent.name)
label2idx = {label: idx for idx, label in enumerate(sorted(df['label'].unique()))}
idx2label = {idx: label for label, idx in label2idx.items()}
df['label_idx'] = df['label'].map(label2idx)

# 균형 샘플링
df_balanced = df.groupby('label').apply(lambda x: x.sample(n=15000, random_state=42)).reset_index(drop=True)
train_df, val_df = train_test_split(df_balanced, test_size=0.3, stratify=df_balanced['label_idx'], random_state=42)

# 파일 경로와 라벨을 텐서로 변환
def df_to_dataset(df, shuffle=True):
    paths = df['image'].values
    labels = df['label_idx'].values.astype(np.int32)
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))

    def process(path, label):
        image = tf.io.read_file(path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize_with_pad(image, img_size[0], img_size[1])
        image = tf.image.convert_image_dtype(image, tf.float32)

        # 증강
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_brightness(image, 0.2)
        image = tf.image.random_contrast(image, 0.8, 1.2)
        image = tf.image.random_crop(tf.image.resize_with_crop_or_pad(image, 250, 250), [224, 224, 3])
        return image, label

    ds = ds.map(process, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

train_dataset = df_to_dataset(train_df)
val_dataset = df_to_dataset(val_df, shuffle=False)


  df_balanced = df.groupby('label').apply(lambda x: x.sample(n=15000, random_state=42)).reset_index(drop=True)


In [3]:
# 마지막 학습 에포크 불러오기
def get_last_epoch():
    if os.path.exists(epoch_tracker_path):
        with open(epoch_tracker_path, 'r') as f:
            return int(f.read().strip()) + 1
    return 0

# 모델 생성
def build_model(num_classes):
    base_model = EfficientNetV2B0(include_top=False, input_shape=(224, 224, 3), pooling='avg', weights='imagenet')
    base_model.trainable = True
    model = models.Sequential([
        base_model,
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=optimizers.Adam(1e-4),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

model = build_model(len(label2idx))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-b0_notop.h5
[1m24274472/24274472[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [4]:
# 체크포인트 로드
last_epoch = get_last_epoch()
ckpt_path = os.path.join(checkpoint_dir, f'model_epoch_{last_epoch-1}.weights.h5')
if os.path.exists(ckpt_path):
    print(f"✅ 이전 체크포인트 {ckpt_path} 불러옵니다")
    model.load_weights(ckpt_path)
else:
    print("🚀 새로 학습을 시작합니다")

# 콜백 설정
class CustomCheckpoint(callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        path = os.path.join(checkpoint_dir, f'model_epoch_{epoch}.weights.h5')
        self.model.save_weights(path)
        with open(epoch_tracker_path, 'w') as f:
            f.write(str(epoch))
        print(f"📦 에포크 {epoch} 모델 저장 완료")

early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
reduce_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)


🚀 새로 학습을 시작합니다


In [5]:
# 학습
model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=4,
    initial_epoch=last_epoch,
    callbacks=[CustomCheckpoint(), early_stop, reduce_lr]
)


Epoch 1/4
[1m4594/4594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 891ms/step - accuracy: 0.5871 - loss: 1.1068📦 에포크 0 모델 저장 완료
[1m4594/4594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4589s[0m 960ms/step - accuracy: 0.5871 - loss: 1.1067 - val_accuracy: 0.7681 - val_loss: 0.6417 - learning_rate: 1.0000e-04
Epoch 2/4
[1m4594/4594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.7351 - loss: 0.7269📦 에포크 1 모델 저장 완료
[1m4594/4594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5257s[0m 1s/step - accuracy: 0.7351 - loss: 0.7269 - val_accuracy: 0.7890 - val_loss: 0.5776 - learning_rate: 1.0000e-04
Epoch 3/4
[1m4594/4594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 921ms/step - accuracy: 0.7637 - loss: 0.6509📦 에포크 2 모델 저장 완료
[1m4594/4594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4736s[0m 987ms/step - accuracy: 0.7637 - loss: 0.6509 - val_accuracy: 0.8050 - val_loss: 0.5394 - learning_rate: 1.0000e-04
Epoch 4/4
[1m4594/4594[0m [3

<keras.src.callbacks.history.History at 0x13a032d8850>

In [8]:
# 테스트셋 로드 및 전처리
test_df = pd.read_csv('./open/test.csv')
test_df['image'] = test_df['img_path'].apply(lambda x: os.path.join('./open', x))

def test_df_to_dataset(df):
    paths = df['image'].values
    ds = tf.data.Dataset.from_tensor_slices(paths)

    def process(path):
        image = tf.io.read_file(path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize_with_pad(image, img_size[0], img_size[1])
        image = tf.image.convert_image_dtype(image, tf.float32)
        return image

    ds = ds.map(process, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

test_dataset = test_df_to_dataset(test_df)


# 모델 가중치 로드 (이미 되어 있으면 생략 가능)
model.load_weights(os.path.join(checkpoint_dir, f'model_epoch_{get_last_epoch()-1}.weights.h5'))


In [9]:
# 예측
preds = model.predict(test_dataset)
pred_labels = np.argmax(preds, axis=1)

# 제출 파일 저장
submission = pd.read_csv('./open/sample_submission.csv')
submission['rock_type'] = [idx2label[p] for p in pred_labels]
submission.to_csv('submission_tensorflow2.csv', index=False)
print("🎉 제출 파일 저장 완료!")


[1m5938/5938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m945s[0m 159ms/step
🎉 제출 파일 저장 완료!
