In [46]:
# import zipfile
import os, re, glob
import pandas as pd
import tensorflow as tf
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam


from pathlib import Path

In [47]:
## 변경수

import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

seed = 0
np.random.seed(seed)
tf.random.set_seed(seed)


csv_path = "./pokemon_gen1.csv" 
img_dir = "./PokemonData"

#csv 불러오기
df = pd.read_csv(csv_path)
import os

# 이미지 경로 
image_paths = []

for name in df["Name"]:
    folder_path = os.path.join(img_dir, name)  # 예: "./PokemonData/Bulbasaur"
    if os.path.exists(folder_path):
        files = os.listdir(folder_path)  # 해당 폴더 안의 파일들
        for f in files:
            image_paths.append({
                "Name": name,
                "filepath": os.path.join(folder_path, f)
            })


# 새로운 DataFrame으로 정리
image_df = pd.DataFrame(image_paths)
print(image_df.head())    #잘 불러와 지는 지 확인

#타입 분류
df["Type2"] = df["Type2"].fillna("")  #fillna => 결측치를 공백으로 변환
df["labels"] = df.apply(lambda x: [x["Type1"]] if x["Type2"] == "" else [x["Type1"], x["Type2"]], axis=1) #lambda = 한 줄로 함수 만들기
print(df) #결과 확인

mlb = MultiLabelBinarizer()        # MultiLabelBinarizer타입을 2진수로 변환하기 위해 사용하는 도구
x = mlb.fit_transform(df["labels"]) # 타입을 2진수로 변환

print("클래스(타입):", mlb.classes_)
print("샘플 라벨:", x[0])


        Name                                 filepath
0  Bulbasaur  ./PokemonData/Bulbasaur/Bulbasaur41.jpg
1  Bulbasaur  ./PokemonData/Bulbasaur/Bulbasaur40.jpg
2  Bulbasaur  ./PokemonData/Bulbasaur/Bulbasaur42.jpg
3  Bulbasaur  ./PokemonData/Bulbasaur/Bulbasaur43.jpg
4  Bulbasaur  ./PokemonData/Bulbasaur/Bulbasaur47.jpg
           Name    Type1   Type2            labels
0     Bulbasaur    Grass  Poison   [Grass, Poison]
1       Ivysaur    Grass  Poison   [Grass, Poison]
2      Venusaur    Grass  Poison   [Grass, Poison]
3    Charmander     Fire                    [Fire]
4    Charmeleon     Fire                    [Fire]
..          ...      ...     ...               ...
146     Dratini   Dragon                  [Dragon]
147   Dragonair   Dragon                  [Dragon]
148   Dragonite   Dragon  Flying  [Dragon, Flying]
149      Mewtwo  Psychic                 [Psychic]
150         Mew  Psychic                 [Psychic]

[151 rows x 4 columns]
클래스(타입): ['Bug' 'Dragon' 'Electric' 'Fai

In [48]:
## 김규민
# 1) 이미지-라벨 병합 (멀티라벨 원-핫을 이미지별로 부여)
label_cols = list(mlb.classes_)                      # 타입 이름들
y_df = pd.DataFrame(x, columns=label_cols)           # x: MultiLabelBinarizer 결과

name_label_df = pd.concat([df[["Name"]].reset_index(drop=True), y_df], axis=1)
merged = image_df.merge(name_label_df, on="Name", how="inner")

# 이미지 확장자 필터
# valid_ext = (".png", ".jpg", ".jpeg", ".webp", ".bmp")
# merged = merged[merged["filepath"].str.lower().str.endswith(valid_ext)].reset_index(drop=True)

# 2) 학습/검증 분리
train_df, val_df = train_test_split(merged, test_size=0.2, random_state=42, shuffle=True)

# 3) 전처리 & 제너레이터 (MobileNetV2 전처리 일관 적용)
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20, width_shift_range=0.1, height_shift_range=0.1,
    zoom_range=0.15, horizontal_flip=True
)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

target_size = (224, 224)
batch_size = 32

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col="filepath",
    y_col=label_cols,              # 멀티라벨 원-핫 컬럼들
    target_size=target_size,
    batch_size=batch_size,
    class_mode="raw",              # 멀티라벨 → raw
    shuffle=True
)
validation_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col="filepath",
    y_col=label_cols,
    target_size=target_size,
    batch_size=batch_size,
    class_mode="raw",
    shuffle=False
)

# 4) 모델 (MobileNetV2 백본 + 멀티라벨 헤드)
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224,224,3))
base_model.trainable = False

x_in = base_model.output
x_in = GlobalAveragePooling2D()(x_in)
x_in = Dropout(0.2)(x_in)
num_labels = len(label_cols)
predictions = Dense(num_labels, activation='sigmoid')(x_in)  # 멀티라벨

model = Model(inputs=base_model.input, outputs=predictions)

from tensorflow.keras.optimizers import Adam
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy',   # 멀티라벨 손실
              metrics=['accuracy'])

# 4) 모델 컴파일까지 동일 ...

# === 콜백 설정 ===
ckpt_path = "best_pokemon_types_multilabel.keras"  # 가장 좋은 모델 저장 파일명
checkpoint = ModelCheckpoint(
    ckpt_path,
    monitor='val_loss',           # 검증 손실 기준
    save_best_only=True,          # 가장 좋은 지점만 저장
    mode='min',
    verbose=1
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=7,                   # 7 epochs 개선 없으면 중단(원하는 값으로 조절)
    mode='min',
    restore_best_weights=True,    # 가장 성능 좋았던 가중치로 복원
    verbose=1
)


# 5) 학습
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=100,
    callbacks=[checkpoint, early_stopping]  # 콜백 추가
)

# 6) 저장(최종 가중치도 따로 저장하고 싶다면 유지)
model.save('pokemon_types_multilabel.keras')

Found 9556 validated image filenames.
Found 2389 validated image filenames.


  self._warn_if_super_not_called()


Epoch 1/100
[1m 50/299[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m54s[0m 219ms/step - accuracy: 0.0576 - loss: 0.6048

KeyboardInterrupt: 