# 가구40 InceptionV3 모델 훈련

## 데이터 취득

In [2]:
# https://drive.google.com/file/d/18Qj8lEd4DxMSgxYeNMlT3vKYfYs_k8Zj/view?usp=sharing
import gdown, zipfile, os

file_id = '18Qj8lEd4DxMSgxYeNMlT3vKYfYs_k8Zj'
output = 'file.zip'

gdown.download(f'https://drive.google.com/uc?id={file_id}', output, quiet=False)

output_dir = 'furniture40'
os.makedirs(output_dir, exist_ok=True)

with zipfile.ZipFile(output, 'r') as z:
    z.extractall(output_dir)

Downloading...
From: https://drive.google.com/uc?id=18Qj8lEd4DxMSgxYeNMlT3vKYfYs_k8Zj
To: C:\DA36\workspaces\mini3\ShimJeongseok\file.zip
100%|██████████| 25.7M/25.7M [00:01<00:00, 21.7MB/s]


In [1]:
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras import applications
from tensorflow.keras.applications import Xception

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [3]:
import cv2
# 데이터 디렉토리 설정
data_dir = 'C:\\DA36\\workspaces\\mini3\\ShimJeongseok\\furniture40\\resized_data_40'
IMAGE_SIZE = (299, 299)
BATCH_SIZE = 64

class_names = os.listdir(data_dir)

class_mapping = {}
for idx, name in enumerate(class_names):
    class_mapping[name] = idx
# {0 : cabinet, 1 : chair....)
images = []
labels = []

for class_name, class_idx in class_mapping.items():
    class_dir = os.path.join(data_dir, class_name)
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        image = cv2.imread(file_path)
        if image is not None:  # 이미지가 정상적으로 로드되었는지 확인
            images.append(image)
            labels.append(class_idx)

# Numpy 배열로 변환
images = np.array(images, dtype=np.float32) / 255.0  # 정규화
labels = np.array(labels, dtype=np.int32)  # 라벨은 정수 배열로

In [4]:
# 전이학습
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model

base_model = Xception(input_shape=(299, 299, 3), include_top=False, weights='imagenet')

for layer in base_model.layers[-40:]:
    layer.trainable = True

# 새로운 출력층 추가
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = layers.Dropout(0.4)(x)
x = Dense(2048, activation='relu', kernel_initializer='he_normal')(x)
x = layers.Dropout(0.4)(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
# model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [5]:
from tensorflow.keras.utils import Sequence

from sklearn.utils import shuffle
import numpy as np
import cv2

BATCH_SIZE = 64
IMAGE_SIZE = 299

class Furniture40Sequence(Sequence):
    def __init__(self, images, labels, batch_size=BATCH_SIZE, image_size=IMAGE_SIZE, augmentor=None, shuffle=False, preprocess_function=None):
        self.images = images # path가 아닌 이미지 픽셀 데이터(ndarray)
        self.labels = labels # fit/evaluate에는 label데이터가 있지만, predict에는 label데이터가 없다.
        self.batch_size = batch_size
        self.image_size = image_size
        self.augmentor = augmentor
        self.shuffle = shuffle
        self.preprocess_function = preprocess_function # 스케일링 함수
        self.on_epoch_end() # shuffle 처리

    def __len__(self):
        # 이 데이터셋은 몇개의 batch로 구성되었는가
        return int(np.ceil(len(self.images) / self.batch_size))

    def __getitem__(self, index):
        # 이번 배치용 이미지/라벨 선정
        start = index * self.batch_size
        end = (index + 1) * self.batch_size
        this_batch_images = self.images[start:end]
        batch_labels = self.labels[start:end] if self.labels is not None else None
        batch_images = np.zeros((this_batch_images.shape[0], self.image_size, self.image_size, 3), dtype=np.float32)

        for i in range(this_batch_images.shape[0]): # 이번 배치 이미지만큼 반복
            image = this_batch_images[i]

            # 데이터 증강
            if self.augmentor is not None:
                image = self.augmentor(image)['image']

            # 리사이즈
            image = cv2.resize(image, (self.image_size, self.image_size))
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # 스케일링
            if self.preprocess_function is not None:
                image = self.preprocess_function(image)

            batch_images[i] = image

        return (batch_images, batch_labels) if self.labels is not None else batch_images

    def on_epoch_end(self):
        if self.shuffle:
            self.images, self.labels = shuffle(self.images, self.labels)

In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=0, stratify=labels)

# one-hot encoding
y_train_one_hot = to_categorical(y_train, 4)
y_test_one_hot = to_categorical(y_test, 4)

X_train.shape, X_test.shape, y_train.shape, y_test.shape


((643, 224, 224, 3), (161, 224, 224, 3), (643,), (161,))

In [8]:
# 학습/검증/평가용 Sequence객체 준비
from tensorflow.keras.applications.xception import preprocess_input

tr_seq = Furniture40Sequence(X_train, y_train_one_hot, preprocess_function=preprocess_input, shuffle=True)
val_seq = Furniture40Sequence(X_test, y_test_one_hot, preprocess_function=preprocess_input, shuffle=False)
test_seq = Furniture40Sequence(X_test, y_test_one_hot, preprocess_function=preprocess_input, shuffle=False)

tr_batch_images, tr_batch_labels = next(iter(tr_seq))
tr_batch_images.shape, tr_batch_labels.shape

((64, 299, 299, 3), (64, 4))

In [9]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
# class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}

model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

early_stopping_cb = EarlyStopping(patience=6, verbose=1, restore_best_weights=True)
reduce_lr_on_plateau_cb = ReduceLROnPlateau(patience=3, factor=0.5, verbose=1)

# 학습
history = model.fit(tr_seq, validation_data=val_seq, epochs=50, callbacks=[early_stopping_cb, reduce_lr_on_plateau_cb])

# 학습결과 시각화
pd.DataFrame(history.history).plot()
plt.show()

# 평가
loss, accuracy = model.evaluate(test_seq)
print(f'loss: {loss:.4f}, accuracy: {accuracy:.4f}')

  self._warn_if_super_not_called()


Epoch 1/50
[1m 4/11[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m35:26[0m 304s/step - accuracy: 0.3298 - loss: 1.3715

KeyboardInterrupt: 