<a href="https://colab.research.google.com/github/saykim/ds/blob/main/%EC%BC%80%EB%9D%BC%EC%8A%A4%EC%99%B8%EA%B4%80%EC%9D%B4%EB%AF%B8%EC%A7%80_%EC%A0%95%EC%83%81_%EB%B6%88%EB%9F%89_230221.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 케라스 모델로 외관불량 이미지 분류
* 케라스 함수형 사용
* 이미지 변형
* 클래스 가중치로 정상/불량 데이터 불균형 보정

In [None]:
import os
import glob
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Model
from keras.callbacks import EarlyStopping




In [None]:
# Glob patterns selecting the training and test image files.
train_path = 'train/*.jpg'
test_path = 'test/*.jpg'

# Size every image is resized to when it is loaded.
img_width = 224
img_height = 224

# Shape of a single model input: the two image dimensions plus 3 channels.
input_shape = (img_width, img_height, 3)

# Number of files currently matched by each pattern.
nb_train_samples, nb_test_samples = (
    len(glob.glob(pattern)) for pattern in (train_path, test_path)
)

# Number of target classes.
nb_classes = 2

# Random augmentation settings; `rescale` is the factor the generator
# multiplies pixel values by, the rest are random geometric transforms.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
datagen_train = ImageDataGenerator(**augmentation_options)

# Build the training set: every source image contributes itself plus
# `nb_aug_per_image` randomly augmented copies (1 + 10 = 11 rows per file,
# which is exactly what the buffers below are sized for).
nb_aug_per_image = 10
# Must match the `rescale` factor configured on datagen_train so that the
# original, augmented and test images all share the same value range.
pixel_scale = 1. / 255

x_train = np.zeros((nb_train_samples * (1 + nb_aug_per_image), img_width, img_height, 3), dtype=np.float32)
y_train = np.zeros((nb_train_samples * (1 + nb_aug_per_image), nb_classes), dtype=np.float32)

i = 0  # next free row in x_train / y_train
for file in glob.glob(train_path):
    img = load_img(file, target_size=(img_width, img_height))
    img_array = img_to_array(img)

    # The label is the part of the file name before the first '_':
    # '정상' -> class 0 (normal), anything else -> class 1 (defect).
    label = os.path.basename(file).split('_')[0]
    class_index = 0 if label == '정상' else 1

    # Original image, scaled the same way the generator scales its output.
    x_train[i] = img_array * pixel_scale
    y_train[i][class_index] = 1
    i += 1

    # Augmented copies. flow() yields batches indefinitely, so count the
    # copies taken for THIS image and stop after nb_aug_per_image. (Breaking
    # on `i % nb_train_samples == 0` produced nb_train_samples - 1 copies per
    # image and overran the buffer for datasets with more than 11 files.)
    img_aug = img_array.reshape((1,) + img_array.shape)
    for aug_count, batch in enumerate(datagen_train.flow(img_aug, batch_size=1), start=1):
        x_train[i] = batch[0]
        y_train[i][class_index] = 1
        i += 1
        if aug_count >= nb_aug_per_image:
            break

# Build the test set: one row per file, no augmentation.
x_test = np.zeros((nb_test_samples, img_width, img_height, 3), dtype=np.float32)
y_test = np.zeros((nb_test_samples, nb_classes), dtype=np.float32)

i = 0  # next free row in x_test / y_test
for file in glob.glob(test_path):
    img = load_img(file, target_size=(img_width, img_height))
    img_array = img_to_array(img)
    # Same scaling as the training data.
    x_test[i] = img_array * pixel_scale

    label = os.path.basename(file).split('_')[0]
    y_test[i][0 if label == '정상' else 1] = 1
    i += 1

# Model architecture (functional API): three conv + max-pool stages with
# 32, 64 and 128 filters, then two dense layers and a softmax output with
# one unit per class.
conv_options = dict(kernel_size=(3, 3), activation='relu', padding='same')
pool_options = dict(pool_size=(2, 2))

inputs = Input(shape=input_shape)

conv1 = Conv2D(filters=32, **conv_options)(inputs)
pool1 = MaxPooling2D(**pool_options)(conv1)

conv2 = Conv2D(filters=64, **conv_options)(pool1)
pool2 = MaxPooling2D(**pool_options)(conv2)

conv3 = Conv2D(filters=128, **conv_options)(pool2)
pool3 = MaxPooling2D(**pool_options)(conv3)

flatten = Flatten()(pool3)
fc1 = Dense(units=512, activation='relu')(flatten)
fc2 = Dense(units=256, activation='relu')(fc1)
outputs = Dense(units=nb_classes, activation='softmax')(fc2)

In [None]:
# Wrap the layer graph into a model.
model = Model(inputs=inputs, outputs=outputs)
# The network ends in a softmax over one-hot labels, so categorical
# cross-entropy is the loss that matches it (binary cross-entropy is meant
# for independent sigmoid outputs).
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])



In [None]:
# Stop once val_loss has not improved for 5 epochs, and roll the model back to
# the weights of the best epoch; otherwise the model that gets evaluated is
# the one from 5 epochs after the best validation loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# Train for up to 100 epochs in batches of 32, validating on the test arrays
# after each epoch so the early-stopping callback can monitor val_loss.
fit_options = dict(
    batch_size=32,
    epochs=100,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping],
)
model.fit(x_train, y_train, **fit_options)

In [None]:
# Evaluate on the test arrays; the first two entries of the result are
# printed as the loss and the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

# 불량, 정상간의 데이터 불균형

>만약 외관 이미지 불량이 전체 생산량 중에 0.1%도 발생하지 않는다면, 데이터 불균형 문제가 발생합니다. 이런 경우 모델이 모든 샘플을 정상으로만 예측해도 정확도는 매우 높게 나오기 때문에, 정작 검출해야 할 불량을 거의 잡아내지 못하는 모델이 학습될 수 있습니다.

>이를 해결하기 위해서는 데이터셋의 불균형을 해소할 필요가 있습니다. 데이터셋의 불균형 문제를 해결하기 위해서는 데이터를 증강하는 방법 외에도, 언더샘플링, 오버샘플링, 클래스 가중치 적용 등의 방법을 이용할 수 있습니다. 이 중에서 클래스 가중치 적용 방법을 적용하여 최적화한 코드를 작성해보겠습니다.

In [None]:
import os
import glob
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Model
from keras.callbacks import EarlyStopping

# Glob patterns selecting the training and test image files.
train_path = 'train/*.jpg'
test_path = 'test/*.jpg'

# Size every image is resized to when it is loaded.
img_width = 224
img_height = 224

# Shape of a single model input: the two image dimensions plus 3 channels.
input_shape = (img_width, img_height, 3)

# Number of files currently matched by each pattern.
nb_train_samples, nb_test_samples = (
    len(glob.glob(pattern)) for pattern in (train_path, test_path)
)

# Number of target classes.
nb_classes = 2

# Random augmentation settings; `rescale` is the factor the generator
# multiplies pixel values by, the rest are random geometric transforms.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
datagen_train = ImageDataGenerator(**augmentation_options)

# Build the training set: every source image contributes itself plus
# `nb_aug_per_image` randomly augmented copies (1 + 10 = 11 rows per file,
# which is exactly what the buffers below are sized for).
nb_aug_per_image = 10
# Must match the `rescale` factor configured on datagen_train so that the
# original, augmented and test images all share the same value range.
pixel_scale = 1. / 255

x_train = np.zeros((nb_train_samples * (1 + nb_aug_per_image), img_width, img_height, 3), dtype=np.float32)
y_train = np.zeros((nb_train_samples * (1 + nb_aug_per_image), nb_classes), dtype=np.float32)

i = 0             # next free row in x_train / y_train
count_defect = 0  # rows (originals + augmented copies) labelled as defect
for file in glob.glob(train_path):
    img = load_img(file, target_size=(img_width, img_height))
    img_array = img_to_array(img)

    # The label is the part of the file name before the first '_':
    # '정상' -> class 0 (normal), anything else -> class 1 (defect).
    label = os.path.basename(file).split('_')[0]
    class_index = 0 if label == '정상' else 1

    # Original image, scaled the same way the generator scales its output.
    x_train[i] = img_array * pixel_scale
    y_train[i][class_index] = 1
    count_defect += class_index
    i += 1

    # Augmented copies. flow() yields batches indefinitely, so count the
    # copies taken for THIS image and stop after nb_aug_per_image. (Breaking
    # on `i % nb_train_samples == 0` produced nb_train_samples - 1 copies per
    # image and overran the buffer for datasets with more than 11 files.)
    img_aug = img_array.reshape((1,) + img_array.shape)
    for aug_count, batch in enumerate(datagen_train.flow(img_aug, batch_size=1), start=1):
        x_train[i] = batch[0]
        y_train[i][class_index] = 1
        count_defect += class_index
        i += 1
        if aug_count >= nb_aug_per_image:
            break

# Class weights: class 0 (normal) keeps weight 1 and class 1 (defect) is
# weighted by normal rows / defect rows, so both classes contribute equally
# to the loss. Both counts include the augmented copies; dividing the
# un-augmented file count by the augmented defect count mixed two scales.
# Falls back to 1 when either class is absent to avoid a zero division or a
# zero weight.
count_normal = i - count_defect
class_weight = {0: 1, 1: count_normal / count_defect if count_normal and count_defect else 1}

# Build the test set: one row per file, no augmentation.
x_test = np.zeros((nb_test_samples, img_width, img_height, 3), dtype=np.float32)
y_test = np.zeros((nb_test_samples, nb_classes), dtype=np.float32)

i = 0  # next free row in x_test / y_test
for file in glob.glob(test_path):
    img = load_img(file, target_size=(img_width, img_height))
    img_array = img_to_array(img)
    # Same scaling as the training data.
    x_test[i] = img_array * pixel_scale

    label = os.path.basename(file).split('_')[0]
    y_test[i][0 if label == '정상' else 1] = 1
    i += 1


In [None]:
# Model architecture (functional API): three conv + max-pool stages with
# 32, 64 and 128 filters, then two dense layers and a softmax output with
# one unit per class.
conv_options = dict(kernel_size=(3, 3), activation='relu', padding='same')
pool_options = dict(pool_size=(2, 2))

inputs = Input(shape=input_shape)

conv1 = Conv2D(filters=32, **conv_options)(inputs)
pool1 = MaxPooling2D(**pool_options)(conv1)

conv2 = Conv2D(filters=64, **conv_options)(pool1)
pool2 = MaxPooling2D(**pool_options)(conv2)

conv3 = Conv2D(filters=128, **conv_options)(pool2)
pool3 = MaxPooling2D(**pool_options)(conv3)

flatten = Flatten()(pool3)
fc1 = Dense(units=512, activation='relu')(flatten)
fc2 = Dense(units=256, activation='relu')(fc1)
outputs = Dense(units=nb_classes, activation='softmax')(fc2)

# Wrap the layer graph into a model.
model = Model(inputs=inputs, outputs=outputs)
# The network ends in a softmax over one-hot labels, so categorical
# cross-entropy is the loss that matches it (binary cross-entropy is meant
# for independent sigmoid outputs).
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping: give up after 5 epochs without val_loss improvement and
# roll back to the best epoch's weights, so the evaluation below scores the
# best model rather than the one from 5 epochs later.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


# Training, with the defect class up-weighted through class_weight.
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test, y_test),
          class_weight=class_weight, callbacks=[early_stopping])

# Evaluation.
# NOTE(review): the same test arrays drive early stopping above and the score
# here, so these numbers are optimistic; consider a separate validation split.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


In [None]:
# Derive the class weights from the labels actually stored in y_train, so the
# two counts are on the same scale (both include augmented rows). Dividing
# nb_train_samples (files) by count_defect (rows incl. augmentation) mixed
# two scales and raised ZeroDivisionError when no defect was present.
n_normal_rows = float(y_train[:, 0].sum())
n_defect_rows = float(y_train[:, 1].sum())
class_weight = {0: 1, 1: n_normal_rows / n_defect_rows if n_normal_rows and n_defect_rows else 1}

# Class 0 is the normal label and class 1 is the defect label.
# Class 0 keeps weight 1; class 1 is weighted by (normal rows / defect rows),
# i.e. the rarer the defects, the larger their weight.
# The resulting dict is passed to fit() through its class_weight parameter.

In [None]:
# Training: up to 100 epochs in batches of 32, validating on the test arrays
# and applying the class weights and the early-stopping callback.
training_options = dict(
    batch_size=32,
    epochs=100,
    validation_data=(x_test, y_test),
    class_weight=class_weight,
    callbacks=[early_stopping],
)
model.fit(x_train, y_train, **training_options)

# Evaluation: the first two entries of the result are printed as the loss
# and the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

# 최종 완성코드

In [None]:
import os
import glob
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Model
from keras.callbacks import EarlyStopping

# Glob patterns selecting the training and test image files.
train_path = 'train/*.jpg'
test_path = 'test/*.jpg'

# Size every image is resized to when it is loaded.
img_width = 224
img_height = 224

# Shape of a single model input: the two image dimensions plus 3 channels.
input_shape = (img_width, img_height, 3)

# Number of files currently matched by each pattern.
nb_train_samples, nb_test_samples = (
    len(glob.glob(pattern)) for pattern in (train_path, test_path)
)

# Number of target classes.
nb_classes = 2

# Random augmentation settings; `rescale` is the factor the generator
# multiplies pixel values by, the rest are random geometric transforms.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
datagen_train = ImageDataGenerator(**augmentation_options)

# Build the training set: every source image contributes itself plus
# `nb_aug_per_image` randomly augmented copies (1 + 10 = 11 rows per file,
# which is exactly what the buffers below are sized for).
nb_aug_per_image = 10
# Must match the `rescale` factor configured on datagen_train so that the
# original, augmented and test images all share the same value range.
pixel_scale = 1. / 255

x_train = np.zeros((nb_train_samples * (1 + nb_aug_per_image), img_width, img_height, 3), dtype=np.float32)
y_train = np.zeros((nb_train_samples * (1 + nb_aug_per_image), nb_classes), dtype=np.float32)

i = 0             # next free row in x_train / y_train
count_defect = 0  # rows (originals + augmented copies) labelled as defect
for file in glob.glob(train_path):
    img = load_img(file, target_size=(img_width, img_height))
    img_array = img_to_array(img)

    # The label is the part of the file name before the first '_':
    # '정상' -> class 0 (normal), anything else -> class 1 (defect).
    label = os.path.basename(file).split('_')[0]
    class_index = 0 if label == '정상' else 1

    # Original image, scaled the same way the generator scales its output.
    x_train[i] = img_array * pixel_scale
    y_train[i][class_index] = 1
    count_defect += class_index
    i += 1

    # Augmented copies. flow() yields batches indefinitely, so count the
    # copies taken for THIS image and stop after nb_aug_per_image. (Breaking
    # on `i % nb_train_samples == 0` produced nb_train_samples - 1 copies per
    # image and overran the buffer for datasets with more than 11 files.)
    img_aug = img_array.reshape((1,) + img_array.shape)
    for aug_count, batch in enumerate(datagen_train.flow(img_aug, batch_size=1), start=1):
        x_train[i] = batch[0]
        y_train[i][class_index] = 1
        count_defect += class_index
        i += 1
        if aug_count >= nb_aug_per_image:
            break

# Class weights: class 0 (normal) keeps weight 1 and class 1 (defect) is
# weighted by normal rows / defect rows, so both classes contribute equally
# to the loss. Both counts include the augmented copies; dividing the
# un-augmented file count by the augmented defect count mixed two scales.
# Falls back to 1 when either class is absent to avoid a zero division or a
# zero weight.
count_normal = i - count_defect
class_weight = {0: 1, 1: count_normal / count_defect if count_normal and count_defect else 1}

# Build the test set: one row per file, no augmentation.
x_test = np.zeros((nb_test_samples, img_width, img_height, 3), dtype=np.float32)
y_test = np.zeros((nb_test_samples, nb_classes), dtype=np.float32)

i = 0  # next free row in x_test / y_test
for file in glob.glob(test_path):
    img = load_img(file, target_size=(img_width, img_height))
    img_array = img_to_array(img)
    # Same scaling as the training data.
    x_test[i] = img_array * pixel_scale

    label = os.path.basename(file).split('_')[0]
    y_test[i][0 if label == '정상' else 1] = 1
    i += 1

# Model architecture (functional API): three conv + max-pool stages with
# 32, 64 and 128 filters, then two dense layers and a softmax output with
# one unit per class.
conv_options = dict(kernel_size=(3, 3), activation='relu', padding='same')
pool_options = dict(pool_size=(2, 2))

inputs = Input(shape=input_shape)

conv1 = Conv2D(filters=32, **conv_options)(inputs)
pool1 = MaxPooling2D(**pool_options)(conv1)

conv2 = Conv2D(filters=64, **conv_options)(pool1)
pool2 = MaxPooling2D(**pool_options)(conv2)

conv3 = Conv2D(filters=128, **conv_options)(pool2)
pool3 = MaxPooling2D(**pool_options)(conv3)

flatten = Flatten()(pool3)
fc1 = Dense(units=512, activation='relu')(flatten)
fc2 = Dense(units=256, activation='relu')(fc1)
outputs = Dense(units=nb_classes, activation='softmax')(fc2)

# Wrap the layer graph into a model.
model = Model(inputs=inputs, outputs=outputs)
# The network ends in a softmax over one-hot labels, so categorical
# cross-entropy is the loss that matches it (binary cross-entropy is meant
# for independent sigmoid outputs).
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Early stopping: give up after 5 epochs without val_loss improvement and
# roll back to the best epoch's weights, so the evaluation below scores the
# best model rather than the one from 5 epochs later.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Training, with the defect class up-weighted through class_weight.
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test, y_test), class_weight=class_weight, callbacks=[early_stopping])

# Evaluation.
# NOTE(review): the same test arrays drive early stopping above and the score
# here, so these numbers are optimistic; consider a separate validation split.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


## 그래프로 확인

In [None]:
import matplotlib.pyplot as plt

# Keep the History object returned by fit() so the per-epoch metrics can be
# plotted.
# NOTE(review): `model` has already been trained by the earlier fit() call in
# this notebook, so this call continues training it and the curves below
# describe this second run only.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=100,
    validation_data=(x_test, y_test),
    class_weight=class_weight,
    callbacks=[early_stopping],
)

# One figure per metric:
# (training key, validation key, title, y-axis label, legend position).
curve_specs = (
    ('loss', 'val_loss', 'Model Loss', 'Loss', 'upper right'),
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
)
for train_key, val_key, title, y_label, legend_loc in curve_specs:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.show()


#위 코드를 실행하면 학습 결과를 저장한 history 객체를 이용하여 손실 그래프와 정확도 그래프를 그릴 수 있습니다.