In [None]:
import timeit
import io
import os
import numpy as np
import tensorflow as tf

from PIL import Image  # 파이썬 이미지 라이브러리
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Load the saved model inputs and their targets from the .npy files in the working directory.
X, Y = (
    np.load('./211024_rgb_cnn_input.npy'),
    np.load('./211024_rgb_cnn_target.npy'),
)

In [None]:
# Show the array shapes of the input set and the target set.
print("인풋 세트의 배열형태: ", np.shape(X))
print("타겟 세트의 배열형태: ", np.shape(Y))

인풋 세트의 배열형태:  (4504, 128, 128, 3)
타겟 세트의 배열형태:  (4504,)


In [None]:
# Count how many samples carry each label. np.unique returns the distinct
# labels in sorted order together with the number of occurrences of each.
x = np.unique(Y, return_counts=True)

# NOTE(review): assumes exactly two labels and that the smaller label value
# marks damaged containers — confirm against how the target file was built.
damage_class, normal_class = int(x[0][0]), int(x[0][1])
damage_count, normal_count = int(x[1][0]), int(x[1][1])

print("손상 컨테이너: {}장".format(damage_count))
print("정상 컨테이너: {}장".format(normal_count))

# Class balance expressed as percentages of all samples.
total = damage_count + normal_count
d_ratio, n_ratio = damage_count/total*100, normal_count/total*100

print("손상 사진 비율: {0:.2f}%, 정상 사진 비율: {1:.2f}%".format(d_ratio, n_ratio))

손상 컨테이너: 2272장
정상 컨테이너: 2232장
손상 사진 비율: 50.44%, 정상 사진 비율: 49.56%


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; random_state pins the split.
# An unshuffled split could be biased; train_test_split shuffles by default
# (shuffle=True), so no explicit flag is passed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Preprocessing: cast to float32 and divide by 255.
# NOTE: this cell overwrites X_train / X_test with their scaled versions, so
# running it a second time without re-running the split divides by 255 again.
my_size = 128
X_train = X_train.reshape(len(X_train), my_size, my_size, 3).astype(np.float32) / 255
X_test = X_test.reshape(len(X_test), my_size, my_size, 3).astype(np.float32) / 255
print("훈련 세트 배열: ", np.shape(X_train))
print("테스트 세트 배열: ", np.shape(X_test))

훈련 세트 배열:  (3603, 128, 128, 3)
테스트 세트 배열:  (901, 128, 128, 3)


In [None]:
# Load the Keras building blocks used by the model cells.
# Every layer is imported from `tensorflow.keras` (not the standalone `keras`
# package) so the layers come from the same package as the `Sequential` class
# they are added to; mixing the two packages can fail when their versions differ.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

import tensorflow as tf

# Seed NumPy's and TensorFlow's global random generators with a fixed value.
np.random.seed(3)
tf.random.set_seed(3)

### CNN을 깊게 구성한 모델로 해 봤더니, 결과가 별로 좋지 않았다
모델이 무겁다고 해서 성능이 좋은 것은 아니라는 점을 알게 되어, CNN을 가볍게 설계해 보았다.

- filter 개수가 32개이고 사이즈가 (3,3)인 conv layer를 한 층 쌓고,
- Max pooling layer를 1개 쌓고,
- Dense 층으로 분류를 해 주었다.

In [None]:
# Attempt 1: one 3x3 convolution with 32 filters, 2x2 max pooling, then a
# 256-unit dense layer with 25% dropout feeding a single sigmoid output unit.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), input_shape=(128, 128, 3), activation='relu'),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(1, activation='sigmoid'),
])
model.summary()


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 126, 126, 32)      896       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 63, 63, 32)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 127008)            0         
_________________________________________________________________
dense_6 (Dense)              (None, 256)               32514304  
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 257       
Total params: 32,515,457
Trainable params: 32,515,457
Non-trainable params: 0
__________________________________________

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:

# Train for 30 epochs with mini-batches of 67 samples.
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics are measured on the same samples as the final test score.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=30,
    batch_size=67,
)

In [None]:
# Apply the trained model to the test set; index 1 of the evaluate() result is
# printed as the accuracy (index 0 would be the loss).
test_metrics = model.evaluate(X_test, Y_test)
print('\n Test Accurary: %.4f' % test_metrics[1])


 Test Accurary: 0.8801


2번째 시도:  
    conv layer의 filter 개수를 32개에서 64개로 늘려서 시행.  
    그 결과, 정확도는 조금 올라가고 loss는 조금 내려갔다.  

In [None]:
# Attempt 2: same layout as before, but the convolution uses 64 filters.
# (No dropout sits between pooling and Flatten; the optional Dropout(0.25)
# at that position is disabled.)
model = Sequential([
    Conv2D(64, kernel_size=(3, 3), input_shape=(128, 128, 3), activation='relu'),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 126, 126, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 254016)            0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65028352  
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 257       
Total params: 65,030,401
Trainable params: 65,030,401
Non-trainable params: 0
__________________________________________

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:

# Train for 30 epochs with mini-batches of 67 samples.
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics are measured on the same samples as the final test score.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=30,
    batch_size=67,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Apply the trained model to the test set; index 1 of the evaluate() result is
# printed as the accuracy (index 0 would be the loss).
test_metrics = model.evaluate(X_test, Y_test)
print('\n Test Accurary: %.4f' % test_metrics[1])


 Test Accurary: 0.8901


3번째 시도:  
    conv layer의 filter 사이즈를 (3,3)에서 (4,4)로 키워서 시행.  
    그 결과, 정확도는 조금 내려갔지만 loss도 조금 내려가는 효과가 있었다.  

In [None]:
# Attempt 3: 64 filters with the convolution kernel enlarged to 4x4.
# (No dropout sits between pooling and Flatten; the optional Dropout(0.25)
# at that position is disabled.)
model = Sequential([
    Conv2D(64, kernel_size=(4, 4), input_shape=(128, 128, 3), activation='relu'),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 125, 125, 64)      3136      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 246016)            0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               62980352  
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 257       
Total params: 62,983,745
Trainable params: 62,983,745
Non-trainable params: 0
__________________________________________

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:

# Train for 30 epochs with mini-batches of 67 samples.
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics are measured on the same samples as the final test score.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=30,
    batch_size=67,
)

In [None]:
# Apply the trained model to the test set; index 1 of the evaluate() result is
# printed as the accuracy (index 0 would be the loss).
test_metrics = model.evaluate(X_test, Y_test)
print('\n Test Accurary: %.4f' % test_metrics[1])

In [None]:
# Attempt 4:
#     Run with the conv layer's filter (kernel) size reduced to (2, 2).
#     Result: accuracy rose slightly and loss dropped slightly.

In [None]:
# Attempt 4: 64 filters with the convolution kernel reduced to 2x2.
# (No dropout sits between pooling and Flatten; the optional Dropout(0.25)
# at that position is disabled.)
model = Sequential([
    Conv2D(64, kernel_size=(2, 2), input_shape=(128, 128, 3), activation='relu'),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(1, activation='sigmoid'),
])
model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:

# Train for 30 epochs with mini-batches of 67 samples.
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics are measured on the same samples as the final test score.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=30,
    batch_size=67,
)

In [None]:
# Apply the trained model to the test set; index 1 of the evaluate() result is
# printed as the accuracy (index 0 would be the loss).
test_metrics = model.evaluate(X_test, Y_test)
print('\n Test Accurary: %.4f' % test_metrics[1])

5번째 시도:  
    비슷한 사례의 논문을 참조하여, conv layer의 층 구성을 논문의 설계와 비슷하게 해 보았다.  
    그 결과, 정확도가 92%로 상승하였고 loss는 0.6077로 나왔다.

In [None]:
# Attempt 5: two stacks of paired 2x2 convolutions (64 -> 128, then 256 -> 512
# filters), each stack followed by 2x2 max pooling, ahead of the same dense head.
# (No dropout sits between the last pooling and Flatten; the optional
# Dropout(0.25) at that position is disabled.)
model = Sequential([
    Conv2D(64, kernel_size=(2, 2), input_shape=(128, 128, 3), activation='relu'),
    Conv2D(128, kernel_size=(2, 2), activation='relu'),
    MaxPooling2D(pool_size=2),
    Conv2D(256, kernel_size=(2, 2), activation='relu'),
    Conv2D(512, kernel_size=(2, 2), activation='relu'),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.25),
    Dense(1, activation='sigmoid'),
])
model.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:

# Train for 30 epochs with mini-batches of 67 samples.
# NOTE(review): the test split is passed as validation data, so the per-epoch
# validation metrics are measured on the same samples as the final test score.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=30,
    batch_size=67,
)

In [None]:
# Apply the trained model to the test set; index 1 of the evaluate() result is
# printed as the accuracy (index 0 would be the loss).
test_metrics = model.evaluate(X_test, Y_test)
print('\n Test Accurary: %.4f' % test_metrics[1])