In [9]:
# Choose which GPU is visible to this process (useful on a two-GPU machine;
# use index 0 when only one GPU is installed).
# Set the gpu idx below to 0 or 1.
# NOTE(review): presumably this has to run before TensorFlow is imported so the
# CUDA runtime picks the variables up -- keep this cell first.
import os

os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": str(1),  # gpu idx
})

In [10]:
# 경로에 폴더가 없으면 폴더 만들기
import os

def createDirectory(directory):
    """Create ``directory`` (and any missing parents) if it does not exist yet.

    This is a best-effort helper: a failure is reported on stdout and then
    swallowed, so callers are never interrupted by an exception.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok=True makes the call idempotent without a separate
        # os.path.exists() check, which could race with another process and
        # which silently accepted a regular file sitting at the same path.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print("Error: Failed to create the directory.")

In [11]:
import numpy as np
import itertools
import pathlib
import cv2

import matplotlib.pyplot as plt

from tensorflow.keras import layers
from tensorflow import keras
from tqdm import tqdm

# Notebook-wide configuration used by the dataset, model and training cells.
EPOCH = 100          # number of epochs passed to models.fit
KERNEL_SIZE = 3      # NOTE(review): not referenced by the visible cells; the model hard-codes its kernel sizes
BATCH_SIZE = 128     # batch size of the tf.data datasets built from the image folders
IMAGE_HEIGHT = 256   # images are resized to IMAGE_HEIGHT x IMAGE_WIDTH when loaded
IMAGE_WIDTH = 256

# Root folder of the dataset; the cells below read one sub-folder per class from it.
DATA_PATH = "./graph_data/"

def list_to_list(input_list):
    """Flatten one level of nesting: ``[[a, b], [c]]`` becomes ``[a, b, c]``.

    Args:
        input_list: An iterable whose items are themselves iterables.

    Returns:
        A new list with the items of every inner iterable, in order.
    """
    return list(itertools.chain.from_iterable(input_list))

# 데이터 불러오기

In [12]:
# Resolve the dataset root and report how many PNG files sit one level below it.
data_dir = pathlib.Path(DATA_PATH)
print(data_dir)

image_count = sum(1 for _ in data_dir.glob('*/*.png'))
print(image_count)

# One list of paths per class folder (F, N, Q, S, V), in that order.
f, n, q, s, v = (
    list(data_dir.glob(pattern))
    for pattern in ('F/*', 'N/*', 'Q/*', 'S/*', 'V/*')
)

graph_data
112599


# 데이터 split
## train, test, validation data 나누기

In [13]:
# Load every image below DATA_PATH into memory, grouped by class folder.
# Each image is decoded, converted BGR -> RGB, resized to
# IMAGE_WIDTH x IMAGE_HEIGHT and scaled to [0, 1].
#
# NOTE(review): at 256x256x3 float32 each image takes ~0.75 MB, so the ~112k
# files reported above will likely not fit in RAM. The training cells below
# stream from disk with image_dataset_from_directory instead -- confirm this
# cell is still needed.
parents_path = DATA_PATH
# Only class folders are walked; sorting keeps the class order deterministic.
child_path = sorted(
    name for name in os.listdir(parents_path)
    if os.path.isdir(os.path.join(parents_path, name))
)

converted_img = list()

for pic_path in child_path:
    class_dir = os.path.join(parents_path, pic_path)
    print("[INFO] Current path : " + parents_path + pic_path)

    # Start a fresh list for every class. Reusing a single list made each
    # class entry an alias of the same, ever-growing list of all images.
    temp_converted_img = list()
    for file_name in tqdm(os.listdir(class_dir)):
        path_for_array = os.path.join(class_dir, file_name)

        # Read the raw bytes and decode them in memory.
        img_array = np.fromfile(path_for_array, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if img is None:
            # imdecode returns None for data it cannot decode; skip the file
            # instead of crashing in cvtColor.
            print("[WARN] Skipping unreadable image : " + path_for_array)
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # cv2.resize expects dsize as (width, height).
        img_resize = cv2.resize(img, dsize=(IMAGE_WIDTH, IMAGE_HEIGHT), interpolation=cv2.INTER_AREA)
        # float32 halves the memory footprint compared with the float64
        # produced by dividing a uint8 array by a Python float.
        temp_converted_img.append(img_resize.astype(np.float32) / 255.0)
    converted_img.append(temp_converted_img)

# Classes hold different numbers of images, so the result is a 1-D object
# array with one (n_images, height, width, 3) float32 array per class.
per_class_arrays = [np.asarray(images, dtype=np.float32) for images in converted_img]
converted_img = np.empty(len(per_class_arrays), dtype=object)
for class_index, class_images in enumerate(per_class_arrays):
    converted_img[class_index] = class_images

[INFO] Current path : ./graph_data/F


100%|██████████| 803/803 [00:09<00:00, 87.58it/s]


[INFO] Current path : ./graph_data/N


 30%|███       | 27276/90631 [05:53<14:33, 72.49it/s]  

In [None]:
# Inspect the in-memory image array built by the previous cell.
# NOTE(review): printing the whole array is rarely useful; consider printing
# converted_img.shape / converted_img.dtype instead.
print(converted_img)

In [6]:
# Training split: 80% of the images under data_dir, streamed from disk as
# grayscale batches with integer class labels.
train_ds = keras.preprocessing.image_dataset_from_directory(
    data_dir,
    # -- what each sample looks like
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    color_mode='grayscale',
    label_mode='int',  # switch to 'categorical' together with a categorical_crossentropy loss
    # -- batching / ordering
    batch_size=BATCH_SIZE,
    shuffle=True,
    # -- split definition; seed and fraction must match the validation dataset
    seed=1234,
    validation_split=0.2,
    subset="training",
)

Found 112599 files belonging to 5 classes.
Using 90080 files for training.


In [7]:
# Validation split: the remaining 20% of the images under data_dir, loaded
# with the same settings as the training dataset.
# NOTE(review): with shuffle=True the dataset is presumably reshuffled on every
# pass, so labels and predictions must be collected in the same iteration if
# they are going to be compared sample by sample -- confirm.
val_ds = keras.preprocessing.image_dataset_from_directory(
    data_dir,
    # -- what each sample looks like
    image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    color_mode='grayscale',
    label_mode='int',  # switch to 'categorical' together with a categorical_crossentropy loss
    # -- batching / ordering
    batch_size=BATCH_SIZE,
    shuffle=True,
    # -- split definition; seed and fraction must match the training dataset
    seed=1234,
    validation_split=0.2,
    subset="validation",
)

Found 112599 files belonging to 5 classes.
Using 22519 files for validation.


# 모델 생성

In [9]:
# Single-channel (grayscale) input whose size follows the notebook constants,
# so the model always matches what the datasets produce.
input_size = (IMAGE_HEIGHT, IMAGE_WIDTH, 1)

# Four Conv2D + MaxPool stages followed by a dense classifier with 5 outputs.
# Only the first layer declares the input shape; repeating input_shape on the
# later layers had no effect and was misleading.
#
# NOTE(review): pixel rescaling is disabled (the original commented-out line
# referenced `tf`, which this notebook never imports), so the network
# presumably receives raw 0-255 intensities. Adding a rescaling layer would
# change the architecture of already-saved checkpoints -- decide deliberately.
models = keras.Sequential([
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=input_size),
    layers.MaxPool2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(128, kernel_size=(2, 2), activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(256, kernel_size=(3, 3), activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2), strides=2),

    layers.Conv2D(512, kernel_size=(2, 2), activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2), strides=2),

    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(4096, activation='relu'),
    layers.Dense(5, activation='softmax')
])

In [10]:
# Print the layer-by-layer output shapes and parameter counts of the model.
models.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 64)      640       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 127, 127, 64)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 126, 126, 128)     32896     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 63, 63, 128)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 61, 61, 256)       295168    
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 30, 30, 256)      0

In [11]:
# Configure training. The sparse loss matches the integer labels produced by
# label_mode='int'; use 'categorical_crossentropy' if the datasets are switched
# to one-hot ('categorical') labels.
models.compile(
    loss="sparse_categorical_crossentropy",
    optimizer='adam',
    metrics=['accuracy'],
)

### 콜백 설정

In [12]:
# 콜백 설정
from keras.callbacks import EarlyStopping, ModelCheckpoint

outDir = './cheakpoint/lefms_model/' # the best model is saved under this path
# Checkpoint file name template; {val_accuracy} is filled in by the checkpoint callback.
model_names = outDir + 'weights-{val_accuracy:.4f}.h5'

def get_callbacks(patience = 50, use_early_stopping = False):
    """Build the list of Keras callbacks used during training.

    A ModelCheckpoint that keeps only the best model (by ``val_accuracy``) is
    always included. Early stopping is optional and off by default, which
    matches the previous behaviour where it was commented out.

    Args:
        patience: Epochs without ``val_accuracy`` improvement before early
            stopping triggers. Only used when ``use_early_stopping`` is True.
        use_early_stopping: When True, put an EarlyStopping callback in front
            of the checkpoint callback.

    Returns:
        A flat list of callback instances, ready for ``Model.fit(callbacks=...)``.
    """
    # The deprecated `period=1` argument is dropped: saving once per epoch is
    # already ModelCheckpoint's default behaviour.
    model_checkpoint = ModelCheckpoint(model_names, monitor='val_accuracy', verbose=1, save_best_only=True)

    callbacks = [model_checkpoint]
    if use_early_stopping:
        earlystop = EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=patience)
        callbacks.insert(0, earlystop)
    return callbacks

# 학습하기

In [13]:
# Train the model on the streamed training set and validate after each epoch.
callbacks = get_callbacks()

models_hist = models.fit(
    train_ds,
    # No batch_size here: train_ds is already batched by
    # image_dataset_from_directory, and Keras expects batch_size to be left
    # unset for dataset inputs.
    epochs=EPOCH,
    validation_data=val_ds,
    # get_callbacks() already returns a list; wrapping it in another list
    # produced a nested list of callbacks.
    callbacks=callbacks,
)

Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.96736, saving model to ./cheakpoint/lefms_model\weights-0.9674.h5
Epoch 2/100
Epoch 2: val_accuracy improved from 0.96736 to 0.97766, saving model to ./cheakpoint/lefms_model\weights-0.9777.h5
Epoch 3/100
Epoch 3: val_accuracy improved from 0.97766 to 0.98126, saving model to ./cheakpoint/lefms_model\weights-0.9813.h5
Epoch 4/100
Epoch 4: val_accuracy improved from 0.98126 to 0.98241, saving model to ./cheakpoint/lefms_model\weights-0.9824.h5
Epoch 5/100
Epoch 5: val_accuracy improved from 0.98241 to 0.98370, saving model to ./cheakpoint/lefms_model\weights-0.9837.h5
Epoch 6/100
Epoch 6: val_accuracy did not improve from 0.98370
Epoch 7/100

# 결과 시각화 하기

In [None]:
# 학습 된 모델의 학습 과정 시각화
import matplotlib.pyplot as plt
def plot_model__hist(hist, path='./cheakpoint/lefms/'):
    """Plot, save and show the loss and accuracy curves of a training run.

    Two figures are written into ``path`` (created if missing):
    ``model_loss_hist.png`` and ``model_accuracy_hist.png``.

    Args:
        hist: Object whose ``history`` mapping holds the per-epoch lists
            'loss', 'val_loss', 'accuracy' and 'val_accuracy' (for example the
            value returned by ``models.fit``).
        path: Output directory for the PNG files.
    """
    createDirectory(path)
    # Apply the style before any figure is created so both figures use it.
    plt.style.use("ggplot")

    # (history key, output file name, legend location)
    panels = (
        ('loss', 'model_loss_hist.png', 'best'),
        ('accuracy', 'model_accuracy_hist.png', 'lower right'),
    )
    for metric, file_name, legend_loc in panels:
        plt.figure(figsize=(6,6))
        plt.plot(hist.history[metric], color='b', label="Training " + metric)
        plt.plot(hist.history['val_' + metric], color='r', label="Validation " + metric)
        # The legend is drawn BEFORE saving so it appears in the PNG, and each
        # metric gets its own file so the accuracy plot no longer overwrites
        # the loss plot.
        plt.legend(loc=legend_loc)
        plt.savefig(os.path.join(path, file_name))
        plt.show()

In [None]:
# Plot the training curves, then score the in-memory model on the validation set.
plot_model__hist(models_hist)
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
loss,acc = models.evaluate(val_ds, verbose=2)
print("multi_model의 정확도: {:5.2f}%".format(100*acc))
print("multi_model의 Loss: {}".format(loss))

# 모델 불러와서 confusion matrix 그리기

In [None]:
# Load a saved model from the checkpoint directory.
# NOTE(review): the file name is hard-coded; it must match a checkpoint that a
# training run actually wrote (weights-<val_accuracy>.h5) -- adjust as needed.
reconstructed_model = keras.models.load_model("./cheakpoint/lefms_model/weights-0.9924.h5")

In [None]:
# Get the predictions together with their matching ground-truth labels.
# val_ds is built with shuffle=True, so a separate pass over it to fetch the
# labels would not be guaranteed to visit samples in the same order as
# predict(val_ds). Both are therefore collected in ONE pass.
_pred_batches = []
_label_batches = []
for _batch_images, _batch_labels in val_ds:
    _pred_batches.append(reconstructed_model.predict_on_batch(_batch_images))
    _label_batches.append(_batch_labels.numpy())

# (n_samples, n_classes) class probabilities.
y_pred = np.concatenate(_pred_batches, axis=0)

# The cells below treat y_test as one-hot rows, so the integer labels yielded
# by val_ds are expanded to that form here.
_int_labels = np.concatenate(_label_batches, axis=0).astype(int)
y_test = np.eye(y_pred.shape[1], dtype=np.float32)[_int_labels]

In [None]:
# Collapse the one-hot encoded ground truth into a single class id per sample.
# Ids are 1-based (position of the first maximum + 1), matching new_y_pred.
# The previous hand-written scan rebound the builtin `max` at notebook scope;
# argmax avoids that and picks the same (first) maximum.
# NOTE(review): y_test must be defined by an earlier cell; both one-hot rows
# and plain integer labels (assumed 0-based) are accepted here -- confirm.
_y_true = np.asarray(y_test)
if _y_true.ndim > 1:
    _y_true = _y_true.argmax(axis=1)
new_y = (_y_true.astype(int) + 1).tolist()

In [None]:
# Collapse the predicted class probabilities into a single class id per sample.
# Ids are 1-based (position of the first maximum + 1), matching new_y.
# The previous hand-written scan rebound the builtin `max` at notebook scope;
# argmax avoids that and picks the same (first) maximum.
new_y_pred = (np.asarray(y_pred).argmax(axis=1) + 1).tolist()

In [None]:
# Compute the final accuracy of the reloaded model.
# No X_test array is defined anywhere in this notebook, so the model is scored
# on the validation dataset, exactly like the freshly trained model above.
# NOTE(review): score[0] / score[1] are presumably loss / accuracy as restored
# from the saved model's compile settings -- confirm.
score = reconstructed_model.evaluate(val_ds, verbose=1)
print('정답률 = ', score[1],'loss=', score[0])

### confusion matrix 그리기

In [None]:
# 개수 버전
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score
import seaborn as sns

# Confusion matrix with raw counts.
# Tick labels come from the dataset itself: Keras assigns class indices in
# sorted folder order, which differs from the previously hard-coded
# N, S, V, F, Q order and would mislabel every row and column.
class_labels = ["{} = {}".format(index, name) for index, name in enumerate(val_ds.class_names)]
# Heatmap cells are centred on half-integer coordinates.
tick_positions = np.arange(len(class_labels)) + 0.5

cm2 = confusion_matrix(new_y, new_y_pred)
sns.heatmap(cm2, annot = True, fmt = 'd', cmap= 'Reds')
plt.xlabel('predict')
plt.ylabel('real')
plt.xticks(tick_positions, class_labels)
plt.yticks(tick_positions, class_labels)
plt.show()

In [None]:
# percentile 버전
# Row-normalised confusion matrix: each row shows the fraction of that true
# class assigned to every predicted class.
# Tick labels come from the dataset itself: Keras assigns class indices in
# sorted folder order, which differs from the previously hard-coded
# N, S, V, F, Q order.
class_labels = ["{} = {}".format(index, name) for index, name in enumerate(val_ds.class_names)]
tick_positions = np.arange(len(class_labels)) + 0.5

total = np.sum(cm2, axis=1)
# Rows without any true samples stay at 0 instead of becoming NaN (0 / 0).
cm2_percentile = np.divide(
    cm2,
    total[:, None],
    out=np.zeros(cm2.shape, dtype=float),
    where=total[:, None] != 0,
)
sns.heatmap(np.round(cm2_percentile,3), annot = True, cmap= 'Reds')
plt.xlabel('predict')
plt.ylabel('real')
plt.xticks(tick_positions, class_labels)
plt.yticks(tick_positions, class_labels)
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
# Display the raw confusion-matrix array (rows: true class, columns: predicted class).
confusion_matrix(new_y, new_y_pred)

In [None]:
# classification_report 그리기
from sklearn.metrics import classification_report
# Per-class precision / recall / F1.
# Names come from the dataset itself: Keras assigns class indices in sorted
# folder order, which differs from the previously hard-coded N, S, V, F, Q
# order and would attach the scores to the wrong classes.
target_names = ["{} = {}".format(index, name) for index, name in enumerate(val_ds.class_names)]
print(classification_report(new_y, new_y_pred, target_names=target_names))