In [1]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import cv2
import os
import tensorflow as tf
import json
import crop


from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler





### _06: 오른쪽 볼 모공(r_cheek_pore)에 대해 bbox_crop

In [2]:
label_key = "_F_06.json"
annotation_key = "r_cheek_pore"

In [3]:
image_data, target_data = crop.process_files(label_key, annotation_key)

In [4]:
print(f"Number of images processed: {len(image_data)}")
print(f"Number of targets processed: {len(target_data)}")

Number of images processed: 2574
Number of targets processed: 2574


### 데이터 저장 및 불러오기

In [5]:
# 이미지, 타겟 데이터 저장

with open(f'crop_data/{annotation_key}_image_data.pkl', 'wb') as f:
    pickle.dump(image_data, f)

with open(f'crop_data/{annotation_key}_target_data.pkl', 'wb') as f:
    pickle.dump(target_data, f)

In [6]:
# 데이터 로드
with open(f'crop_data/{annotation_key}_image_data.pkl', 'rb') as f:
    image_data = pickle.load(f)

with open(f'crop_data/{annotation_key}_target_data.pkl', 'rb') as f:
    target_data = pickle.load(f)

In [7]:
# 데이터 카운트
from collections import Counter

values_list = list(target_data.values())
value_counts = Counter(values_list)

for value, count in value_counts.items():
    print(f"Value: {value}, Count: {count}")

Value: 2, Count: 1569
Value: 1, Count: 450
Value: 4, Count: 132
Value: 3, Count: 321
Value: 5, Count: 30
Value: 0, Count: 72


### (0,1), (3,4,5)로 모델링

In [8]:
def merge_classes(target_data):
    binary_target_data = {}
    for key, value in target_data.items():
        if value in [0, 1]:
            binary_target_data[key] = '10'
        elif value in [3, 4, 5]:
            binary_target_data[key] = '20'
        else:
            binary_target_data[key] = value
    
    return binary_target_data

binary_target_data = merge_classes(target_data)

In [9]:
filtered_target_data = {key: value for key, value in binary_target_data.items() if value in ['10', '20']}

values_list = list(filtered_target_data.values())
value_counts = Counter(values_list)

for value, count in value_counts.items():
    print(f"Value: {value}, Count: {count}")

Value: 10, Count: 522
Value: 20, Count: 483


In [10]:
common_keys = set(image_data.keys()) & set(filtered_target_data.keys())

# 데이터 정렬
filtered_image_data = {key: image_data[key] for key in common_keys}
filtered_target_data = {key: filtered_target_data[key] for key in common_keys}

# X와 y 생성
X = np.array(list(filtered_image_data.values()))
y = np.array(list(filtered_target_data.values()))

print(f"Length of X: {len(X)}, Length of y: {len(y)}")

Length of X: 1005, Length of y: 1005


In [11]:
class_mapping = {'10': 0, '20': 1}
y = np.array([class_mapping[label] for label in y])
X = X / 255.0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (804, 128, 128, 3), y_train shape: (804,)
X_test shape: (201, 128, 128, 3), y_test shape: (201,)


In [12]:
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(X.shape[1], X.shape[2], X.shape[3])),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # 이진 분류 출력층
])

# 모델 컴파일
model.compile(loss='binary_crossentropy', 
              optimizer=Adam(learning_rate=0.0001), 
              metrics=['accuracy'])

# 모델 요약
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 63, 63, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 30, 30, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 128)      

In [13]:
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)


In [14]:
# 모델 학습
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    class_weight = class_weights,
    callbacks=[early_stopping]
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 60: early stopping


In [15]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

7/7 - 0s - loss: 0.4481 - accuracy: 0.8109 - 303ms/epoch - 43ms/step
Test Loss: 0.4480682909488678
Test Accuracy: 0.8109452724456787


In [16]:
# 예측 수행 (확률 값 반환)
predictions = model.predict(X_test)  # 이진 분류에서는 sigmoid 출력

# 0.5를 기준으로 클래스 결정
predicted_classes = (predictions > 0.5).astype(int).flatten()

print("Predicted Classes: ", predicted_classes)


Predicted Classes:  [1 1 1 0 1 0 0 1 0 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 0 1 0 0 0 1 0 1 0 1 0 1
 1 1 0 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 1 1 0 0 1 1 0 0 1 1 0 1 1 0 0
 0 0 0 0 1 1 1 0 1 0 1 0 0 1 1 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 0 1 1
 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 0 1 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0 0 0 1 0 0
 0 0 1 0 1 1 0 1 0 1 0 0 0 1 1 1 1 0 0 0 0 0 1 0 0 1 1 1 0 1 1 0 0 0 1 1 1
 1 1 1 1 1 1 1 0 1 1 0 0 0 1 0 0]
