In [1]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import cv2
import os
import tensorflow as tf
import json
import crop


from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import LearningRateScheduler





### _00: 여드름(acne)에 대해 bbox_crop

In [2]:
label_key = "_F_00.json"
annotation_key = "acne"

In [3]:
image_data, target_data = crop.process_files(label_key, annotation_key)

In [4]:
print(f"Number of images processed: {len(image_data)}")
print(f"Number of targets processed: {len(target_data)}")

Number of images processed: 2574
Number of targets processed: 2574


### 데이터 저장 및 불러오기

In [5]:
# 이미지, 타겟 데이터 저장

with open(f'crop_data/{annotation_key}_image_data.pkl', 'wb') as f:
    pickle.dump(image_data, f)

with open(f'crop_data/{annotation_key}_target_data.pkl', 'wb') as f:
    pickle.dump(target_data, f)

In [6]:
# 데이터 로드
with open(f'crop_data/{annotation_key}_image_data.pkl', 'rb') as f:
    image_data = pickle.load(f)

with open(f'crop_data/{annotation_key}_target_data.pkl', 'rb') as f:
    target_data = pickle.load(f)

In [None]:
from collections import Counter

# Null 값을 0으로 변환
target_data = {key: (value if value is not None else 0) for key, value in target_data.items()}

# 평탄화 함수
def flatten(lst):
    flat_list = []
    for item in lst:
        if isinstance(item, list):  # 중첩 리스트 처리
            flat_list.extend(flatten(item))
        elif isinstance(item, dict):  # 사전은 문자열로 변환
            flat_list.append(str(item))
        else:
            flat_list.append(item)
    return flat_list

# 데이터 평탄화
values_list = flatten(list(target_data.values()))

# 데이터 카운트
value_counts = Counter(values_list)

# for value, count in value_counts.items():
#     print(f"Value: {value}, Count: {count}")

In [20]:
# 데이터를 문자열로 변환하여 Count 계산
values_list = [str(value) if isinstance(value, (dict, list)) else value for value in target_data.values()]
value_counts = Counter(values_list)

# Count가 1인 항목 처리
for value, count in value_counts.items():
    if count == 1:
        for key in target_data:
            # 원래 값을 문자열로 변환하여 비교
            if str(target_data[key]) == value:
                target_data[key] = 1

# 결과 출력
print(target_data)

{'0002_01_F_00': 0, '0003_01_F_00': 1, '0006_01_F_00': 0, '0007_01_F_00': 0, '0008_01_F_00': 0, '0009_01_F_00': 0, '0010_01_F_00': 0, '0011_01_F_00': 0, '0012_01_F_00': 0, '0014_01_F_00': 0, '0016_01_F_00': 0, '0017_01_F_00': 0, '0018_01_F_00': 0, '0019_01_F_00': 0, '0020_01_F_00': 0, '0022_01_F_00': 0, '0023_01_F_00': 0, '0024_01_F_00': 0, '0025_01_F_00': 0, '0026_01_F_00': 0, '0027_01_F_00': 0, '0028_01_F_00': 0, '0029_01_F_00': 0, '0030_01_F_00': 0, '0031_01_F_00': 0, '0032_01_F_00': 0, '0033_01_F_00': 0, '0035_01_F_00': 0, '0036_01_F_00': 0, '0037_01_F_00': 0, '0038_01_F_00': 0, '0039_01_F_00': 0, '0040_01_F_00': 0, '0041_01_F_00': 1, '0042_01_F_00': 0, '0043_01_F_00': 0, '0044_01_F_00': 0, '0045_01_F_00': 0, '0046_01_F_00': 0, '0047_01_F_00': 0, '0048_01_F_00': 0, '0049_01_F_00': 0, '0051_01_F_00': 0, '0052_01_F_00': 0, '0054_01_F_00': 0, '0055_01_F_00': 0, '0057_01_F_00': 0, '0058_01_F_00': 1, '0060_01_F_00': 0, '0062_01_F_00': 0, '0064_01_F_00': 0, '0065_01_F_00': 0, '0066_01_F_

### 여드름 유무로 모델링

In [25]:
# 값이 0과 1인 항목의 개수 확인
count_0 = sum(1 for value in target_data.values() if value == 0)
count_1 = sum(1 for value in target_data.values() if value == 1)

# 결과 출력
print(f"Count of 0: {count_0}")
print(f"Count of 1: {count_1}")

Count of 0: 2056
Count of 1: 518


In [22]:
common_keys = set(image_data.keys()) & set(target_data.keys())

# 데이터 정렬
filtered_image_data = {key: image_data[key] for key in common_keys}
filtered_target_data = {key: target_data[key] for key in common_keys}

# X와 y 생성
X = np.array(list(filtered_image_data.values()))
y = np.array(list(filtered_target_data.values()))

print(f"Length of X: {len(X)}, Length of y: {len(y)}")

Length of X: 2574, Length of y: 2574


In [23]:
X = X / 255.0

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (2059, 128, 128, 3), y_train shape: (2059,)
X_test shape: (515, 128, 128, 3), y_test shape: (515,)


In [24]:
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(X.shape[1], X.shape[2], X.shape[3])),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # 이진 분류 출력층
])

# 모델 컴파일
model.compile(loss='binary_crossentropy', 
              optimizer=Adam(learning_rate=0.0001), 
              metrics=['accuracy'])

# 모델 요약
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 63, 63, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 30, 30, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 128)      

In [26]:
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)


In [27]:
# 모델 학습
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    class_weight = class_weights,
    callbacks=[early_stopping]
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 19: early stopping


In [28]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

17/17 - 1s - loss: 0.2539 - accuracy: 0.8602 - 758ms/epoch - 45ms/step
Test Loss: 0.25392305850982666
Test Accuracy: 0.8601941466331482


In [29]:
# 예측 수행 (확률 값 반환)
predictions = model.predict(X_test)  # 이진 분류에서는 sigmoid 출력

# 0.5를 기준으로 클래스 결정
predicted_classes = (predictions > 0.5).astype(int).flatten()

print("Predicted Classes: ", predicted_classes)


Predicted Classes:  [1 0 1 0 0 1 0 0 1 1 0 1 0 0 0 1 1 0 1 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0
 0 0 1 0 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0
 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 1 0 0 0 1 1 0 1 0 0 0 1 0
 0 0 1 0 0 1 0 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 1
 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0
 0 1 1 0 1 1 1 1 0 0 0 1 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1 1 1
 0 0 1 0 1 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0
 0 1 0 0 1 1 1 1 0 0 0 1 1 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 0 0
 0 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1
 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0
 0 0 0 1 1 0 1 1 0 1 0 0 0 1 0 1 1 1 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 1
 0 0 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0
 1 0 