In [1]:
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import cv2
import os
import tensorflow as tf
import json


from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping




### 이미지 불러오기

In [2]:
# 데이터가 저장된 최상위 경로
base_dir = r"D:\final_project_backup\FINAL_DATA\한국인 피부상태 측정 데이터\Training"
image_base_dir_camera = os.path.join(base_dir, "images", "camera")
label_base_dir_camera = os.path.join(base_dir, "labels", "camera")
image_base_dir_pad = os.path.join(base_dir, "images", "pad")
label_base_dir_pad = os.path.join(base_dir, "labels", "pad")
image_base_dir_phone = os.path.join(base_dir, "images", "phone")
label_base_dir_phone = os.path.join(base_dir, "labels", "phone")

In [3]:
# 딕셔너리에 결과 저장
image_data = {}  # 이미지 배열 데이터 {파일명: 이미지 배열}
target_data = {}  # 타겟 값 {파일명: 타겟 값}
# 리사이즈 크기 설정
target_size = (128, 128)

### _02 : 미간주름에 대해 bbox_crop

In [4]:
def find_image_path(base_dirs, id_folder, image_filename):
    for base_dir in base_dirs:
        image_path = os.path.join(base_dir, id_folder, image_filename)
        if os.path.exists(image_path):
            return image_path  # 이미지 경로를 찾으면 반환
    return None  # 모든 경로에서 이미지 파일을 찾지 못하면 None 반환

In [5]:
# 각 JSON 디렉토리 순회

label_dirs = [label_base_dir_camera, label_base_dir_pad, label_base_dir_phone]
base_dirs = [image_base_dir_camera, image_base_dir_pad, image_base_dir_phone]

for label_dir in label_dirs:
    for root, _, files in os.walk(label_dir):
        for file in files:
            if file.endswith("_F_02.json"):
                label_path = os.path.join(root, file)
                
                try:
                    # JSON 파일 로드
                    with open(label_path, 'r', encoding='utf-8') as f:
                        label_data = json.load(f)
                    
                    # JSON 데이터에서 이미지 파일명 및 id_folder 추출
                    image_filename = label_data['info']['filename']
                    id_folder = label_data['info']['id']
                    
                    # 이미지 경로 찾기
                    image_path = find_image_path(base_dirs, id_folder, image_filename)
                    if image_path is None:
                        print(f"Image file not found for {label_path}: {image_filename}")
                        continue
                    
                    # 이미지 파일 읽기
                    with open(image_path, 'rb') as img_file:
                        file_data = np.asarray(bytearray(img_file.read()), dtype=np.uint8)
                        image = cv2.imdecode(file_data, cv2.IMREAD_COLOR)
                    
                    if image is None:
                        print(f"Failed to decode image: {image_path}")
                        continue
                    
                    # bbox 유효성 검사
                    bbox = label_data['images']['bbox']
                    x_min, y_min, x_max, y_max = map(int, bbox)
                    if x_min >= x_max or y_min >= y_max:
                        print(f"Invalid bbox in file {label_path}: {bbox}")
                        continue
                    
                    # 이미지 크롭 및 리사이즈
                    cropped_image = image[y_min:y_max, x_min:x_max]
                    resized_image = cv2.resize(cropped_image, target_size)
                    
                    # 데이터 저장
                    key = os.path.splitext(file)[0]  # 파일명에서 확장자 제거
                    image_data[key] = resized_image
                    target_data[key] = label_data['annotations']['glabellus_wrinkle']
                
                except Exception as e:
                    print(f"Error processing file {label_path}: {e}")


In [6]:
print(f"Number of images processed: {len(image_data)}")
print(f"Number of targets processed: {len(target_data)}")

Number of images processed: 2574
Number of targets processed: 2574


### 데이터 저장 및 불러오기

In [7]:
import pickle

# 이미지 데이터 저장
with open('glabellus_wrinkle_image_data.pkl', 'wb') as f:
    pickle.dump(image_data, f)

# 타겟 데이터 저장
with open('glabellus_wrinkle_target_data.pkl', 'wb') as f:
    pickle.dump(target_data, f)

print("Data saved successfully!")

Data saved successfully!


In [8]:
# 데이터 로드
with open('glabellus_wrinkle_image_data.pkl', 'rb') as f:
    image_data = pickle.load(f)

with open('glabellus_wrinkle_target_data.pkl', 'rb') as f:
    target_data = pickle.load(f)

In [9]:
from collections import Counter

values_list = list(target_data.values())
value_counts = Counter(values_list)

for value, count in value_counts.items():
    print(f"Value: {value}, Count: {count}")

Value: 1, Count: 1110
Value: 0, Count: 540
Value: 2, Count: 312
Value: 3, Count: 270
Value: 5, Count: 168
Value: 4, Count: 93
Value: 6, Count: 81


### 0과 2,3을 묶어서 모델링

In [10]:
def merge_classes(target_data):
    binary_target_data = {}
    for key, value in target_data.items():
        if value in [2, 3]:
            binary_target_data[key] = '10'
        else:
            binary_target_data[key] = value
    
    return binary_target_data

binary_target_data = merge_classes(target_data)

In [11]:
filtered_target_data = {key: value for key, value in binary_target_data.items() if value in [0, '10']}


values_list = list(filtered_target_data.values())
value_counts = Counter(values_list)

for value, count in value_counts.items():
    print(f"Value: {value}, Count: {count}")

Value: 0, Count: 540
Value: 10, Count: 582


In [12]:
common_keys = set(image_data.keys()) & set(filtered_target_data.keys())

# 데이터 정렬
filtered_image_data = {key: image_data[key] for key in common_keys}
filtered_target_data = {key: filtered_target_data[key] for key in common_keys}

# X와 y 생성
X = np.array(list(filtered_image_data.values()))
y = np.array(list(filtered_target_data.values()))

print(f"Length of X: {len(X)}, Length of y: {len(y)}")

Length of X: 1122, Length of y: 1122


In [13]:
class_mapping = {'0': 0, '10': 1}
y = np.array([class_mapping[label] for label in y])
X = X / 255.0

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train shape: (897, 128, 128, 3), y_train shape: (897,)
X_test shape: (225, 128, 128, 3), y_test shape: (225,)


In [15]:
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(X.shape[1], X.shape[2], X.shape[3])),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # 출력층
])

# 모델 컴파일
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

# 모델 요약
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 63, 63, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 30, 30, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 14, 14, 128)      

In [16]:
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

In [17]:
# 모델 학습
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    class_weight = class_weights,
    callbacks=[early_stopping]
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 

In [18]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

8/8 - 0s - loss: 0.4032 - accuracy: 0.8578 - 357ms/epoch - 45ms/step
Test Loss: 0.40316224098205566
Test Accuracy: 0.8577777743339539
