In [1]:
import os
import cv2
import json
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split





In [2]:
# 이미지 로드
def load_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        return None
    img = cv2.resize(img , (256,256))
    img = img / 255.0
    return img

In [8]:
# json 파일 로드
def load_json(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)

    gender = 1 if data['info']['gender'] =='F' else 0
    skin_type = data['info']['skin_type']
    sensitive = data['info']['sensitive']

    pigmentation = data['annotations']['forehead_wrinkle']

    return gender, skin_type, sensitive, pigmentation

In [9]:
from sklearn.model_selection import train_test_split
import os
import os

image_dir = r"D:\data\korean 01 data"
json_dir = r"D:\data\jsonfile"

images = []
metadata = []

four_or_five_path = []

for main_folder in os.listdir(image_dir):  # 'digit cam', 'pad', 'cell phone'
    main_folder_path = os.path.join(image_dir, main_folder)
    if os.path.isdir(main_folder_path):  # 메인 폴더인지 확인
        for sub_folder in os.listdir(main_folder_path):  # '0001', '0002', '0003'
            sub_folder_path = os.path.join(main_folder_path, sub_folder)
            if os.path.isdir(sub_folder_path):  # 서브 폴더인지 확인
                for filename in os.listdir(sub_folder_path):  # 이미지 파일들
                    if filename.endswith('.jpg'):  # .jpg 파일만 처리
                        image_path = os.path.join(sub_folder_path, filename)

                        # 여기서 특정 조건에 맞는 파일만 선택하도록 수정
                        if 'F' not in filename:  # L15가 포함된 파일만 선택
                            continue  # L15가 포함되지 않으면 건너뜀


                        # JSON 파일명 생성: 'cropped_' 제거하고 '.jpg' -> '.json'
                        json_filename = filename.replace('cropped_', '').replace('.jpg', '')

                        # json_dir도 image_dir처럼 동일한 폴더 구조를 반영하여 경로 설정
                        json_folder_path = os.path.join(json_dir, main_folder, sub_folder)
                        json_path = os.path.join(json_folder_path, json_filename)  # JSON 파일 경로


                        # 이미지와 JSON 로드
                        image = load_image(image_path)
                        if image is None:
                            continue  # 이미지 로드 실패시 건너뛰기
                        gender, skin_type, sensitive, pigmentation = load_json(json_path)

                        if pigmentation in [0,1,2,3,4,5,6]:
                            four_or_five_path.append(json_path)

                            if pigmentation in [0,1]:
                                pigmentation = 0
                            elif pigmentation in [2,3]:
                                pigmentation = 1
                            elif pigmentation in [4,5, 6]:
                                pigmentation = 2



                        images.append(image)  # images 리스트에 추가
                        metadata.append([gender, skin_type, sensitive, pigmentation])  # metadata 리스트에 추가


# 리스트를 numpy 배열로 변환
metadata = np.array(metadata)

# 데이터 준비
X = np.array(images)
X_metadata = np.array(metadata[:, :-1])  # pigment 제외한 나머지 메타데이터
y = metadata[:, -1]  # 타겟 (pigmentation)

# 이미지와 메타데이터를 함께 train_test_split에 전달
X_train, X_val, X_train_metadata, X_val_metadata, y_train, y_val = train_test_split(
    X, X_metadata, y, test_size=0.2, random_state=42,stratify = y
)

# 결과
X_train_images = X_train
X_val_images = X_val

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input


image_input = Input(shape=(256, 256, 3), name='image_input')
x = layers.Conv2D(64, (3, 3), activation='relu')(image_input)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)


x = layers.Conv2D(128, (3, 3), activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)


x = layers.Conv2D(128, (3, 3), activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)


x = layers.Conv2D(256, (3, 3), activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)


x = layers.Conv2D(512, (3, 3), activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2, 2))(x)


x = layers.Conv2D(1024,(3,3), activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2,2))(x)


x = layers.GlobalAveragePooling2D()(x)  # Global Average Pooling

x = layers.Dense(1024, activation='relu')(x)  # 더 큰 Dense 레이어
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)

metadata_input = Input(shape=(3,), name='metadata_input')
y = layers.Dense(128, activation='relu')(metadata_input)  # 더 큰 Dense 레이어
y = layers.BatchNormalization()(y)
y = layers.Dense(256, activation='relu')(y)
y = layers.BatchNormalization()(y)
y = layers.Dense(512, activation='relu')(y)
y = layers.BatchNormalization()(y)
y = layers.Dropout(0.5)(y)

combined = layers.concatenate([x, y])
output = layers.Dense(3, activation='softmax')(combined)

model = models.Model(inputs=[image_input, metadata_input], outputs=output)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001) # 낮추기
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()




Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 image_input (InputLayer)    [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 254, 254, 64)         1792      ['image_input[0][0]']         
                                                                                                  
 batch_normalization (Batch  (None, 254, 254, 64)         256       ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 max_pooling2d (MaxPooling2  (None, 127, 127, 64)         0         ['batch_normalization[0]

In [11]:
# # 사전학습 모델 
# import tensorflow as tf
# from tensorflow.keras import layers, models, Input
# from tensorflow.keras.applications import ResNet50
# from tensorflow.keras.models import Model

# image_input = Input(shape=(224, 224, 3), name='image_input')

# # ResNet50을 사전학습된 가중치로 불러옴 (include_top=False는 분류용 마지막 레이어 제외)
# resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))(image_input)
# x = layers.GlobalAveragePooling2D()(resnet)  # ResNet50의 출력은 GlobalAveragePooling2D로 처리

# # 메타데이터 입력
# metadata_input = Input(shape=(4,), name='metadata_input')
# y = layers.Dense(32, activation='relu')(metadata_input)
# y = layers.Dense(16, activation='relu')(y)

# # 두 입력을 결합
# combined = layers.concatenate([x, y])

# # 최종 출력 레이어
# output = layers.Dense(6, activation='softmax')(combined)

# # 모델 정의
# model2 = Model(inputs=[image_input, metadata_input], outputs=output)

# # 모델 컴파일
# model2.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# # 모델 요약
# model2.summary()


In [12]:
from tensorflow.keras.callbacks import EarlyStopping

# 조기 종료 콜백 설정
early_stopping = EarlyStopping(
    monitor='val_loss',  # val_loss를 모니터링
    patience=3,          # 개선되지 않은 에포크를 3번 참음
    restore_best_weights=True  # 최상의 모델 가중치를 복원
)

In [None]:
history = model.fit(
    [X_train_images, X_train_metadata],  # 두 입력을 리스트로 전달
    y_train,  # 타겟 라벨
    validation_data=([X_val_images, X_val_metadata], y_val),  # 검증 데이터도 동일하게 전달
    epochs=80,
    batch_size=64,
    validation_batch_size = 64, # 올려볼것 32 64
    #callbacks=[early_stopping]
)
# 학습이 완료되면 학습 과정에서의 손실과 메트릭스를 확인할 수 있습니다.

print("Training History: ", history.history)


Epoch 1/80


Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
 5/37 [===>..........................] - ETA: 6:07 - loss: 1.2551 - accuracy: 0.4812

In [18]:
# 모델 평가
test_loss, test_accuracy = model.evaluate(
    {'image_input': X_val_images, 'metadata_input': X_val_metadata},  # 검증 데이터
    y_val  # 검증 데이터의 실제 레이블
)

print("Test Loss: ", test_loss)
print("Test Accuracy: ", test_accuracy)

# 0.58 0.56
# 3으로 합친 후 : 0.569, stratify = y : 0.538
# 모든 데이터 합친 후 : 0.52

Test Loss:  1.1994446516036987
Test Accuracy:  0.5233160853385925


In [None]:
import pandas as pd
columns = ['Age', 'Gender', 'Skin_type', 'Sensitive','Pigmentation']
metadata_df = pd.DataFrame(metadata, columns =columns)

metadata_df

Unnamed: 0,Age,Gender,Skin_type,Sensitive,Pigmentation
0,55,1,3,0,3
1,50,1,0,0,1
2,24,1,0,0,0
3,47,1,4,1,1
4,55,1,3,0,3
...,...,...,...,...,...
2890,25,1,1,0,1
2891,24,1,1,0,1
2892,23,1,1,0,1
2893,26,1,1,1,1


In [None]:
metadata_df['Pigmentation'].value_counts()

Pigmentation
1    1062
2     696
3     480
4     228
5     177
6     135
0     117
Name: count, dtype: int64