In [23]:
import os
import cv2
import json
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split


In [86]:
# 이미지 로드
def load_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        return None
    img = cv2.resize(img , (256,256))
    img = img / 255.0
    return img

In [87]:
# json 파일 로드
def load_json(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)

    age = data['info']['age']
    gender = 1 if data['info']['gender'] =='F' else 0 
    skin_type = data['info']['skin_type']
    sensitive = data['info']['sensitive']

    pigmentation = data['annotations']['forehead_pigmentation']
    
    return age, gender, skin_type, sensitive, pigmentation

In [88]:
from sklearn.model_selection import train_test_split

image_dir = r"D:\data\korean 01 data"
json_dir = r"D:\data\jsonfile"

images = []
metadata = []

for folder_name in os.listdir(image_dir):
    folder_path = os.path.join(image_dir, folder_name)
    if os.path.isdir(folder_path):  # 폴더인지 확인
        for filename in os.listdir(folder_path):
            if filename.endswith('.jpg'):  # .jpg 파일만 처리
                image_path = os.path.join(folder_path, filename)
                
                # JSON 파일명 생성: 'cropped_' 제거하고 '.jpg' -> '.json'
                json_filename = filename.replace('cropped_', '').replace('.jpg', '')
                json_path = os.path.join(json_dir, folder_name, json_filename)  # JSON 파일 경로
                
                # 이미지와 JSON 로드
                image = load_image(image_path)
                if image is None:
                    continue  # 이미지 로드 실패시 건너뛰기
                age, gender, skin_type, sensitive, pigmentation = load_json(json_path)
                
                images.append(image)  # images 리스트에 추가
                metadata.append([age, gender, skin_type, sensitive, pigmentation])  # metadata 리스트에 추가

# 리스트를 numpy 배열로 변환
images = np.array(images)
metadata = np.array(metadata)

X = images
y = metadata[:,-1] # target

X_metadata = metadata[:, :-1] # pigment 제외한 나머지 데이터


X_train,X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state = 42)

X_train_images = X_train
X_val_images = X_val

X_train_metadata = X_metadata[:len(X_train)]
X_val_metadata = X_metadata[len(X_train):]

In [94]:
import tensorflow as tf
from tensorflow.keras import layers, models, Input


image_input = Input(shape=(256,256,3), name= 'image_input')
x = layers.Conv2D(32,(3,3), activation = 'relu')(image_input)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(64,(3,3), activation = 'relu')(x)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Flatten()(x)
x = layers.Dense(64, activation='relu')(x)


metadata_input = Input(shape=(4,), name='metadata_input')
y = layers.Dense(32, activation ='relu')(metadata_input)
y = layers.Dense(16, activation='relu')(y)

combined = layers.concatenate([x,y])
output = layers.Dense(6, activation='softmax')(combined)

model = models.Model(inputs =[image_input, metadata_input], outputs=output)
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "model_8"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 image_input (InputLayer)    [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv2d_22 (Conv2D)          (None, 254, 254, 32)         896       ['image_input[0][0]']         
                                                                                                  
 max_pooling2d_21 (MaxPooli  (None, 127, 127, 32)         0         ['conv2d_22[0][0]']           
 ng2D)                                                                                            
                                                                                                  
 conv2d_23 (Conv2D)          (None, 125, 125, 64)         18496     ['max_pooling2d_21[0][0]

In [97]:
# 모델 학습
history = model.fit(
    {'image_input': X_train_images, 'metadata_input': X_train_metadata},  # 학습 데이터
    y_train,  # 실제 레이블 (pigmentation 값)
    epochs=20,  # 에포크 수
    validation_data=(
        {'image_input': X_val_images, 'metadata_input': X_val_metadata},  # 검증 데이터
        y_val  # 검증 데이터의 실제 레이블 (pigmentation 값)
    )
)

# 학습이 완료되면 학습 과정에서의 손실과 메트릭스를 확인할 수 있습니다.
print("Training History: ", history.history)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training History:  {'loss': [1.0082752704620361, 0.9771173000335693, 0.9813242554664612, 0.9700654745101929, 0.9911401867866516, 0.9450075626373291, 0.9204166531562805, 0.8850253224372864, 0.9002288579940796, 0.8711703419685364, 0.8249781131744385, 0.8131468892097473, 0.8018709421157837, 0.7770805358886719, 0.7748647928237915, 0.7353141903877258, 0.7083359360694885, 0.6976218819618225, 0.6998834013938904, 0.7481745481491089], 'accuracy': [0.5582901835441589, 0.590673565864563, 0.5621761679649353, 0.5777202248573303, 0.568652868270874, 0.5893782377243042, 0.5919688940048218, 0.6101036071777344, 0.6036269664764404, 0.6450777053833008, 0.6541450619697571, 0.6450777053833008, 0.6722797751426697, 0.6580311059951782, 0.6528497338294983, 0.7007771730422974, 0.704663

In [98]:
# 모델 평가
test_loss, test_accuracy = model.evaluate(
    {'image_input': X_val_images, 'metadata_input': X_val_metadata},  # 검증 데이터
    y_val  # 검증 데이터의 실제 레이블
)

print("Test Loss: ", test_loss)
print("Test Accuracy: ", test_accuracy)


Test Loss:  1.1867202520370483
Test Accuracy:  0.5803108811378479


In [90]:
images

array([[[[0.16470588, 0.29411765, 0.48627451],
         [0.32156863, 0.45882353, 0.63529412],
         [0.30980392, 0.45490196, 0.62745098],
         ...,
         [0.25882353, 0.40392157, 0.57647059],
         [0.32941176, 0.48235294, 0.67058824],
         [0.28235294, 0.44705882, 0.63529412]],

        [[0.2627451 , 0.39215686, 0.57647059],
         [0.31372549, 0.45098039, 0.62352941],
         [0.31372549, 0.45882353, 0.63137255],
         ...,
         [0.27843137, 0.41960784, 0.6       ],
         [0.3254902 , 0.4745098 , 0.6627451 ],
         [0.35294118, 0.50980392, 0.69411765]],

        [[0.23137255, 0.36470588, 0.54509804],
         [0.29019608, 0.42745098, 0.6       ],
         [0.20784314, 0.34509804, 0.51764706],
         ...,
         [0.29803922, 0.43921569, 0.61960784],
         [0.29803922, 0.44313725, 0.63529412],
         [0.34117647, 0.49411765, 0.66666667]],

        ...,

        [[0.38039216, 0.5372549 , 0.74509804],
         [0.37647059, 0.54117647, 0.74901961]

In [91]:
metadata

array([[55,  1,  3,  0,  1],
       [50,  1,  0,  0,  1],
       [24,  1,  0,  0,  1],
       ...,
       [23,  1,  1,  0,  1],
       [26,  1,  1,  1,  1],
       [28,  1,  3,  1,  1]])

In [92]:
import pandas as pd 
metadata_df = pd.DataFrame(metadata, columns = ['Age','Gender','Skin Type','sensitivity', 'pigmentation'])

print(metadata_df.to_string(index=False))

 Age  Gender  Skin Type  sensitivity  pigmentation
  55       1          3            0             1
  50       1          0            0             1
  24       1          0            0             1
  47       1          4            1             1
  55       1          3            0             1
  52       1          3            0             1
  40       1          1            0             0
  53       1          0            0             1
  59       1          3            0             1
  46       1          4            1             3
  53       1          3            0             1
  47       1          3            0             0
  46       1          3            0             0
  48       1          3            0             1
  42       1          4            1             1
  47       1          3            0             1
  49       1          0            0             1
  42       1          3            0             2
  56       1          3        

In [93]:
metadata_df['pigmentation'].value_counts()

pigmentation
1    494
0    196
2    184
3     80
4      7
5      4
Name: count, dtype: int64