In [None]:
from google.colab import drive
# Mount Google Drive at /gdrive so the '/gdrive/MyDrive/...' paths used below resolve.
drive.mount('/gdrive', force_remount=True)

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from IPython.display import display, Javascript

In [None]:
# 런타임 오류 방지 함수
def keep_alive():
    """Inject a JavaScript timer into the notebook page.

    The script clicks the ``colab-connect-button`` element every 60000 ms.
    Presumably this keeps the Colab session from disconnecting while long
    training runs are in progress (TODO confirm against current Colab UI).
    """
    reconnect_script = '''
        function ClickConnect(){
            console.log("클릭 연결 버튼");
            document.querySelector("colab-connect-button").click()
        }
        setInterval(ClickConnect, 60000)
    '''
    display(Javascript(reconnect_script))

In [None]:
# 데이터 로드 및 전처리
def load_and_preprocess_data():
    """Read source_type.csv and rebuild its stringified object columns.

    The 'annotations', 'equipment' and 'bbox' columns are stored in the CSV as
    text; every string cell in them is evaluated back into a Python object.
    Non-string cells are left untouched.

    Returns:
        The loaded pandas DataFrame.
    """
    csv_path = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 원본 데이터/source_type.csv'
    frame = pd.read_csv(csv_path)

    def _parse_cell(cell):
        # NOTE(review): eval() executes whatever expression the CSV contains.
        # Acceptable only while the CSV is a trusted, self-produced file;
        # ast.literal_eval would be the safe choice for plain literals.
        return eval(cell) if isinstance(cell, str) else cell

    for column in ('annotations', 'equipment', 'bbox'):
        frame[column] = frame[column].apply(_parse_cell)
    return frame

# 이미지 로드 및 전처리 함수
def load_and_preprocess_image(image_path):
    """Load one JPEG from disk and prepare it as EfficientNet input.

    The file is decoded to 3 channels, resized to 224x224 and passed through
    ``tf.keras.applications.efficientnet.preprocess_input``.

    Args:
        image_path: path of the JPEG file to read.

    Returns:
        The preprocessed image tensor.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    return tf.keras.applications.efficientnet.preprocess_input(resized)

In [None]:
# 데이터 생성기
def create_data_generator(X, y, batch_size=32, is_training=True):
    """Build a batched ``tf.data.Dataset`` of (image, label) pairs from image paths.

    Samples whose image file is missing or whose label is missing (None/NaN)
    are skipped with a printed message instead of being fed to the model; a
    NaN label would otherwise turn the training loss into NaN.

    Args:
        X: pandas Series of image file paths (read positionally via ``.iloc``).
        y: pandas Series of numeric labels, positionally aligned with ``X``.
        batch_size: number of samples per batch.
        is_training: when True, samples are visited in a new random order on
            every pass over the dataset; when False, in their original order.

    Returns:
        A ``tf.data.Dataset`` yielding float32 images of shape (224, 224, 3)
        and float32 scalar labels, batched and prefetched.
    """
    sample_count = len(X)

    def gen():
        # Shuffle the cheap index order rather than buffering decoded images:
        # a shuffle buffer of len(X) would keep every decoded image in memory.
        # The generator is restarted for each pass, so each epoch gets a new order.
        if is_training:
            order = np.random.permutation(sample_count)
        else:
            order = range(sample_count)

        for i in order:
            img_path = X.iloc[i]
            label = y.iloc[i]
            if pd.isna(label):
                print(f"Skipping sample with missing label: {img_path}")
                continue
            if not os.path.exists(img_path):
                print(f"Skipping missing file: {img_path}")
                continue
            yield load_and_preprocess_image(img_path), label

    dataset = tf.data.Dataset.from_generator(
        gen,
        output_signature=(
            tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(), dtype=tf.float32)
        )
    )

    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [None]:
# 검증 데이터 생성기
def create_val_data_generator(X, y, directory, batch_size=32):
    """Build the validation dataset (no shuffling) for the given paths and labels.

    Args:
        X: pandas Series of image file paths.
        y: pandas Series of labels aligned with ``X``.
        directory: accepted for interface compatibility but not used;
            ``create_data_generator`` takes no directory argument.
        batch_size: number of samples per batch.

    Returns:
        The dataset produced by ``create_data_generator`` with ``is_training=False``.
    """
    # Pass by keyword: forwarding `directory` positionally shifted it into the
    # batch_size slot and made the call fail with a duplicate `is_training`.
    return create_data_generator(X, y, batch_size=batch_size, is_training=False)

In [None]:
# 모델 생성 함수
def create_model(output_dim, model_type):
    """Build an EfficientNetB0-based model with a classification or regression head.

    The ImageNet-pretrained backbone (no top, 224x224x3 input) feeds both a
    global-average and a global-max pooling layer; their outputs are
    concatenated and passed through Dense(256, relu) and Dropout(0.4).

    Args:
        output_dim: number of softmax units; used only for classification.
        model_type: 'classification' selects a softmax head, any other value
            selects a single linear output unit.

    Returns:
        The assembled ``tf.keras.Model``.
    """
    layers = tf.keras.layers
    backbone = tf.keras.applications.EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    pooled = layers.Concatenate()([
        layers.GlobalAveragePooling2D()(backbone.output),
        layers.GlobalMaxPooling2D()(backbone.output),
    ])
    hidden = layers.Dense(256, activation='relu')(pooled)
    hidden = layers.Dropout(0.4)(hidden)

    if model_type == 'classification':
        head = layers.Dense(output_dim, activation='softmax')
    else:  # regression
        head = layers.Dense(1)

    return tf.keras.Model(inputs=backbone.input, outputs=head(hidden))

In [None]:
# 성능 시각화 함수
def plot_performance(history, metric_name, facepart, feature, model_type):
    """Save a two-panel training curve figure (loss and one metric) as a PNG.

    Args:
        history: object whose ``.history`` dict holds per-epoch lists keyed by
            'loss', 'val_loss', the metric name and 'val_<metric name>'.
        metric_name: metric key such as 'accuracy' or 'mae'. A one-element
            list/tuple is also accepted, because callers in this notebook pass
            the metric wrapped in a list.
        facepart, feature, model_type: used to build the output file name.
    """
    # A list cannot be used as a dict key and has no .upper(); unwrap it.
    if isinstance(metric_name, (list, tuple)):
        metric_name = metric_name[0]
    metric_label = metric_name.upper()

    # Only two panels are drawn, so only two axes are created.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Loss plot
    ax1.plot(history.history['loss'], label='Train Loss')
    ax1.plot(history.history['val_loss'], label='Validation Loss')
    ax1.set_title(f'{feature} Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()

    # Metric plot
    ax2.plot(history.history[metric_name], label=f'Train {metric_label}')
    ax2.plot(history.history[f'val_{metric_name}'], label=f'Validation {metric_label}')
    ax2.set_title(f'{feature} {metric_label}')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel(metric_label)
    ax2.legend()

    plt.tight_layout()
    plt.savefig(f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_{model_type}_performance.png')
    # Close this specific figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [None]:
# 모델 훈련 함수
def train_model(model, train_data, val_data, facepart, feature, model_type, epochs=25, batch_size=8):
    """Compile, fit and save a model, checkpointing the best epoch by validation loss.

    Args:
        model: Keras model to train.
        train_data: training dataset passed to ``model.fit``.
        val_data: validation dataset passed as ``validation_data``.
        facepart, feature: used only to build checkpoint and final-model file names.
        model_type: 'regression' selects mean-squared-error loss with an MAE
            metric; any other value selects sparse categorical cross-entropy
            with accuracy.
        epochs: maximum number of epochs.
        batch_size: accepted for interface compatibility but not used here
            (assumes the datasets passed in are already batched - TODO confirm).

    Returns:
        The History object returned by ``model.fit``.
    """
    initial_lr = 1e-4
    optimizer = tf.keras.optimizers.Adam(learning_rate=initial_lr)

    if model_type == 'regression':
        loss = 'mean_squared_error'
        metrics = ['mae']
    else:  # classification
        loss = 'sparse_categorical_crossentropy'
        metrics = ['accuracy']

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    # save_freq must be 'epoch': an integer means "every N batches", and
    # val_loss does not exist at batch end, so save_best_only never saved.
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_{model_type}_checkpoint_{{epoch:02d}}.keras',
        save_best_only=True,
        save_weights_only=False,
        monitor='val_loss',
        mode='min',
        save_freq='epoch')

    # Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)

    # Stop after 10 stagnant epochs and roll back to the best weights seen.
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    history = model.fit(
        train_data,
        validation_data=val_data,
        epochs=epochs,
        verbose=1,
        callbacks=[checkpoint_callback, reduce_lr, early_stopping]
    )

    model.save(f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_{model_type}_final_model.keras')
    return history

In [None]:
# 이미지 경로 가져오기 함수(분류)
def get_image_path_classification(row, feature):
    """Return the image path for a row in the class-sorted crop folder layout.

    Args:
        row: mapping with 'facepart', 'filename', 'annotations' and 'source_type'.
        feature: annotation key whose value names the class sub-folder.

    Returns:
        For facepart 0, a path under the raw training image folder; otherwise a
        path under classified_cropped/<part>/<feature>/<source_type>/<class>.
        The file name is always '<filename stem>_<facepart>.jpg'.
    """
    part_id = row['facepart']
    stem = row['filename'].split('.')[0]
    class_dir = row['annotations'][feature]
    split_name = row['source_type']
    image_name = f"{stem}_{part_id}.jpg"

    if part_id == 0:
        return str(os.path.join('/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터/Training/01.원천데이터', image_name))

    # Index in this list equals the facepart id; slot 0 is a placeholder.
    part_dirs = ['','forehead','glabellus','l_perocular','r_perocular','l_cheek','r_cheek','lip','chin']
    crop_root = f'/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/classified_cropped/{part_dirs[part_id]}/{feature}/{split_name}/{class_dir}'
    return str(os.path.join(crop_root, image_name))

# 이미지 경로 가져오기 함수(회귀)
def get_image_path(row, feature):
    """Return the image path for a row in the per-facepart crop folder layout.

    Args:
        row: mapping with 'facepart', 'filename' and 'source_type'.
        feature: not used when building the path.

    Returns:
        For facepart 0, '<facepart0_resized>/<filename>'; otherwise
        '<Training_cropped>/<facepart>/<filename stem>_<facepart>.jpg'.
    """
    part_id = row['facepart']
    stem = row['filename'].split('.')[0]
    # Looked up but unused; kept so a row lacking this key fails exactly as before.
    _split_name = row['source_type']

    if part_id != 0:
        crop_dir = f'/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/Training_cropped/{part_id}'
        return f"{crop_dir}/{stem}_{part_id}.jpg"

    whole_face_dir = '/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/facepart0_resized'
    return f"{whole_face_dir}/{row['filename']}"

# bbox 유효성 검증 함수
def valid_bbox(bbox):
    """Tell whether ``bbox`` is a list of exactly four non-negative integers.

    Anything else returns False: None, other container types, wrong lengths,
    the placeholder ['None', 'None', 'None', 'None'], non-int or negative entries.
    """
    if not (isinstance(bbox, list) and len(bbox) == 4):
        return False
    if bbox == ['None', 'None', 'None', 'None']:
        return False
    for coordinate in bbox:
        if not isinstance(coordinate, int) or coordinate < 0:
            return False
    return True

In [None]:
# facepart별 모델 훈련 함수
def _train_feature_model(facepart_df, label_column, feature, facepart, model_type, metric_name, output_dim):
    """Train, save and plot one model for a single feature of one facepart."""
    datasets = {}
    for split, is_training in (('train', True), ('val', False)):
        split_df = facepart_df[facepart_df['source_type'] == split]
        labels = split_df[label_column].apply(lambda d: d.get(feature, None))

        # Rows without a label cannot be learned from; a NaN target would poison the loss.
        has_label = labels.notna()
        split_df, labels = split_df[has_label], labels[has_label]

        if split == 'train' and split_df.empty:
            print(f"No labelled training rows for facepart {facepart}, feature {feature}; skipping.")
            return

        paths = split_df.apply(lambda row: get_image_path(row, feature), axis=1)
        # Validation data is not shuffled.
        datasets[split] = create_data_generator(paths, labels, is_training=is_training)

    model = create_model(output_dim, model_type)
    history = train_model(model, datasets['train'], datasets['val'], facepart, feature, model_type)
    # plot_performance indexes history by this name, so it must be a plain string.
    plot_performance(history, metric_name, facepart, feature, model_type)


def train_facepart_models(facepart, train_classification=True, train_regression=True, source_df=None):
    """Train every missing classification and regression model for one facepart.

    Rows are taken from ``source_df`` (or the notebook-global ``df`` when it is
    omitted), restricted to the given facepart and to rows with a valid bbox.
    A feature is skipped when its final model file already exists.

    Args:
        facepart: facepart id to train for.
        train_classification: train one classifier per key of the first row's
            'annotations' dict, when that feature has more than one distinct value.
        train_regression: train one regressor per feature in the fixed list
            below that appears in the first row's 'equipment' dict and has at
            least one non-null value.
        source_df: optional DataFrame to use instead of the global ``df``.
    """
    if source_df is None:
        source_df = df  # notebook-global frame created by the main cell

    print(f"Processing facepart {facepart}")

    facepart_df = source_df[source_df['facepart'] == facepart]
    if not facepart_df.empty:
        facepart_df = facepart_df[facepart_df['bbox'].apply(valid_bbox)]

    # Without this guard the .iloc[0] lookups below raise IndexError.
    if facepart_df.empty:
        print(f"No rows with a valid bbox for facepart {facepart}; nothing to train.")
        return

    if train_classification:
        for feature in facepart_df['annotations'].iloc[0].keys():
            model_path = f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_classification_final_model.keras'
            if os.path.exists(model_path):
                print(f"Skipping classification training for facepart {facepart}, feature {feature}. Model already exists.")
                continue

            y = facepart_df['annotations'].apply(lambda x: x.get(feature, None))

            # A single class leaves nothing to classify.
            if y.nunique() > 1:
                print(f"Starting classification training for facepart {facepart}, feature {feature}")
                _train_feature_model(facepart_df, 'annotations', feature, facepart,
                                     'classification', 'accuracy', y.nunique())

    if train_regression:
        regression_features = ['forehead_moisture', 'r_cheek_moisture', 'l_cheek_moisture', 'chin_moisture',
                               'chin_elasticity_R2', 'r_cheek_elasticity_R2', 'l_cheek_elasticity_R2',
                               'forehead_elasticity_R2', 'pigmentation_count', 'r_cheek_pore', 'l_cheek_pore']

        for feature in regression_features:
            model_path = f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_regression_final_model.keras'
            if os.path.exists(model_path):
                print(f"Skipping regression training for facepart {facepart}, feature {feature}. Model already exists.")
                continue

            if feature not in facepart_df['equipment'].iloc[0]:
                continue

            y = facepart_df['equipment'].apply(lambda x: x.get(feature, None))
            if y.isnull().all():
                continue

            print(f"Starting regression training for facepart {facepart}, feature {feature}")
            _train_feature_model(facepart_df, 'equipment', feature, facepart, 'regression', 'mae', 1)

In [None]:
# facepart0 모델 훈련 함수
def train_facepart0_model(df):
    """Train the missing whole-face (facepart 0) classifiers.

    One classifier is trained for each of the 'skin_type' and 'sensitive'
    columns, using rows with facepart 0 and a valid bbox. A feature is skipped
    when its final model file already exists or when it has a single distinct value.

    Args:
        df: DataFrame with 'facepart', 'bbox', 'source_type', 'filename' and
            the two label columns.
    """
    # This function only handles facepart 0. The name was previously read from
    # an undefined variable, which raised NameError unless a global leaked in.
    facepart = 0
    print("Processing facepart 0")

    facepart0_df = df[df['facepart'] == 0]
    facepart0_df = facepart0_df[facepart0_df['bbox'].apply(valid_bbox)]

    for feature in ['skin_type', 'sensitive']:
        model_path = f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_classification_final_model.keras'
        if os.path.exists(model_path):
            print(f"Skipping classification training for facepart {facepart}, feature {feature}. Model already exists.")
            continue
        y = facepart0_df[feature]

        if y.nunique() > 1:
            print(f"Starting classification training for facepart 0, feature {feature}")

            train_data = facepart0_df[facepart0_df['source_type'] == 'train']
            val_data = facepart0_df[facepart0_df['source_type'] == 'val']

            X_train = train_data.apply(lambda row: get_image_path(row, feature), axis=1)
            y_train = train_data[feature]
            X_val = val_data.apply(lambda row: get_image_path(row, feature), axis=1)
            y_val = val_data[feature]

            train_generator = create_data_generator(X_train, y_train)
            # Validation data is not shuffled.
            val_generator = create_data_generator(X_val, y_val, is_training=False)

            model = create_model(y.nunique(), 'classification')
            history = train_model(model, train_generator, val_generator, facepart, feature, 'classification')
            plot_performance(history, 'accuracy', facepart, feature, 'classification')

In [None]:
# 메인 함수
if __name__ == "__main__":
    keep_alive()
    df = load_and_preprocess_data()

    # Menu choice -> faceparts to train; matches the ranges shown in the prompt
    # (option '3' previously also re-ran facepart 0).
    facepart_choices = {
        '0': [0],
        '1': [1, 2],
        '2': [3, 4, 5, 6],
        '3': [7, 8],
    }

    user_input = input("처리할 facepart 범위를 선택하세요 (0: 0, 1: 1-2, 2: 3-6, 3: 7-8): ")
    facepart_range = facepart_choices.get(user_input)

    if facepart_range is None:
        # Report and fall through instead of calling exit(), which would
        # terminate the notebook kernel.
        print("잘못된 입력입니다.")
    else:
        train_class = input("분류 모델을 학습하시겠습니까? (y/n): ").lower() == 'y'
        train_reg = input("회귀 모델을 학습하시겠습니까? (y/n): ").lower() == 'y'

        for facepart in facepart_range:
            if facepart == 0:
                # The facepart-0 trainer has no classification/regression switches.
                train_facepart0_model(df)
            else:
                train_facepart_models(facepart, train_classification=train_class, train_regression=train_reg)

# 테스트

In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_error, accuracy_score, classification_report

# 데이터 로드 및 전처리 함수
def load_and_preprocess_data():
    """Load source_type.csv and decode its text-encoded object columns.

    String cells of the 'annotations', 'equipment' and 'bbox' columns are
    evaluated into Python objects; other cells are kept as they are.

    Returns:
        The loaded pandas DataFrame.
    """
    def _decode(value):
        # NOTE(review): eval() runs arbitrary expressions from the CSV; only
        # safe while that file is trusted. ast.literal_eval is the safer option.
        if isinstance(value, str):
            return eval(value)
        return value

    table = pd.read_csv('/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 원본 데이터/source_type.csv')
    for name in ['annotations', 'equipment', 'bbox']:
        decoded = table[name].apply(_decode)
        table[name] = decoded
    return table

# 이미지 로드 및 전처리 함수
def load_and_preprocess_image(image_path):
    """Read a JPEG file and return it as a 224x224 EfficientNet-preprocessed tensor.

    Args:
        image_path: path of the JPEG file to read.

    Returns:
        The 3-channel image after resizing and ``preprocess_input``.
    """
    preprocess = tf.keras.applications.efficientnet.preprocess_input
    pixels = tf.image.decode_jpeg(tf.io.read_file(image_path), channels=3)
    return preprocess(tf.image.resize(pixels, [224, 224]))

# 이미지 경로 가져오기 함수
def get_image_path(row, feature, facepart):
    """Return the image path for ``row`` under the given facepart's folder.

    Args:
        row: mapping with a 'filename' entry; only the part before the first
            '.' is used.
        feature: not used when building the path.
        facepart: 0 selects the facepart0_resized folder ('<stem>.jpg'); any
            other value selects Training_cropped/<facepart> ('<stem>_<facepart>.jpg').
    """
    stem = row['filename'].split('.')[0]

    if facepart != 0:
        crop_dir = f'/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/Training_cropped/{facepart}'
        return os.path.join(crop_dir, f"{stem}_{facepart}.jpg")

    whole_face_dir = '/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/facepart0_resized'
    return os.path.join(whole_face_dir, f"{stem}.jpg")

# 테스트 데이터 생성기
def create_test_data_generator(X, y, batch_size=32):
    """Build an unshuffled, batched ``tf.data.Dataset`` for evaluation.

    Args:
        X: pandas Series of image file paths (read positionally via ``.iloc``).
        y: pandas Series of labels, positionally aligned with ``X``.
        batch_size: number of samples per batch.

    Returns:
        A dataset of float32 (224, 224, 3) images and float32 scalar labels.
        Rows whose image file does not exist are skipped with a printed
        message, so the dataset can hold fewer samples than ``X``.
    """
    def gen():
        for position in range(len(X)):
            path = X.iloc[position]
            if not os.path.exists(path):
                print(f"Skipping missing file: {path}")
                continue
            yield load_and_preprocess_image(path), y.iloc[position]

    element_spec = (
        tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.float32)
    )
    samples = tf.data.Dataset.from_generator(gen, output_signature=element_spec)
    batched = samples.batch(batch_size)
    return batched.prefetch(tf.data.AUTOTUNE)

# 회귀 모델 테스트 함수
def test_regression_model(facepart, feature):
    print(f"Testing regression model for facepart {facepart}, feature {feature}")

    # 데이터 로드
    df = load_and_preprocess_data()
    test_df = df[(df['facepart'] == facepart) & (df['source_type'] == 'test')]

    # 테스트 데이터 준비
    X_test = test_df.apply(lambda row: get_image_path(row, facepart), axis=1)
    y_test = test_df['equipment'].apply(lambda x: x.get(feature, None))

    # 데이터 생성기 생성
    test_generator = create_test_data_generator(X_test, y_test)

    # 모델 로드
    model_path = f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_regression_final_model.keras'
    model = load_model(model_path)

    # 예측
    predictions = model.predict(test_generator)

    # 성능 평가
    mae = mean_absolute_error(y_test, predictions)
    print(f"Mean Absolute Error: {mae}")

    return mae, y_test, predictions

# 분류 모델 테스트 함수
def test_classification_model(facepart, feature):
    print(f"Testing classification model for facepart {facepart}, feature {feature}")

    # 데이터 로드
    df = load_and_preprocess_data()
    test_df = df[(df['facepart'] == facepart) & (df['source_type'] == 'test')]

    # 테스트 데이터 준비
    X_test = test_df.apply(lambda row: get_image_path(row, feature, facepart), axis=1)
    y_test = test_df['annotations'].apply(lambda x: x.get(feature, None))

    # 데이터 생성기 생성
    test_generator = create_test_data_generator(X_test, y_test)

    # 모델 로드
    model_path = f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature}_classification_final_model.keras'
    model = load_model(model_path)

    # 예측
    predictions = model.predict(test_generator)
    predictions = np.argmax(predictions, axis=1)

    # 성능 평가
    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy}")
    print(classification_report(y_test, predictions))

    return accuracy, y_test, predictions

# 메인 실행 부분
if __name__ == "__main__":
    # Regression model test (currently disabled): facepart 1, 'forehead_elasticity_R2'.
    # regression_mae, y_true_reg, y_pred_reg = test_regression_model(1, 'forehead_elasticity_R2')

    # Classification model test: facepart 0, feature 'sensitive'.
    classification_accuracy, y_true_cls, y_pred_cls = test_classification_model(0, 'sensitive')

# 얼굴부위 분류 및 피부진단 테스트

In [None]:
!pip install ultralytics

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
import tensorflow as tf

def resize_image(img, target_size):
    """Scale an image to fit a square canvas and pad the remainder with black.

    Args:
        img: image array whose first two dimensions are height and width.
        target_size: side length of the square output.

    Returns:
        Tuple ``(padded, (top, left), ratio)``: the padded image, the padding
        added above and to the left of the scaled image, and the scale factor
        that was applied.
    """
    height, width = img.shape[:2]
    scale = min(target_size / height, target_size / width)
    scaled_w = int(width * scale)
    scaled_h = int(height * scale)
    shrunk = cv2.resize(img, (scaled_w, scaled_h), interpolation=cv2.INTER_AREA)

    # Split the leftover space evenly; any odd pixel goes to the bottom/right.
    pad_h = target_size - scaled_h
    pad_w = target_size - scaled_w
    top = pad_h // 2
    left = pad_w // 2
    bottom = pad_h - top
    right = pad_w - left

    canvas = cv2.copyMakeBorder(shrunk, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0,0,0])
    return canvas, (top, left), scale

def load_skin_models(model_dir):
    """Load every '*_final_model.keras' file found directly in ``model_dir``.

    Args:
        model_dir: directory to scan (not recursive).

    Returns:
        Dict mapping each file name, cut at '_final_model.keras', to the
        loaded Keras model.
    """
    suffix = "_final_model.keras"
    return {
        file_name.split(suffix)[0]: tf.keras.models.load_model(os.path.join(model_dir, file_name))
        for file_name in os.listdir(model_dir)
        if file_name.endswith(suffix)
    }

def predict_skin_condition(cropped_img, models):
    """Run every skin model on one cropped face region.

    Args:
        cropped_img: image crop in OpenCV's BGR channel order, as produced by
            slicing the array returned by ``cv2.imread``.
        models: dict mapping model name to a loaded Keras model. Names
            containing 'classification' are treated as classifiers; all others
            as regressors.

    Returns:
        Dict mapping model name to the predicted class index (classification)
        or the single predicted value (regression).
    """
    results = {}
    # The training pipeline decodes images with tf.image.decode_jpeg (RGB),
    # while OpenCV loads BGR; swap channels so inference matches training.
    rgb = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(rgb, (224, 224))
    img = tf.keras.applications.efficientnet.preprocess_input(img)
    img = np.expand_dims(img, axis=0)  # add the batch dimension

    for model_name, model in models.items():
        prediction = model.predict(img)
        if 'classification' in model_name:
            results[model_name] = np.argmax(prediction[0])
        else:  # regression
            results[model_name] = prediction[0][0]

    return results

def analyze_face(image_path, yolo_model_path, skin_model_dir, target_size=640):
    """Detect face parts with YOLO and run the skin models on each detected crop.

    Args:
        image_path: path of the image to analyse.
        yolo_model_path: path of the YOLO weights file.
        skin_model_dir: directory holding the '*_final_model.keras' skin models.
        target_size: side length of the padded square image given to YOLO.

    Returns:
        Dict mapping each detected class name to the result of
        ``predict_skin_condition`` for its crop. Every loaded skin model is
        applied to every crop, and a later detection of the same class
        replaces an earlier one.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # Read the image first so a bad path fails fast, before any model is loaded.
    # cv2.imread signals failure by returning None rather than raising.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Load models
    yolo_model = YOLO(yolo_model_path)
    skin_models = load_skin_models(skin_model_dir)

    # Letterbox the image to the square size used for detection
    resized_img, (pad_top, pad_left), resize_ratio = resize_image(img, target_size)

    # Perform YOLO prediction
    results = yolo_model(resized_img)

    face_analysis = {}

    for r in results:
        boxes = r.boxes
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0]

            # Map the box back to the original image: undo padding, then
            # scaling, and clamp to the image bounds.
            x1 = max(0, int((x1 - pad_left) / resize_ratio))
            y1 = max(0, int((y1 - pad_top) / resize_ratio))
            x2 = min(img.shape[1], int((x2 - pad_left) / resize_ratio))
            y2 = min(img.shape[0], int((y2 - pad_top) / resize_ratio))

            # Get class name
            class_name = yolo_model.names[int(box.cls)]

            # Crop image
            if x1 < x2 and y1 < y2:
                cropped_img = img[y1:y2, x1:x2]

                if cropped_img.size > 0:
                    # Predict skin condition
                    skin_prediction = predict_skin_condition(cropped_img, skin_models)
                    face_analysis[class_name] = skin_prediction
                else:
                    print(f"Warning: Empty image generated for {class_name}")
            else:
                print(f"Warning: Invalid bounding box for {class_name}")

    return face_analysis

# Test function for a single image
def test_single_image(image_path, yolo_model_path, skin_model_dir):
    face_analysis = analyze_face(image_path, yolo_model_path, skin_model_dir)
    print("Face Analysis Results:")
    for part, predictions in face_analysis.items():
        print(f"\n{part.upper()}:")
        for feature, value in predictions.items():
            print(f"  {feature}: {value}")

# Test function for facepart1 regression models
def test_facepart1_regression(image_path, yolo_model_path, skin_model_dir):
    face_analysis = analyze_face(image_path, yolo_model_path, skin_model_dir)
    print("Facepart1 Regression Results:")
    for part, predictions in face_analysis.items():
        if part == "forehead":  # Assuming facepart1 corresponds to forehead
            for feature, value in predictions.items():
                if "regression" in feature:
                    print(f"  {feature}: {value}")

# Main function
if __name__ == "__main__":

    user_input = input("작업을 선택하세요 (1: 모델 훈련, 2: 단일 이미지 테스트, 3: facepart1 회귀 테스트): ")

    # Both image tasks take the same three arguments, so pick the function by menu key.
    image_tasks = {'2': test_single_image, '3': test_facepart1_regression}

    if user_input == '1':
        # Model training code (as before)
        ...

    elif user_input in image_tasks:
        image_path = input("테스트할 이미지 경로를 입력하세요: ")
        yolo_model_path = '/gdrive/MyDrive/Final project/1_Red/5_분석모델링/YOLOv8/best.pt'
        skin_model_dir = '/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model'
        image_tasks[user_input](image_path, yolo_model_path, skin_model_dir)

    else:
        print("잘못된 입력입니다.")

# facepart0 데이터 이미지 크기 줄이기 (가로 720을 기준으로 원본 비율)

In [None]:
from PIL import Image, ImageFile
import os

# Let Pillow load truncated image files instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Root folder holding the original images
base_folder_path = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 원본 데이터/Training/01.원천데이터'

# Root folder the resized images are written to (an existing folder may be reused)
output_base_folder_path = '/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/facepart0_resized'

# Target width in pixels; the height follows from the original aspect ratio
new_width = 720

# Create the output root if it does not exist yet
os.makedirs(output_base_folder_path, exist_ok=True)

# Walk the source tree and resize every image, mirroring the sub-folder layout.
# NOTE(review): the facepart-0 path helpers in this notebook read
# '<facepart0_resized>/<filename>' directly; confirm the source tree is flat,
# otherwise files written into sub-folders here will not be found.
for root, _, files in os.walk(base_folder_path):
    for filename in files:
        # Match real extensions only (dot included).
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
            continue

        # Source image path
        img_path = os.path.join(root, filename)
        # Output folder mirrors the source folder relative to the root
        relative_path = os.path.relpath(root, base_folder_path)
        output_folder_path = os.path.join(output_base_folder_path, relative_path)
        os.makedirs(output_folder_path, exist_ok=True)

        # Output file path
        output_path = os.path.join(output_folder_path, filename)

        # Skip files that were already resized
        if os.path.exists(output_path):
            print(f'File already exists, skipping: {output_path}')
            continue

        try:
            # The context manager closes the file handle even when resizing fails.
            with Image.open(img_path) as img:
                orig_width, orig_height = img.size

                # New height keeps the original aspect ratio (never below 1 px)
                new_height = max(1, int((new_width / orig_width) * orig_height))

                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                resized_img = img.resize((new_width, new_height), Image.LANCZOS)

                # Save the resized image
                resized_img.save(output_path)

            print(f'Resized and saved: {output_path}')

        except (OSError, IOError) as e:
            print(f'Error processing file {img_path}: {e}')
            continue


# 데이터프레임 전처리

In [None]:
# 데이터 로드 및 전처리
def load_and_preprocess_data():
    """Load the top-level source_type.csv and decode its text-encoded columns.

    String cells of the 'info', 'images', 'annotations' and 'equipment'
    columns are evaluated into Python objects; other cells are kept unchanged.

    Returns:
        The loaded pandas DataFrame.
    """
    def _restore(raw):
        # NOTE(review): eval() runs arbitrary expressions from the CSV; only
        # safe while that file is trusted. ast.literal_eval is the safer option.
        if not isinstance(raw, str):
            return raw
        return eval(raw)

    loaded = pd.read_csv('/gdrive/MyDrive/Final project/source_type.csv')
    text_encoded_columns = ['info','images','annotations', 'equipment']
    for column_name in text_encoded_columns:
        loaded[column_name] = loaded[column_name].apply(_restore)
    return loaded

# 이미지 로드 및 전처리 함수
def load_and_preprocess_image(image_path):
    """Turn a JPEG file into a 224x224, EfficientNet-preprocessed image tensor.

    Args:
        image_path: path of the JPEG file to read.

    Returns:
        The 3-channel image after resizing and ``preprocess_input``.
    """
    target_shape = [224, 224]
    encoded = tf.io.read_file(image_path)
    image = tf.image.resize(tf.image.decode_jpeg(encoded, channels=3), target_shape)
    return tf.keras.applications.efficientnet.preprocess_input(image)
# Load the table and show its first three rows as the cell output.
df = load_and_preprocess_data()
df.head(3)

In [None]:
# Distinct 'id' values among the rows assigned to the validation split.
df.loc[df['source_type'] == 'val', 'id'].unique()

In [None]:
# Print column names, dtypes and non-null counts.
df.info()

In [None]:
# Expand the dict-valued 'info' and 'images' columns into one column per key.
info_df, images_df = (df[column].apply(pd.Series) for column in ('info', 'images'))

# Replace the two dict columns with their expanded versions.
# Note: this rebinds `df`, so the cell cannot be re-run without reloading the data.
df = pd.concat([df.drop(columns=['info', 'images']), info_df, images_df], axis=1)

df.head(3)

In [None]:
# Write the expanded table, without the index column, to the source_type.csv
# path that the training and test cells read from.
df.to_csv('/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 원본 데이터/source_type.csv',index=False)

In [None]:
# Read the CSV back from the same path the previous cell wrote to.
df2 = pd.read_csv('/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 원본 데이터/source_type.csv')

In [None]:
# Print column names, dtypes and non-null counts of the re-read table.
df2.info()