In [None]:
# Mount Google Drive at /gdrive; every data and model path used further down
# in this notebook lives under /gdrive/MyDrive.
from google.colab import drive

drive.mount(mountpoint='/gdrive', force_remount=True)

In [None]:
!pip install tensorflow

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from IPython.display import display, Javascript

In [None]:
# 런타임 오류 방지 함수
def keep_alive():
    """Inject a browser-side timer that keeps clicking Colab's connect button.

    The displayed JavaScript registers a ``setInterval`` callback that, every
    60000 ms, logs a message and clicks the ``colab-connect-button`` element.
    Each call registers one more timer in the page.

    Returns:
        None. The only effect is the displayed ``Javascript`` object.
    """
    reconnect_script = Javascript('''
        function ClickConnect(){
            console.log("클릭 연결 버튼");
            document.querySelector("colab-connect-button").click()
        }
        setInterval(ClickConnect, 60000)
    ''')
    display(reconnect_script)

# 데이터 로드 및 전처리 함수
def load_and_preprocess_data(csv_path=None):
    """Load the annotation CSV, keep the training split and preprocess it.

    The ``info``, ``images``, ``annotations`` and ``equipment`` columns hold
    stringified Python literals; they are parsed back into objects before the
    frame is filtered to rows whose ``split`` equals ``'Training'``.

    Args:
        csv_path: Optional path of the CSV file. When omitted, the project's
            ``json to df.csv`` on the mounted Drive is read.

    Returns:
        The result of ``preprocess_data`` applied to the training rows.

    Raises:
        ValueError, SyntaxError: If a string cell in one of the four parsed
            columns is not a plain Python literal.
    """
    # Local import: nothing else in the notebook needs ``ast``.
    import ast

    if csv_path is None:
        csv_path = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터/json to df.csv'

    df = pd.read_csv(csv_path)

    # SECURITY: these cells come from a file on disk. ``ast.literal_eval`` only
    # accepts literals (dict/list/str/number/...), whereas the previous
    # ``eval`` would execute any expression stored in the CSV.
    for col in ['info', 'images', 'annotations', 'equipment']:
        df[col] = df[col].apply(
            lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
        )

    # Copy the filtered slice so later column assignments operate on an
    # independent frame instead of a view-like selection of ``df``.
    training_rows = df[df['split'] == 'Training'].copy()

    return preprocess_data(training_rows)

# 데이터 전처리 함수
def preprocess_data(df):
    """Normalise the ``annotations`` / ``equipment`` columns and fill NaNs.

    * ``annotations``: non-dict cells become ``{}``; when the ``acne`` entry is
      a list it is replaced by the list's length.
    * ``equipment``: non-dict cells become ``{}``.
    * Remaining NaN values in the frame are replaced with 0.

    The input frame and the dict objects stored in it are left untouched; the
    cleaned data lives in a new frame.

    Args:
        df: DataFrame with at least ``annotations`` and ``equipment`` columns.

    Returns:
        The result of ``merge_eye_cheek_data`` applied to the cleaned frame.
    """
    def process_annotations(anno):
        if not isinstance(anno, dict):
            return {}
        if isinstance(anno.get('acne'), list):
            # Build a new dict (same key order) instead of editing the caller's.
            return {**anno, 'acne': len(anno['acne'])}
        return anno

    # ``assign`` returns a new frame, so the caller's DataFrame is not modified.
    cleaned = df.assign(
        annotations=df['annotations'].apply(process_annotations),
        equipment=df['equipment'].apply(
            lambda equip: equip if isinstance(equip, dict) else {}
        ),
    ).fillna(0)

    return merge_eye_cheek_data(cleaned)

# 눈가와 볼 데이터 합치기 함수
def merge_eye_cheek_data(df):
    """Relabel eye (3, 4) and cheek (5, 6) rows and merge their measurements.

    Rows whose ``images['facepart']`` is 3 or 4 are relabelled to 34, and rows
    with 5 or 6 to 56. For each merged code, the ``annotations`` and
    ``equipment`` dicts of all rows in that group are combined key by key,
    keeping the largest value, and every row of the group receives its own
    copy of the combined dict.

    NOTE(review): the maximum is taken over *all* rows of the group, so every
    34 (and every 56) row ends up with identical annotations/equipment. If the
    intent was to merge left/right images per subject, a grouping key is
    missing here — confirm against the dataset design.

    Args:
        df: DataFrame with ``images``, ``annotations`` and ``equipment``
            columns holding dicts. Values under the same key must be mutually
            comparable with ``>``.

    Returns:
        A new DataFrame; ``df`` and the dicts it holds are not modified.
    """
    merged_groups = ((34, (3, 4)), (56, (5, 6)))

    def relabel(image):
        for merged_code, raw_codes in merged_groups:
            if image['facepart'] in raw_codes:
                return {**image, 'facepart': merged_code}
        return image

    result = df.copy()
    result['images'] = pd.Series(
        [relabel(image) for image in result['images']],
        index=result.index, dtype=object,
    )
    faceparts = [image['facepart'] for image in result['images']]

    for merged_code, _ in merged_groups:
        in_group = [code == merged_code for code in faceparts]
        if not any(in_group):
            continue

        for column in ('annotations', 'equipment'):
            cells = result[column].tolist()
            merged = _merge_max(cell for cell, hit in zip(cells, in_group) if hit)
            # dict(merged): one dict per row, so editing one row later cannot
            # silently change the others.
            result[column] = pd.Series(
                [dict(merged) if hit else cell for cell, hit in zip(cells, in_group)],
                index=result.index, dtype=object,
            )

    return result


def _merge_max(dicts):
    """Combine dicts key by key, keeping the largest value seen for each key."""
    merged = {}
    for mapping in dicts:
        for key, value in mapping.items():
            if key not in merged or value > merged[key]:
                merged[key] = value
    return merged

# 오버샘플링 함수
def oversample_data(X, y):
    """Balance the classes by randomly duplicating minority-class samples.

    Args:
        X: Feature table handed to ``RandomOverSampler.fit_resample``.
        y: Labels matching ``X``.

    Returns:
        ``(X_resampled, y_resampled)`` as produced by a ``RandomOverSampler``
        seeded with ``random_state=42``.
    """
    balanced_X, balanced_y = RandomOverSampler(random_state=42).fit_resample(X, y)
    return balanced_X, balanced_y

# 클래스 가중치 계산 함수
def compute_class_weights(y):
    """Map each distinct label in ``y`` to its 'balanced' class weight.

    Args:
        y: Array-like of class labels.

    Returns:
        dict whose keys are the unique labels (in ``np.unique`` order) and
        whose values are the weights returned by
        ``compute_class_weight('balanced', ...)``.
    """
    labels = np.unique(y)
    balanced = compute_class_weight('balanced', classes=labels, y=y)
    return {label: weight for label, weight in zip(labels, balanced)}

# 데이터 증강 함수
def augment_data(image):
    """Apply random augmentation to one image tensor.

    In order: a random up/down flip, a random contrast change with a factor
    drawn between 0.8 and 1.2, and a rotation by ``k`` quarter turns where
    ``k`` is a random integer drawn from [0, 4).

    Args:
        image: Image tensor accepted by the ``tf.image`` ops used below.

    Returns:
        The augmented image tensor.
    """
    flipped = tf.image.random_flip_up_down(image)
    recontrasted = tf.image.random_contrast(flipped, lower=0.8, upper=1.2)
    # Drawn after the contrast op, same as the order of the random ops before.
    quarter_turns = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    return tf.image.rot90(recontrasted, k=quarter_turns)

# 이미지 로드 및 전처리 함수
def load_and_preprocess_image(image_path):
    """Read a JPEG file and turn it into an EfficientNet input tensor.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image decoded with 3 channels, resized to 224x224 and passed
        through ``tf.keras.applications.efficientnet.preprocess_input``.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    return tf.keras.applications.efficientnet.preprocess_input(resized)

# 데이터 생성기
def create_data_generator(X, y, directory, batch_size=32, is_training=True):
    """Build a batched ``tf.data.Dataset`` of ``(image, label)`` pairs.

    Args:
        X: Image paths — a Series/sequence, or a DataFrame whose first column
            holds the paths.
        y: Numeric labels aligned with ``X`` — a Series/sequence, or a
            DataFrame whose first column holds the labels.
        directory: Unused; kept so existing callers keep working.
        batch_size: Number of samples per batch.
        is_training: When True the samples are visited in a fresh random
            order on every pass over the dataset.

    Returns:
        A dataset yielding float32 images of shape (224, 224, 3) and scalar
        float32 labels, batched and prefetched.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or a label cannot be
            converted to float32.

    NOTE(review): ``augment_data`` is defined in this file but is not applied
    here — confirm whether training images were meant to be augmented.
    """
    def first_column(values):
        # The classification path passes the single-column DataFrames that
        # come back from oversampling; use that column for paths and labels
        # alike so each label is a scalar matching the shape=() spec below.
        if isinstance(values, pd.DataFrame):
            values = values.iloc[:, 0]
        return list(values)

    paths = first_column(X)
    labels = np.asarray(first_column(y), dtype=np.float32)
    if len(paths) != len(labels):
        raise ValueError(
            f"X and y must have the same length, got {len(paths)} and {len(labels)}"
        )

    def gen():
        # Shuffle the cheap index order rather than the decoded images: a
        # tf.data shuffle buffer of len(X) elements would hold every decoded
        # image in memory before emitting the first one. The generator is
        # restarted for each pass, so every epoch gets a new permutation.
        if is_training:
            order = np.random.permutation(len(paths))
        else:
            order = range(len(paths))
        for i in order:
            yield load_and_preprocess_image(paths[i]), labels[i]

    dataset = tf.data.Dataset.from_generator(
        gen,
        output_signature=(
            tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(), dtype=tf.float32)
        )
    )

    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# 모델 생성 함수
def create_model(output_dim):
    """Build an EfficientNetB0-based model with a small dense head.

    The backbone is EfficientNetB0 with ImageNet weights, no top and a
    224x224x3 input. Its output goes through global average pooling, a
    256-unit ReLU layer and 40% dropout before the output layer.

    Args:
        output_dim: 1 builds a regression head (single linear unit); any
            other value builds a softmax classifier with that many units.

    Returns:
        A ``tf.keras.Model`` mapping the backbone input to the head output.
    """
    backbone = tf.keras.applications.EfficientNetB0(
        weights='imagenet', include_top=False, input_shape=(224, 224, 3)
    )

    features = tf.keras.layers.GlobalAveragePooling2D()(backbone.output)
    features = tf.keras.layers.Dense(256, activation='relu')(features)
    features = tf.keras.layers.Dropout(0.4)(features)

    if output_dim == 1:
        # Regression: one linear output unit.
        head = tf.keras.layers.Dense(1)
    else:
        # Classification: one softmax unit per class.
        head = tf.keras.layers.Dense(output_dim, activation='softmax')
    predictions = head(features)

    return tf.keras.Model(inputs=backbone.input, outputs=predictions)

# 모델 학습 함수
def train_model(model, train_data, val_data, output_dim, facepart, feature_name, model_type, epochs=10, batch_size=32):
    """Compile, fit and save ``model``; return the Keras training history.

    Args:
        model: Keras model to train.
        train_data: Training dataset passed to ``model.fit``.
        val_data: Validation dataset passed to ``model.fit``.
        output_dim: 1 selects MSE loss with an MAE metric (regression); any
            other value selects sparse categorical cross-entropy with
            accuracy (classification).
        facepart: Facepart code, used in the saved file names.
        feature_name: Target feature name, used in the saved file names.
        model_type: Model kind label, used in the saved file names.
        epochs: Maximum number of epochs.
        batch_size: Unused; batching is done by the datasets.

    Returns:
        The object returned by ``model.fit``.

    Side effects:
        Writes the best-``val_loss`` checkpoints and the final model as
        ``.keras`` files into the project's model folder on Drive.
    """
    is_regression = output_dim == 1
    if is_regression:
        loss_name, metric_name = 'mean_squared_error', 'mae'
    else:
        loss_name, metric_name = 'sparse_categorical_crossentropy', 'accuracy'

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss=loss_name,
        metrics=[metric_name],
    )

    # Common prefix of every artifact this run writes.
    artifact_stem = f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature_name}_{model_type}'

    training_callbacks = [
        # Keep only checkpoints that improve val_loss; the epoch number is
        # filled in by Keras through the {epoch:02d} placeholder.
        tf.keras.callbacks.ModelCheckpoint(
            filepath=f'{artifact_stem}_checkpoint_{{epoch:02d}}.keras',
            save_best_only=True,
            save_weights_only=False,
            monitor='val_loss',
            mode='min',
            save_freq='epoch',
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        ),
    ]

    training_history = model.fit(
        train_data,
        validation_data=val_data,
        epochs=epochs,
        verbose=1,
        callbacks=training_callbacks,
    )

    model.save(f'{artifact_stem}_final_model.keras')
    return training_history

# 성능 시각화 함수
def plot_performance(history, facepart, feature_name, model_type):
    """Save a two-panel figure of the training curves as a PNG.

    The left panel shows training and validation loss; the right panel shows
    accuracy when ``model_type`` is ``'classification'`` and MAE otherwise.

    Args:
        history: Object whose ``history`` dict holds the per-epoch values
            (``loss``, ``val_loss`` and the metric with its ``val_`` twin).
        facepart: Facepart code, used in the file name.
        feature_name: Feature name, used in the titles and the file name.
        model_type: Model kind; selects the metric and is part of the file name.

    Returns:
        None. The figure is written to the project's model folder and closed.
    """
    metric_key = 'mae' if model_type != 'classification' else 'accuracy'
    metric_label = metric_key.upper()

    figure, (loss_ax, metric_ax) = plt.subplots(1, 2, figsize=(15, 5))
    curves = history.history

    # (axis, history key, train legend, validation legend, title, y label)
    panels = (
        (loss_ax, 'loss', 'Train Loss', 'Validation Loss',
         f'{feature_name} Loss', 'Loss'),
        (metric_ax, metric_key, f'Train {metric_label}', f'Validation {metric_label}',
         f'{feature_name} {metric_label}', metric_label),
    )
    for axis, key, train_legend, val_legend, title, y_label in panels:
        axis.plot(curves[key], label=train_legend)
        axis.plot(curves[f'val_{key}'], label=val_legend)
        axis.set_title(title)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()

    plt.tight_layout()
    plt.savefig(f'/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model/facepart_{facepart}_{feature_name}_{model_type}_performance.png')
    plt.close()

# facepart별 모델 학습 함수
def train_facepart_models(facepart, df, train_classification=True, train_regression=True):
    """Train and plot one model per target feature for a single facepart.

    Classification targets are the keys of the first selected row's
    ``annotations`` dict; regression targets are ``moisture``,
    ``elasticity_R2``, ``pigmentation_count`` and ``pore`` when present in
    the first selected row's ``equipment`` dict. Each model is trained with
    ``train_model`` on a 90/10 split and its curves are saved with
    ``plot_performance``.

    Args:
        facepart: Facepart code. 34 selects rows labelled 3, 4 or 34 and 56
            selects rows labelled 5, 6 or 56; any other value selects rows
            with exactly that code.
        df: DataFrame with ``images``, ``info``, ``annotations`` and
            ``equipment`` columns holding dicts.
        train_classification: Train the classification models.
        train_regression: Train the regression models.

    Returns:
        None.
    """
    print(f"Processing facepart {facepart}")

    facepart_df = _select_facepart_rows(df, facepart)
    print(f"Number of rows for facepart {facepart}: {len(facepart_df)}")

    if facepart_df.empty:
        # Nothing to train on; the .iloc[0] lookups below would raise IndexError.
        print(f"No rows found for facepart {facepart}; skipping.")
        return

    if facepart == 0:
        train_directory = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터/Training/01.원천데이터'
    else:
        train_directory = f'/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/Training_cropped/{facepart}'

    X = facepart_df['info'].apply(lambda info: str(os.path.join(train_directory, info['filename'])))

    # Classification models
    if train_classification:
        for feature in facepart_df['annotations'].iloc[0].keys():
            y = facepart_df['annotations'].apply(lambda anno: anno.get(feature, 0))
            # acne may still be a raw list: use its length. Done per element,
            # because rows missing the key contribute the int default 0.
            y = y.apply(lambda value: len(value) if isinstance(value, list) else value)

            classes = sorted(set(y))
            if len(classes) < 2:  # need at least two classes to train
                continue

            # sparse_categorical_crossentropy expects labels in
            # [0, n_classes); raw grades such as {0, 2, 5} would index past
            # the softmax width. Labels already 0..n-1 map to themselves.
            class_index = {label: idx for idx, label in enumerate(classes)}
            y_encoded = y.map(class_index).astype('int64')
            print(f"Class index mapping for {feature}: {class_index}")

            X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.1, random_state=42)

            # Oversample the training split only, then flatten the
            # single-column tables back to 1-D so labels stay scalars.
            X_balanced, y_balanced = oversample_data(pd.DataFrame(X_train), pd.DataFrame(y_train))
            X_balanced = pd.DataFrame(X_balanced).iloc[:, 0]
            y_balanced = pd.DataFrame(y_balanced).iloc[:, 0]

            _train_and_report(X_balanced, y_balanced, X_val, y_val, train_directory,
                              len(classes), facepart, feature, 'classification')

    # Regression models
    if train_regression:
        regression_features = [
            'moisture', 'elasticity_R2', 'pigmentation_count', 'pore'
        ]

        for feature in regression_features:
            if feature in facepart_df['equipment'].iloc[0]:
                y = facepart_df['equipment'].apply(lambda equip: equip.get(feature, 0))

                X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

                _train_and_report(X_train, y_train, X_val, y_val, train_directory,
                                  1, facepart, feature, 'regression')


def _select_facepart_rows(df, facepart):
    """Return the rows of ``df`` whose image facepart belongs to ``facepart``."""
    # Accept both the raw codes and the merged code: merge_eye_cheek_data
    # rewrites 3/4 to 34 and 5/6 to 56 before this function sees the frame.
    merged_groups = {34: (3, 4, 34), 56: (5, 6, 56)}
    wanted = merged_groups.get(facepart, (facepart,))
    mask = df['images'].apply(lambda image: image['facepart'] in wanted).astype(bool)
    return df[mask]


def _train_and_report(X_train, y_train, X_val, y_val, train_directory,
                      output_dim, facepart, feature, model_type):
    """Build the datasets, train one model and save its performance plot."""
    train_generator = create_data_generator(X_train, y_train, train_directory)
    val_generator = create_data_generator(X_val, y_val, train_directory, is_training=False)

    print(f"Training {model_type} model for feature: {feature}")
    print(f"Number of training samples: {len(X_train)}")
    print(f"Number of validation samples: {len(X_val)}")

    model = create_model(output_dim)
    history = train_model(model, train_generator, val_generator, output_dim, facepart, feature, model_type)
    plot_performance(history, facepart, feature, model_type)

# 메인 함수
if __name__ == "__main__":
    # Menu choice -> facepart codes to train.
    facepart_choices = {
        '1': [0, 1, 2],
        '2': [34, 56],
        '3': [7, 8],
    }

    keep_alive()

    # Ask for all options before the expensive CSV load so a typo fails fast.
    user_input = input("처리할 facepart 범위를 선택하세요 (1: 0-2, 2: 34,56, 3: 7-8): ").strip()
    facepart_range = facepart_choices.get(user_input)

    if facepart_range is None:
        # Only report the problem: calling exit() inside a notebook cell
        # shuts down the kernel, which also drops the mounted Drive.
        print("잘못된 입력입니다.")
    else:
        train_class = input("분류 모델을 학습하시겠습니까? (y/n): ").strip().lower() == 'y'
        train_reg = input("회귀 모델을 학습하시겠습니까? (y/n): ").strip().lower() == 'y'

        df = load_and_preprocess_data()

        for facepart in facepart_range:
            train_facepart_models(facepart, df, train_classification=train_class, train_regression=train_reg)