In [None]:
# Mount Google Drive at /gdrive; every dataset, cache and model path used in
# this notebook lives under /gdrive/MyDrive.
# force_remount=True asks Colab to mount again even if /gdrive is already mounted.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

In [None]:
!pip install tensorflow

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from tensorflow.keras.utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split

In [None]:
# Helper that keeps the Colab session from disconnecting.
def keep_alive():
    """Inject JavaScript that clicks the Colab connect button every 60 seconds.

    The script registers a browser-side ``setInterval`` timer (60000 ms) that
    looks up the ``colab-connect-button`` element and clicks it.

    Returns:
        None. The only effect is the JavaScript output displayed in the cell.
    """
    # ``Javascript`` is not a builtin and is not imported anywhere else in the
    # notebook, so bring both names into scope here to avoid a NameError.
    from IPython.display import Javascript, display

    display(Javascript('''
        function ClickConnect(){
            console.log("클릭 연결 버튼");
            document.querySelector("colab-connect-button").click()
        }
        setInterval(ClickConnect, 60000)
    '''))

In [None]:
# Dataset class definition: loads and prepares the samples for one face part.
class CachedDataset:
    """Collection of (image path, label array) pairs for one face part and task.

    The list of usable samples is built once from the annotation dataframe and
    stored as a ``.npy`` file under ``cache_dir`` so later runs can skip the
    scan. Only paths and labels are cached; images are read from disk on access.

    Args:
        img_dir: Directory that holds the images for ``facepart``.
        df: DataFrame whose ``images``, ``info``, ``annotations`` and
            ``equipment`` cells are dicts. ``images`` must provide ``facepart``
            and ``bbox``; ``info`` must provide ``filename``.
        facepart: Face-part id. ``0`` uses the file name as is, any other id
            uses ``<stem>_<facepart>.jpg``.
        task: ``'classification'`` or ``'regression'``.
        cache_dir: Directory for the cache files (created when missing).
    """

    def __init__(self, img_dir, df, facepart, task, cache_dir='/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/cache'):
        self.img_dir = img_dir
        self.facepart = facepart
        self.task = task
        os.makedirs(cache_dir, exist_ok=True)
        self.cache_file = os.path.join(cache_dir, f'cache_facepart_{facepart}_{task}.npy')

        if os.path.exists(self.cache_file):
            print(f"facepart {facepart}의 {task} 캐시된 데이터를 불러옵니다...")
            try:
                self.cache = self._load_cache(self.cache_file)
            except Exception as e:
                # An unreadable cache file is rebuilt instead of aborting the run.
                print(f"캐시 파일 로딩 중 오류 발생: {str(e)}")
                print("캐시를 새로 생성합니다...")
                self.cache = self._create_cache(df)
                self._save_cache(self.cache_file, self.cache)
        else:
            print(f"facepart {facepart}의 {task} 캐시를 생성합니다...")
            self.cache = self._create_cache(df)
            self._save_cache(self.cache_file, self.cache)
            print(f"facepart {facepart}의 {task} 캐시가 생성되고 저장되었습니다")

        self.class_weights = self._compute_class_weights()
        self.datagen = ImageDataGenerator(
            vertical_flip=True,
            rotation_range=20,
            brightness_range=[0.8, 1.2],
            validation_split=0.1
        )

    @staticmethod
    def _save_cache(cache_file, cache):
        """Write ``cache`` (a list of ``(path, labels)`` tuples) to ``cache_file``.

        The entries are placed one by one into a 1-D object array. Handing the
        raw list to ``np.save`` makes NumPy try to build a regular array out of
        (str, ndarray) pairs, which is ragged and raises on recent versions.
        """
        packed = np.empty(len(cache), dtype=object)
        for position, entry in enumerate(cache):
            packed[position] = entry
        np.save(cache_file, packed, allow_pickle=True)

    @staticmethod
    def _load_cache(cache_file):
        """Read a cache file and return it as a list of ``(str, ndarray)`` tuples.

        Both the 1-D array of tuples written by ``_save_cache`` and an
        ``(N, 2)`` object array unpack the same way when iterated.
        """
        # allow_pickle executes pickled objects on load; only use cache files
        # this class wrote itself.
        loaded = np.load(cache_file, allow_pickle=True)
        return [(str(path), np.asarray(labels)) for path, labels in loaded]

    def _create_cache(self, df):
        """Scan ``df`` and return the ``(image path, labels)`` list for this face part.

        Rows are skipped when the bbox is not a list of four numbers, when the
        image file does not exist, or when no labels can be prepared.
        """
        cache = []
        df_facepart = df[df['images'].apply(lambda x: x['facepart'] == self.facepart)]

        for _, row in df_facepart.iterrows():
            try:
                bbox = row['images']['bbox']
                if not isinstance(bbox, list) or len(bbox) != 4 or not all(isinstance(b, (int, float)) for b in bbox):
                    continue

                img_name = row['info']['filename']
                if self.facepart == 0:
                    img_path = os.path.join(self.img_dir, img_name)
                else:
                    img_path = os.path.join(self.img_dir, f"{os.path.splitext(img_name)[0]}_{self.facepart}.jpg")

                if not os.path.exists(img_path):
                    continue

                labels = self._prepare_labels(row['annotations'], row['equipment'], row['info'])
                if labels is not None:
                    cache.append((img_path, labels))

            except Exception as e:
                # Best effort: a malformed row is reported and skipped.
                print(f"데이터 처리 중 오류 발생: {str(e)}")
                continue

        return cache

    def _prepare_labels(self, annotations, equipment, info):
        """Build the label array for one row, or return ``None`` when unusable.

        Classification uses ``info['skin_type']`` and ``info['sensitive']`` for
        face part 0 and the values of ``annotations`` otherwise, cast to int.
        Regression uses the values of ``equipment`` cast to float. Non-numeric
        values become 0 / 0.0. Any exception is printed and yields ``None``.
        """
        try:
            if self.task == 'classification':
                if self.facepart == 0:
                    raw = [info['skin_type'], info['sensitive']]
                elif annotations:
                    raw = list(annotations.values())
                else:
                    raw = []
                labels = np.array([int(v) if isinstance(v, (int, float)) else 0 for v in raw])
            elif self.task == 'regression':
                raw = list(equipment.values()) if equipment else []
                labels = np.array([float(v) if isinstance(v, (int, float)) else 0.0 for v in raw])
            else:
                # Unknown task: there is nothing to extract.
                return None

            if labels.size == 0:
                return None
        except Exception as e:
            print(f"레이블 준비 중 오류 발생: {str(e)}")
            return None
        return labels

    def _compute_class_weights(self):
        """Return ``{class_id: weight}`` for classification, else ``None``.

        Weights come from scikit-learn's ``'balanced'`` heuristic and are keyed
        by the actual class id, not by the position in the sorted unique labels.
        Every id from 0 to the largest label gets an entry (1.0 when the id
        never occurs) because Keras expects contiguous keys starting at 0.
        Assumes class ids are non-negative integers.
        """
        if self.task != 'classification' or len(self.cache) == 0:
            return None
        all_labels = np.concatenate([label for _, label in self.cache])
        classes = np.unique(all_labels)
        weights = compute_class_weight('balanced', classes=classes, y=all_labels)
        weight_by_class = dict(zip(classes.tolist(), np.asarray(weights).tolist()))
        return {class_id: float(weight_by_class.get(class_id, 1.0))
                for class_id in range(int(classes.max()) + 1)}

    def __len__(self):
        """Number of cached samples."""
        return len(self.cache)

    def __getitem__(self, idx):
        """Load sample ``idx`` as ``(preprocessed 224x224 image, labels)``; no augmentation."""
        img_path, label = self.cache[idx]
        image = load_img(img_path, target_size=(224, 224))
        image = img_to_array(image)
        image = preprocess_input(image)
        return image, label

    def get_num_classes(self):
        """Return the size of the model output layer for this dataset.

        Classification: largest label value + 1 over all cached labels.
        Regression: length of the first cached label array.
        An empty cache yields 0 for both tasks.
        """
        if len(self.cache) == 0:
            return 0
        if self.task == 'classification':
            all_labels = np.concatenate([label for _, label in self.cache])
            return int(np.max(all_labels)) + 1
        return int(self.cache[0][1].shape[0])

    def get_generator(self, subset, batch_size=32):
        """Yield ``(images, labels)`` batches forever.

        Args:
            subset: ``'training'`` enables random augmentation; any other value
                (e.g. ``'validation'``) yields un-augmented images.
            batch_size: Number of samples per yielded batch.

        Yields:
            Tuple of arrays with a leading batch axis. Samples left over at the
            end of one pass are carried into the first batch of the next pass,
            so every batch has exactly ``batch_size`` samples.

        Raises:
            ValueError: If the dataset is empty (nothing could ever be yielded).
        """
        if len(self.cache) == 0:
            raise ValueError("cannot generate batches from an empty dataset")

        augment = subset == 'training'
        batch_images, batch_labels = [], []
        while True:
            for img_path, label in self.cache:
                image = img_to_array(load_img(img_path, target_size=(224, 224)))
                if augment:
                    # Augment raw pixel values first; the brightness shift works on
                    # image-range data, so it must run before preprocess_input.
                    image = self.datagen.random_transform(image)
                batch_images.append(preprocess_input(image))
                batch_labels.append(label)
                if len(batch_images) == batch_size:
                    yield np.stack(batch_images), np.stack(batch_labels)
                    batch_images, batch_labels = [], []

In [None]:
# ResNet50 model factory.
def create_resnet_model(num_outputs, task):
    """Build a ResNet50-based model with a task-specific output layer.

    An ImageNet-pretrained ResNet50 without its top classifier is followed by
    global average pooling, dropout (rate 0.5) and a dense layer of
    ``num_outputs`` units.

    Args:
        num_outputs: Number of units in the final dense layer.
        task: ``'classification'`` selects a softmax output; any other value
            (regression) selects a linear output.

    Returns:
        The assembled, uncompiled Keras ``Model`` taking 224x224x3 inputs.
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    pooled = GlobalAveragePooling2D()(backbone.output)
    regularized = Dropout(0.5)(pooled)

    head_activation = 'softmax' if task == 'classification' else 'linear'
    predictions = Dense(num_outputs, activation=head_activation)(regularized)

    return Model(inputs=backbone.input, outputs=predictions)

# Polynomial learning-rate decay: large steps early in training, progressively
# smaller steps later so the model converges more steadily.
def lr_poly(base_lr, iter, max_iter, power):
    """Return ``base_lr * (1 - iter / max_iter) ** power``.

    Args:
        base_lr: Learning rate at iteration 0.
        iter: Current iteration (or epoch) index.
        max_iter: Iteration at which the rate reaches zero.
        power: Exponent of the decay curve.

    Returns:
        The decayed learning rate as a float. For ``iter >= max_iter`` the
        remaining fraction is clamped to 0; without the clamp a negative base
        raised to a fractional power would produce a complex number.

    Raises:
        ZeroDivisionError: If ``max_iter`` is 0.
    """
    remaining = max(0.0, 1 - float(iter) / max_iter)
    return base_lr * (remaining ** power)

In [None]:
# Train, validate, checkpoint and plot the learning curves of one model.
def train_model(model, train_dataset, val_dataset, num_epochs, facepart, task):
    """Compile and fit ``model``, saving checkpoints and a learning-curve plot.

    Args:
        model: Uncompiled Keras model.
        train_dataset: Object providing ``__len__``, ``get_generator(subset)``
            and ``class_weights`` (used for classification only).
        val_dataset: Object providing ``__len__`` and ``get_generator(subset)``.
        num_epochs: Maximum number of epochs; also the horizon of the
            polynomial learning-rate schedule.
        facepart: Face-part id, used in output file names and plot titles.
        task: ``'classification'`` (sparse categorical cross-entropy, accuracy)
            or anything else for regression (MSE, MAE).

    Returns:
        The trained model. Early stopping restores the best weights seen.
    """
    save_path = '/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/model'
    # The checkpoint callbacks and savefig do not create missing directories.
    os.makedirs(save_path, exist_ok=True)

    batch_size = 32
    # Integer division gives 0 for datasets smaller than one batch; always run
    # at least one step.
    steps_per_epoch = max(1, len(train_dataset) // batch_size)
    validation_steps = max(1, len(val_dataset) // batch_size)

    def lr_scheduler(epoch, lr):
        # The incoming lr is ignored: the rate is a pure function of the epoch.
        return lr_poly(1e-3, epoch, num_epochs, 0.9)

    callbacks = [
        ModelCheckpoint(os.path.join(save_path, f'best_model_resnet50_facepart_{facepart}_{task}.h5'),
                        save_best_only=True, monitor='val_loss'),
        EarlyStopping(patience=10, restore_best_weights=True),
        LearningRateScheduler(lr_scheduler),
        # `period` is deprecated/removed; `save_freq` counts batches, so ten
        # epochs equal 10 * steps_per_epoch. The `.weights.h5` suffix is the
        # name newer Keras versions require for weights-only checkpoints and
        # is still written as HDF5 by older ones.
        ModelCheckpoint(os.path.join(save_path, f'checkpoint_resnet50_facepart_{facepart}_{task}_epoch_{{epoch:02d}}.weights.h5'),
                        save_weights_only=True, save_freq=10 * steps_per_epoch)
    ]

    optimizer = Adam(learning_rate=1e-3)
    if task == 'classification':
        model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    else:  # regression
        model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    history = model.fit(
        train_dataset.get_generator('training'),
        steps_per_epoch=steps_per_epoch,
        validation_data=val_dataset.get_generator('validation'),
        validation_steps=validation_steps,
        epochs=num_epochs,
        callbacks=callbacks,
        verbose=1,
        class_weight=train_dataset.class_weights if task == 'classification' else None
    )

    # Plot the learning curves and save them next to the checkpoints.
    metric_key = 'accuracy' if task == 'classification' else 'mae'
    metric_label = metric_key.capitalize()
    fig, (loss_ax, metric_ax) = plt.subplots(1, 2, figsize=(10, 5))

    loss_ax.plot(history.history['loss'], label='Train Loss')
    loss_ax.plot(history.history['val_loss'], label='Validation Loss')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title(f'Loss for ResNet50 Facepart {facepart} {task}')
    loss_ax.legend()
    loss_ax.set_xticks(range(0, num_epochs + 1, 5))

    metric_ax.plot(history.history[metric_key], label=f'Train {metric_label}')
    metric_ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    metric_ax.set_xlabel('Epochs')
    metric_ax.set_ylabel(metric_label)
    metric_ax.set_title(f'{metric_label} for ResNet50 Facepart {facepart} {task}')
    metric_ax.legend()
    metric_ax.set_xticks(range(0, num_epochs + 1, 5))

    fig.tight_layout()
    fig.savefig(os.path.join(save_path, f'plot_resnet50_facepart_{facepart}_{task}.png'))
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    return model

In [None]:
# Main routine.
def main(facepart_range):
    """Train a classification and a regression model for each face part.

    Reads the annotation CSV, keeps the rows whose ``split`` is ``'Training'``,
    and for every face part in ``facepart_range`` builds a dataset, splits it
    90/10 into training and validation parts, trains a ResNet50 model and saves
    it. Regression is skipped for face parts 2 and 7. A failure for one
    (face part, task) pair is printed and the loop moves on to the next pair.

    Args:
        facepart_range: Iterable of face-part ids to process.

    Returns:
        None. Returns early (after printing a message) when the CSV cannot be
        read.
    """
    # Local imports keep this cell runnable on its own.
    import ast
    import copy

    base_path = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터'

    try:
        df = pd.read_csv(os.path.join(base_path, 'json to df.csv'))
    except FileNotFoundError:
        print("CSV 파일을 찾을 수 없습니다. 경로를 확인해주세요.")
        return
    except pd.errors.EmptyDataError:
        print("CSV 파일이 비어있습니다.")
        return
    except pd.errors.ParserError:
        print("CSV 파일 파싱 중 오류가 발생했습니다. 파일 형식을 확인해주세요.")
        return

    # The dict columns are stored as text in the CSV. ast.literal_eval parses
    # Python literals only, so unlike eval() it cannot execute code embedded
    # in the file.
    for col in ['info', 'images', 'annotations', 'equipment']:
        df[col] = df[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

    df_train = df[df['split'] == 'Training']

    for facepart in facepart_range:
        for task in ['classification', 'regression']:
            if task == 'regression' and facepart in [2, 7]:
                continue

            print(f"facepart {facepart} {task} 처리 중")

            if facepart == 0:
                img_dir = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터/Training/01.원천데이터'
            else:
                img_dir = f'/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/Training_cropped/{facepart}'

            if not os.path.exists(img_dir):
                print(f"이미지 디렉토리를 찾을 수 없습니다: {img_dir}")
                continue

            try:
                dataset = CachedDataset(img_dir, df_train, facepart, task)
                if len(dataset) == 0:
                    print(f"facepart {facepart}의 {task} 데이터셋이 비어 있습니다. 다음으로 진행합니다.")
                    continue

                # Create the model.
                num_classes = dataset.get_num_classes()
                model = create_resnet_model(num_classes, task)

                # Split the data. train_test_split applied to the dataset itself
                # returns plain lists without get_generator(), so split the
                # sample indices and give each part its own shallow copy of the
                # dataset holding only its share of the cache.
                indices = np.arange(len(dataset))
                train_idx, val_idx = train_test_split(indices, test_size=0.1, random_state=0)
                train_dataset = copy.copy(dataset)
                train_dataset.cache = [dataset.cache[i] for i in train_idx]
                val_dataset = copy.copy(dataset)
                val_dataset.cache = [dataset.cache[i] for i in val_idx]

                # Train the model.
                model = train_model(model, train_dataset, val_dataset, num_epochs=100,
                                    facepart=facepart, task=task)

            except Exception as e:
                print(f"처리 중 오류 발생: {str(e)}")
                continue

            # Save the final model.
            save_path = '/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단'
            try:
                os.makedirs(save_path, exist_ok=True)
                model.save(os.path.join(save_path, f'final_model_resnet50_facepart_{facepart}_{task}.h5'))
            except Exception as e:
                print(f"모델 저장 중 오류 발생: {str(e)}")

In [None]:
# Entry point: ask which group of face parts to process, then run main().
if __name__ == "__main__":
    keep_alive()
    user_input = input("처리할 facepart 범위를 선택하세요 (1: 0-2, 2: 3-6, 3: 7-8): ").strip()
    facepart_ranges = {'1': range(0, 3), '2': range(3, 7), '3': range(7, 9)}
    facepart_range = facepart_ranges.get(user_input)

    if facepart_range is None:
        # Do not call exit() here: inside a notebook it shuts the kernel down
        # and discards all state. Simply skip the run instead.
        print("잘못된 입력입니다. 프로그램을 종료합니다.")
    else:
        main(facepart_range)

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

# Mount Google Drive at /gdrive; the dataset, cache and model paths used in
# this cell live under /gdrive/MyDrive.
# force_remount=True asks Colab to mount again even if /gdrive is already mounted.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

# Helper that keeps the Colab session from disconnecting.
def keep_alive():
    """Inject JavaScript that clicks the Colab connect button every 60 seconds.

    The script registers a browser-side ``setInterval`` timer (60000 ms) that
    looks up the ``colab-connect-button`` element and clicks it.

    Returns:
        None. The only effect is the JavaScript output displayed in the cell.
    """
    # ``Javascript`` is not a builtin and this cell's imports do not provide
    # it, so bring both names into scope here to avoid a NameError.
    from IPython.display import Javascript, display

    display(Javascript('''
        function ClickConnect(){
            console.log("클릭 연결 버튼");
            document.querySelector("colab-connect-button").click()
        }
        setInterval(ClickConnect, 60000)
    '''))

class CachedDataset:
    """Collection of (image path, label array) pairs with class oversampling.

    The list of usable samples is built once from the annotation dataframe and
    stored as a ``.npy`` file under ``cache_dir`` so later runs can skip the
    scan. Only paths and labels are cached; images are read from disk on
    access. For classification the in-memory sample list is additionally
    oversampled so that classes are balanced; the file on disk is written
    before balancing and is not changed by it.

    Args:
        img_dir: Directory that holds the images for ``facepart``.
        df: DataFrame whose ``images``, ``info``, ``annotations`` and
            ``equipment`` cells are dicts. ``images`` must provide ``facepart``
            and ``bbox``; ``info`` must provide ``filename``.
        facepart: Face-part id. ``0`` uses the file name as is, any other id
            uses ``<stem>_<facepart>.jpg``.
        task: ``'classification'`` or ``'regression'``.
        cache_dir: Directory for the cache files (created when missing).
    """

    def __init__(self, img_dir, df, facepart, task, cache_dir='/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단/cache'):
        self.img_dir = img_dir
        self.facepart = facepart
        self.task = task
        os.makedirs(cache_dir, exist_ok=True)
        self.cache_file = os.path.join(cache_dir, f'cache_facepart_{facepart}_{task}.npy')

        if os.path.exists(self.cache_file):
            print(f"facepart {facepart}의 {task} 캐시된 데이터를 불러옵니다...")
            try:
                self.cache = self._load_cache(self.cache_file)
            except Exception as e:
                # An unreadable cache file is rebuilt instead of aborting the run.
                print(f"캐시 파일 로딩 중 오류 발생: {str(e)}")
                print("캐시를 새로 생성합니다...")
                self.cache = self._create_cache(df)
                self._save_cache(self.cache_file, self.cache)
        else:
            print(f"facepart {facepart}의 {task} 캐시를 생성합니다...")
            self.cache = self._create_cache(df)
            self._save_cache(self.cache_file, self.cache)
            print(f"facepart {facepart}의 {task} 캐시가 생성되고 저장되었습니다")

        self.datagen = ImageDataGenerator(
            vertical_flip=True,
            rotation_range=20,
            brightness_range=[0.8, 1.2],
            validation_split=0.1
        )

        if self.task == 'classification':
            self._balance_classes()

        # Computed after balancing so the weights describe the samples the
        # generator actually yields; weighting by the pre-balancing
        # distribution on top of oversampling would correct the imbalance twice.
        self.class_weights = self._compute_class_weights()

    @staticmethod
    def _save_cache(cache_file, cache):
        """Write ``cache`` (a list of ``(path, labels)`` tuples) to ``cache_file``.

        The entries are placed one by one into a 1-D object array. Handing the
        raw list to ``np.save`` makes NumPy try to build a regular array out of
        (str, ndarray) pairs, which is ragged and raises on recent versions.
        """
        packed = np.empty(len(cache), dtype=object)
        for position, entry in enumerate(cache):
            packed[position] = entry
        np.save(cache_file, packed, allow_pickle=True)

    @staticmethod
    def _load_cache(cache_file):
        """Read a cache file and return it as a list of ``(str, ndarray)`` tuples.

        Both the 1-D array of tuples written by ``_save_cache`` and an
        ``(N, 2)`` object array unpack the same way when iterated.
        """
        # allow_pickle executes pickled objects on load; only use cache files
        # this class wrote itself.
        loaded = np.load(cache_file, allow_pickle=True)
        return [(str(path), np.asarray(labels)) for path, labels in loaded]

    def _create_cache(self, df):
        """Scan ``df`` and return the ``(image path, labels)`` list for this face part.

        Rows are skipped when the bbox is not a list of four numbers, when the
        image file does not exist, or when no labels can be prepared.
        """
        cache = []
        df_facepart = df[df['images'].apply(lambda x: x['facepart'] == self.facepart)]

        for _, row in df_facepart.iterrows():
            try:
                bbox = row['images']['bbox']
                if not isinstance(bbox, list) or len(bbox) != 4 or not all(isinstance(b, (int, float)) for b in bbox):
                    continue

                img_name = row['info']['filename']
                if self.facepart == 0:
                    img_path = os.path.join(self.img_dir, img_name)
                else:
                    img_path = os.path.join(self.img_dir, f"{os.path.splitext(img_name)[0]}_{self.facepart}.jpg")

                if not os.path.exists(img_path):
                    continue

                labels = self._prepare_labels(row['annotations'], row['equipment'], row['info'])
                if labels is not None:
                    cache.append((img_path, labels))

            except Exception as e:
                # Best effort: a malformed row is reported and skipped.
                print(f"데이터 처리 중 오류 발생: {str(e)}")
                continue

        return cache

    def _prepare_labels(self, annotations, equipment, info):
        """Build the label array for one row, or return ``None`` when unusable.

        Classification uses ``info['skin_type']`` and ``info['sensitive']`` for
        face part 0 and the values of ``annotations`` otherwise, cast to int.
        Regression uses the values of ``equipment`` cast to float. Non-numeric
        values become 0 / 0.0. Any exception is printed and yields ``None``.
        """
        try:
            if self.task == 'classification':
                if self.facepart == 0:
                    raw = [info['skin_type'], info['sensitive']]
                elif annotations:
                    raw = list(annotations.values())
                else:
                    raw = []
                labels = np.array([int(v) if isinstance(v, (int, float)) else 0 for v in raw])
            elif self.task == 'regression':
                raw = list(equipment.values()) if equipment else []
                labels = np.array([float(v) if isinstance(v, (int, float)) else 0.0 for v in raw])
            else:
                # Unknown task: there is nothing to extract.
                return None

            if labels.size == 0:
                return None
        except Exception as e:
            print(f"레이블 준비 중 오류 발생: {str(e)}")
            return None
        return labels

    def _compute_class_weights(self):
        """Return ``{class_id: weight}`` for classification, else ``None``.

        Weights come from scikit-learn's ``'balanced'`` heuristic and are keyed
        by the actual class id, not by the position in the sorted unique labels.
        Every id from 0 to the largest label gets an entry (1.0 when the id
        never occurs) because Keras expects contiguous keys starting at 0.
        Assumes class ids are non-negative integers.
        """
        if self.task != 'classification' or len(self.cache) == 0:
            return None
        all_labels = np.concatenate([label for _, label in self.cache])
        classes = np.unique(all_labels)
        weights = compute_class_weight('balanced', classes=classes, y=all_labels)
        weight_by_class = dict(zip(classes.tolist(), np.asarray(weights).tolist()))
        return {class_id: float(weight_by_class.get(class_id, 1.0))
                for class_id in range(int(classes.max()) + 1)}

    def _balance_classes(self):
        """Oversample minority classes in ``self.cache`` (classification only).

        Sample positions, not paths, are resampled and the cache is rebuilt from
        the selected positions, so every entry keeps its original label array.
        Rebuilding labels from the sampler's ``y`` output would turn them into
        scalars or collapsed targets.
        """
        if self.task != 'classification' or len(self.cache) == 0:
            return

        sample_ids = np.arange(len(self.cache)).reshape(-1, 1)
        y = np.array([label for _, label in self.cache])
        if y.ndim == 2 and y.shape[1] == 1:
            # One label per sample: present it as a plain 1-D class vector.
            y = y.ravel()
        if y.ndim == 1 and np.unique(y).size < 2:
            # A single class has nothing to balance.
            return

        ros = RandomOverSampler(random_state=42)
        resampled_ids, _ = ros.fit_resample(sample_ids, y)
        self.cache = [self.cache[int(i)] for i in resampled_ids[:, 0]]

    def __len__(self):
        """Number of samples currently held (after any oversampling)."""
        return len(self.cache)

    def __getitem__(self, idx):
        """Load sample ``idx`` as ``(preprocessed 224x224 image, labels)``; no augmentation."""
        img_path, label = self.cache[idx]
        image = load_img(img_path, target_size=(224, 224))
        image = img_to_array(image)
        image = preprocess_input(image)
        return image, label

    def get_num_classes(self):
        """Return the size of the model output layer for this dataset.

        Classification: largest label value + 1 over all cached labels.
        Regression: length of the first cached label array.
        An empty cache yields 0 for both tasks.
        """
        if len(self.cache) == 0:
            return 0
        if self.task == 'classification':
            all_labels = np.concatenate([label for _, label in self.cache])
            return int(np.max(all_labels)) + 1
        return int(self.cache[0][1].shape[0])

    def get_generator(self, subset, batch_size=32):
        """Yield ``(images, labels)`` batches forever.

        Args:
            subset: ``'training'`` enables random augmentation; any other value
                (e.g. ``'validation'``) yields un-augmented images.
            batch_size: Number of samples per yielded batch.

        Yields:
            Tuple of arrays with a leading batch axis. Samples left over at the
            end of one pass are carried into the first batch of the next pass,
            so every batch has exactly ``batch_size`` samples.

        Raises:
            ValueError: If the dataset is empty (nothing could ever be yielded).
        """
        if len(self.cache) == 0:
            raise ValueError("cannot generate batches from an empty dataset")

        augment = subset == 'training'
        batch_images, batch_labels = [], []
        while True:
            for img_path, label in self.cache:
                image = img_to_array(load_img(img_path, target_size=(224, 224)))
                if augment:
                    # Augment raw pixel values first; the brightness shift works on
                    # image-range data, so it must run before preprocess_input.
                    image = self.datagen.random_transform(image)
                batch_images.append(preprocess_input(image))
                batch_labels.append(label)
                if len(batch_images) == batch_size:
                    yield np.stack(batch_images), np.stack(batch_labels)
                    batch_images, batch_labels = [], []

# The remaining functions (create_resnet_model, lr_poly, train_model) are kept unchanged from the earlier cells.

def main(facepart_range):
    """Train a classification and a regression model for each face part.

    Reads the annotation CSV, keeps the rows whose ``split`` is ``'Training'``,
    and for every face part in ``facepart_range`` builds a dataset, splits it
    90/10 into training and validation parts, trains a ResNet50 model and saves
    it. Regression is skipped for face parts 2 and 7. A failure for one
    (face part, task) pair is printed and the loop moves on to the next pair.

    Args:
        facepart_range: Iterable of face-part ids to process.

    Returns:
        None. Returns early (after printing a message) when the CSV cannot be
        read.
    """
    # Local imports keep this cell runnable on its own.
    import ast
    import copy

    base_path = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터'

    try:
        df = pd.read_csv(os.path.join(base_path, 'json to df.csv'))
    except FileNotFoundError:
        print("CSV 파일을 찾을 수 없습니다. 경로를 확인해주세요.")
        return
    except pd.errors.EmptyDataError:
        print("CSV 파일이 비어있습니다.")
        return
    except pd.errors.ParserError:
        print("CSV 파일 파싱 중 오류가 발생했습니다. 파일 형식을 확인해주세요.")
        return

    # The dict columns are stored as text in the CSV. ast.literal_eval parses
    # Python literals only, so unlike eval() it cannot execute code embedded
    # in the file.
    for col in ['info', 'images', 'annotations', 'equipment']:
        df[col] = df[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

    df_train = df[df['split'] == 'Training']

    for facepart in facepart_range:
        for task in ['classification', 'regression']:
            if task == 'regression' and facepart in [2, 7]:
                continue

            print(f"facepart {facepart} {task} 처리 중")

            if facepart == 0:
                img_dir = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터/Training/01.원천데이터'
            else:
                img_dir = f'/gdrive/MyDrive/Final project/1_Red/4_데이터탐색_전처리/facepart별 피부 이미지/Training_cropped/{facepart}'

            if not os.path.exists(img_dir):
                print(f"이미지 디렉토리를 찾을 수 없습니다: {img_dir}")
                continue

            try:
                dataset = CachedDataset(img_dir, df_train, facepart, task)
                if len(dataset) == 0:
                    print(f"facepart {facepart}의 {task} 데이터셋이 비어 있습니다. 다음으로 진행합니다.")
                    continue

                # Create the model.
                num_classes = dataset.get_num_classes()
                model = create_resnet_model(num_classes, task)

                # Split the data. train_test_split applied to the dataset itself
                # returns plain lists without get_generator(), so split the
                # sample indices and give each part its own shallow copy of the
                # dataset holding only its share of the cache.
                # NOTE(review): CachedDataset in this cell oversamples inside
                # __init__, i.e. before this split, so copies of one image can
                # land in both parts - confirm whether that is acceptable.
                indices = np.arange(len(dataset))
                train_idx, val_idx = train_test_split(indices, test_size=0.1, random_state=0)
                train_dataset = copy.copy(dataset)
                train_dataset.cache = [dataset.cache[i] for i in train_idx]
                val_dataset = copy.copy(dataset)
                val_dataset.cache = [dataset.cache[i] for i in val_idx]

                # Train the model.
                model = train_model(model, train_dataset, val_dataset, num_epochs=100,
                                    facepart=facepart, task=task)
            except Exception as e:
                print(f"처리 중 오류 발생: {str(e)}")
                continue

            # Save the final model.
            save_path = '/gdrive/MyDrive/Final project/1_Red/5_분석모델링/피부진단'
            try:
                os.makedirs(save_path, exist_ok=True)
                model.save(os.path.join(save_path, f'final_model_resnet50_facepart_{facepart}_{task}.h5'))
            except Exception as e:
                print(f"모델 저장 중 오류 발생: {str(e)}")

# Entry point: ask which group of face parts to process, then run main().
if __name__ == "__main__":
    keep_alive()
    user_input = input("처리할 facepart 범위를 선택하세요 (1: 0-2, 2: 3-6, 3: 7-8): ").strip()
    facepart_ranges = {'1': range(0, 3), '2': range(3, 7), '3': range(7, 9)}
    facepart_range = facepart_ranges.get(user_input)

    if facepart_range is None:
        # Do not call exit() here: inside a notebook it shuts the kernel down
        # and discards all state. Simply skip the run instead.
        print("잘못된 입력입니다.")
    else:
        main(facepart_range)

In [None]:
# Show the annotations cell of the row at position 954.
# NOTE(review): in the visible notebook a module-level `df` is only created by
# the cell that follows this one - confirm the intended execution order.
df['annotations'].iloc[954]

In [None]:
import ast

# Load the annotation table and turn the dict columns, stored as text in the
# CSV, back into Python objects.
base_path = '/gdrive/MyDrive/Final project/1_Red/3_데이터수집_저장/0_데이터수집폴더/피부 데이터'
df = pd.read_csv(os.path.join(base_path, 'json to df.csv'))
for col in ['info', 'images', 'annotations', 'equipment']:
    # ast.literal_eval parses Python literals only, so unlike eval() it cannot
    # execute code embedded in the file.
    df[col] = df[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Keep the rows whose 'acne' annotation is present and non-zero. Cells that
# are not dicts (e.g. missing values) are excluded instead of raising.
filtered_df = df[df['annotations'].apply(lambda x: isinstance(x, dict) and x.get('acne', 0) != 0)]

print(filtered_df)