In [None]:
%%bash

# 파일 삭제 명령 수정
rm -rf sample_data/content/sample_data/ISIC_2019_Training_Input
rm -rf sample_data/content/sample_data/ISIC_2019_Training_GroundTruth.csv
rm -rf sample_data/content/sample_data/ISIC_2019_Training_Metadata.csv

# 압축 파일 복사 및 해제
cp /content/drive/MyDrive/archive.zip sample_data/
unzip -o sample_data/archive.zip -d sample_data/  # '-o' 옵션으로 자동 덮어쓰기


Output hidden; open in https://colab.research.google.com to view.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split


In [None]:
data_dir = "/content/sample_data/ISIC_2019_Training_Input/ISIC_2019_Training_Input"
label_file = "/content/sample_data/ISIC_2019_Training_GroundTruth.csv"

In [None]:
data = pd.read_csv(label_file)

In [None]:
data['image_path'] = data['image'].apply(lambda x: os.path.join(data_dir, f"{x}.jpg"))

In [None]:
# 데이터셋 분리 및 전처리
# 멀티클래스 레이블 추출 및 문자열로 변환
labels = data.iloc[:, 1:-1].values  # MEL, NV, BCC 등
labels = np.argmax(labels, axis=1)  # One-hot에서 클래스 인덱스로 변환
labels = labels.astype(str)  # 정수를 문자열로 변환

# 학습 및 검증 데이터 분리
train_paths, val_paths, train_labels, val_labels = train_test_split(
    data['image_path'], labels, test_size=0.2, random_state=42, stratify=labels
)


# 1. 데이터 증강 객체 정의
# data_gen = ImageDataGenerator(
#     rescale=1.0/255,  # 정규화
#     rotation_range=20,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     horizontal_flip=True
# )

data_gen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# 2. 학습 및 검증 데이터 로더 생성
train_gen = data_gen.flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': train_paths, 'class': train_labels}),
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='sparse'
)

val_gen = ImageDataGenerator(rescale=1.0/255).flow_from_dataframe(
    dataframe=pd.DataFrame({'filename': val_paths, 'class': val_labels}),
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=32,
    class_mode='sparse'
)




Found 20264 validated image filenames belonging to 8 classes.
Found 5067 validated image filenames belonging to 8 classes.


In [None]:
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dense(len(np.unique(labels)), activation='softmax')
])


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
# 모델 컴파일
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)


In [None]:
from sklearn.utils.class_weight import compute_class_weight

class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels
)

# 4. 모델 훈련
model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    steps_per_epoch=len(train_gen),
    validation_steps=len(val_gen)
)


Epoch 1/10


  self._warn_if_super_not_called()


[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m784s[0m 1s/step - accuracy: 0.6088 - loss: 1.1400 - val_accuracy: 0.5151 - val_loss: 1.4743
Epoch 2/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m692s[0m 992ms/step - accuracy: 0.6925 - loss: 0.8816 - val_accuracy: 0.6063 - val_loss: 1.4410
Epoch 4/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m701s[0m 1s/step - accuracy: 0.7239 - loss: 0.7774 - val_accuracy: 0.6440 - val_loss: 1.0442
Epoch 6/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m646s[0m 1s/step - accuracy: 0.7344 - loss: 0.7433 - val_accuracy: 0.6846 - val_loss: 0.9039
Epoch 8/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m649s[0m 1s/step - accuracy: 0.7494 - loss: 0.6956 - val_acc

<keras.src.callbacks.history.History at 0x7ed01d5c2290>

In [None]:
model.save("skin_cancer_classification.h5")



In [None]:
from sklearn.utils.class_weight import compute_class_weight

class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels
)

# 4. 모델 훈련
model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    steps_per_epoch=len(train_gen)
)


Epoch 1/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m661s[0m 945ms/step - accuracy: 0.6779 - loss: 0.8674 - val_accuracy: 0.7150 - val_loss: 0.7834
Epoch 2/10


  self.gen.throw(typ, value, traceback)


[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 85ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.7150 - val_loss: 0.7834
Epoch 3/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m649s[0m 946ms/step - accuracy: 0.7296 - loss: 0.7118 - val_accuracy: 0.7241 - val_loss: 0.7533
Epoch 4/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 83ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.7241 - val_loss: 0.7533
Epoch 5/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m606s[0m 908ms/step - accuracy: 0.7499 - loss: 0.6776 - val_accuracy: 0.7278 - val_loss: 0.7933
Epoch 6/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 82ms/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.7278 - val_loss: 0.7933
Epoch 7/10
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m632s[0m 913ms/step - accuracy: 0.7657 - loss: 0.6339 - val_accuracy: 0.7282 - val_loss: 0.7

<keras.src.callbacks.history.History at 0x7af133f68a00>

In [None]:
model.save("skin_cancer_classification2.h5")



In [None]:
# lesion_type_dict = {
#     'NV': 'Melanocytic nevi',
#     'MEL': 'Melanoma',
#     'BKL': 'Benign keratosis ',
#     'BCC': 'Basal cell carcinoma',
#     'AK': 'Actinic keratoses',
#     'VASC': 'Vascular lesions',
#     'DF': 'Dermatofibroma',
#     'SCC' : 'Squamous cell carcinoma'
# }