In [1]:
import numpy as np
import pandas as pd

import os
for dirname, _, filenames in os.walk('/data'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [2]:
import os
import cv2
import random
import shutil
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from PIL import Image

In [3]:
#경로 지정
train_dir = 'data/Training'
test_dir = 'data/Testing'

In [4]:
#레이블 값 (classes)
classes = os.listdir(train_dir)
print(classes)

['no_tumor', 'meningioma_tumor', 'glioma_tumor', 'pituitary_tumor']


In [5]:
resize_size = 256
crop_size = 224

def preprocess_image(image):
    image = tf.image.resize(image, [resize_size, resize_size], method=tf.image.ResizeMethod.BILINEAR) #크기 조절
    image = tf.image.central_crop(image, central_fraction=crop_size / resize_size) #중앙 224x224
    image = tf.math.divide(image, 255.0) #normalize
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    image = (image - mean) / std #다 normalize

    return image

In [6]:
from sklearn.preprocessing import LabelEncoder

X = [] #Image
y = [] #class
for i in classes:
    folderPath = os.path.join(train_dir,i)
    for j in tqdm(os.listdir(folderPath)):
        img = cv2.imread(os.path.join(folderPath,j)) #이미지 읽기
        img = preprocess_image(img) #전처리
        X.append(img) #X list 넣고
        y.append(i) # y list
X = np.array(X)
y = np.array(y)
y = tf.keras.utils.to_categorical([classes.index(label) for label in y]) #문자열 -> [0,3]

  1%|▎                                            | 3/395 [00:00<00:13, 28.97it/s]

Metal device set to: Apple M2 Pro


100%|██████████████████████████████████████████| 395/395 [00:01<00:00, 238.36it/s]
100%|██████████████████████████████████████████| 822/822 [00:03<00:00, 248.05it/s]
 78%|████████████████████████████████▋         | 644/826 [00:02<00:00, 242.64it/s]


KeyboardInterrupt: 

In [None]:
X_train, xx, y_train, yy = train_test_split(X,y, test_size=0.2, random_state=42) #training 분할 (train,val)
X_train, X_val, y_train, y_val = train_test_split(xx,yy, test_size=0.5, random_state=42) #training 분할 (train,val)

In [None]:
X_test = []
y_test = []
for i in classes:
    folderPath = os.path.join(test_dir,i)
    for j in tqdm(os.listdir(folderPath)):
        img = cv2.imread(os.path.join(folderPath,j))
        img = preprocess_image(img)
        X_test.append(img)
        y_test.append(i)
X_test = np.array(X_test)
y_test = np.array(y_test)
y_test = tf.keras.utils.to_categorical([classes.index(label) for label in y_test]) #testing -> testset

In [None]:
# img_datagen = ImageDataGenerator(
#     rotation_range=30,
#     rescale = 1./255.,
#     width_shift_range = 0.3,
#     height_shift_range = 0.3,
#     shear_range = 0.3,
#     zoom_range = 0.3,
#     horizontal_flip = True,
#     vertical_flip = True)

# img_datagen.fit(X_train)

In [None]:
#그래프 그려주는 거 (성능 그래프)
def plot_acc_model(acc, val_acc, epochs):
    plt.plot(epochs, acc, 'r', label='Training accuracy')
    plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.legend(loc=0)
    plt.figure()
    plt.show()
    
def plot_loss_model(loss, val_loss, epochs):
    plt.plot(epochs, loss, 'r', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend(loc=0)
    plt.figure()
    plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetV2L
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model

In [None]:
#VGG16, imagenet에 대해서 사전학습된거, 224x224 -> Input, layer.trainable -> 사전학습된 모델이어서 처음에는 가중치 false 학습이 안되어서
base_model = EfficientNetV2L(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False

In [None]:
x = Flatten()(base_model.output)
#class가 4개라서
output = Dense(4, activation='softmax')(x)

In [None]:
#파라미터 튜닝 : learning_rate 부분 값 수정, optimizer -> 다른 optimizer 사용, loss = 다른 loss 사용.
model = Model(inputs=base_model.input, outputs=output)
optimizer=tf.keras.optimizers.SGD(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
model.summary()

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger

epochs = 100

# 조기 종료를 위한 콜백
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# 최고의 모델을 저장하기 위한 콜백
best_model_checkpoint = ModelCheckpoint("model/eff/best_model.h5", monitor='val_loss', save_best_only=True, mode='min', verbose=1)

# 10 에폭마다 모델을 저장하기 위한 콜백
model_checkpoint = ModelCheckpoint("model/eff/eff_epoch{epoch}.h5", period=1, verbose=1)

# CSV 파일에 기록하기 위한 콜백
csv_logger = CSVLogger('training_log.csv', separator=',', append=False)

# 훈련 기록 저장을 위한 콜백
class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.losses = []
        self.val_losses = []
        self.acc = []
        self.val_acc = []

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.acc.append(logs.get('accuracy'))
        self.val_acc.append(logs.get('val_accuracy'))

history_callback = LossHistory()

# 모델 훈련
history = model.fit(X_train, y_train,
                    epochs=epochs,
                    validation_data=(X_val, y_val),
                    verbose=1,
                    callbacks=[early_stopping, best_model_checkpoint, model_checkpoint, history_callback, csv_logger])

# 훈련 기록에 접근
print("훈련 손실: ", history_callback.losses)
print("검증 손실: ", history_callback.val_losses)
print("훈련 정확도: ", history_callback.acc)
print("검증 정확도: ", history_callback.val_acc)

In [None]:
best_model = keras.models.load_model("eff"+str(epochs)+".h5")

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

In [None]:
plot_acc_model(acc, val_acc, epochs)
plot_loss_model(loss, val_loss, epochs)

In [None]:
from sklearn.metrics import classification_report,accuracy_score

best_model = keras.models.load_model("./eff100.h5")
y_pred = best_model.predict(X_test)
y_pred_single_label = np.argmax(y_pred, axis=1)
y_test_single_label = np.argmax(y_test, axis=1)
from sklearn.metrics import classification_report,accuracy_score

class_report = classification_report(y_test_single_label, y_pred_single_label, zero_division=1)
print(class_report)

In [None]:
from lime.lime_image import LimeImageExplainer
import matplotlib.pyplot as plt

explainer = LimeImageExplainer()
image_to_explain = X_test[0]
explanation = explainer.explain_instance(X_test[0].astype('double'), best_model.predict, top_labels=1, hide_color=0, num_samples=1000)

from skimage.segmentation import mark_boundaries
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=10, hide_rest=False)
plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))

In [None]:
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=10, hide_rest=True)
plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))

In [None]:
import shap

shap.initjs()
masker = shap.maskers.Image(explanation.top_labels[0], X_test[0].shape)
explainer = shap.Explainer(best_model, masker, output_names=classes)
explainer
shap_values = explainer(X_test[:4], outputs=shap.Explanation.argsort.flip[:5])
shap_values.shape
shap.image_plot(shap_values)

In [50]:
print(y_test[0:4])

[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]]
