In [1]:
import numpy as np
import pandas as pd

import os
for dirname, _, filenames in os.walk('/data'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [2]:
import os
import cv2
import random
import shutil
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from PIL import Image

In [3]:
#경로 지정
train_dir = 'data/Training'
test_dir = 'data/Testing'

In [4]:
#레이블 값 (classes)
classes = os.listdir(train_dir)
print(classes)

['no_tumor', 'meningioma_tumor', 'glioma_tumor', 'pituitary_tumor']


In [5]:
#VGG-16, vit (전처리)
resize_size = 256
crop_size = 224

def preprocess_image(image):
    image = tf.image.resize(image, [resize_size, resize_size], method=tf.image.ResizeMethod.BILINEAR) #크기 조절
    image = tf.image.central_crop(image, central_fraction=crop_size / resize_size) #중앙 224x224
    image = tf.math.divide(image, 255.0) #normalize
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    image = (image - mean) / std #다 normalize

    return image

In [6]:
from sklearn.preprocessing import LabelEncoder

X = [] #Image
y = [] #class
for i in classes:
    folderPath = os.path.join(train_dir,i)
    for j in tqdm(os.listdir(folderPath)):
        img = cv2.imread(os.path.join(folderPath,j)) #이미지 읽기
        img = preprocess_image(img) #전처리
        X.append(img) #X list 넣고
        y.append(i) # y list
X = np.array(X)
y = np.array(y)
y = tf.keras.utils.to_categorical([classes.index(label) for label in y]) #문자열 -> [0,3]

  0%|                                             | 1/395 [00:00<00:48,  8.13it/s]

Metal device set to: Apple M2 Pro


100%|██████████████████████████████████████████| 395/395 [00:01<00:00, 231.79it/s]
100%|██████████████████████████████████████████| 822/822 [00:03<00:00, 245.73it/s]
100%|██████████████████████████████████████████| 826/826 [00:03<00:00, 240.03it/s]
100%|██████████████████████████████████████████| 827/827 [00:03<00:00, 224.70it/s]


In [7]:
X_train, xx, y_train, yy = train_test_split(X,y, test_size=0.2, random_state=42) #training 분할 (train,val)
X_train, X_val, y_train, y_val = train_test_split(xx,yy, test_size=0.5, random_state=42) #training 분할 (train,val)

In [8]:
X_test = []
y_test = []
for i in classes:
    folderPath = os.path.join(test_dir,i)
    for j in tqdm(os.listdir(folderPath)):
        img = cv2.imread(os.path.join(folderPath,j))
        img = preprocess_image(img)
        X_test.append(img)
        y_test.append(i)
X_test = np.array(X_test)
y_test = np.array(y_test)
y_test = tf.keras.utils.to_categorical([classes.index(label) for label in y_test]) #testing -> testset

100%|██████████████████████████████████████████| 105/105 [00:00<00:00, 247.31it/s]
100%|██████████████████████████████████████████| 115/115 [00:00<00:00, 238.89it/s]
100%|██████████████████████████████████████████| 100/100 [00:00<00:00, 231.11it/s]
100%|████████████████████████████████████████████| 74/74 [00:00<00:00, 162.17it/s]


In [9]:
#data augmentation 아직 적용 안햇음.
#augmentation은 어떻게 하면 좋을지..? -> 고민해주시면 좋을 것 같습니다...... 코드 짜서 주시면 좋을거같습니당....
# img_datagen = ImageDataGenerator(
#     rotation_range=30,
#     rescale = 1./255.,
#     width_shift_range = 0.3,
#     height_shift_range = 0.3,
#     shear_range = 0.3,
#     zoom_range = 0.3,
#     horizontal_flip = True,
#     vertical_flip = True)

# img_datagen.fit(X_train)

In [10]:
#그래프 그려주는 거 (성능 그래프)
def plot_acc_model(acc, val_acc, epochs):
    plt.plot(epochs, acc, 'r', label='Training accuracy')
    plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.legend(loc=0)
    plt.figure()
    plt.show()
    
def plot_loss_model(loss, val_loss, epochs):
    plt.plot(epochs, loss, 'r', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend(loc=0)
    plt.figure()
    plt.show()

In [11]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model

In [12]:
from vit_keras import vit, utils

image_size = 224
vit_model = vit.vit_b16(
    image_size=image_size,
    activation='sigmoid',
    pretrained=True,
    include_top=True,
    pretrained_top=True
)

for layer in vit_model.layers:
    layer.trainable = False
x = Flatten()(vit_model.output)
output = Dense(4, activation='softmax')(x)

model = Model(inputs=vit_model.input, outputs=output)
optimizer=tf.keras.optimizers.SGD(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [13]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

epochs = 100

# 모든 epoch의 모델을 저장하는 콜백
checkpoint_all_epochs = ModelCheckpoint("vit_all_epochs.h5", save_best_only=False, verbose=1)

# 가장 낮은 검증 손실을 가진 모델만을 저장하는 콜백
checkpoint_best_only = ModelCheckpoint("vit_best_model.h5", monitor='val_loss', save_best_only=True, mode='min', verbose=1)

early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, restore_best_weights=True)

history = model.fit(X_train, y_train,
                    epochs=epochs,
                    validation_data=(X_val, y_val),
                    verbose=1,
                    callbacks=[early_stopping, checkpoint_all_epochs, checkpoint_best_only])

Epoch 1/100


2023-11-14 19:53:25.204896: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1: saving model to vit_all_epochs.h5

Epoch 1: val_loss improved from inf to 1.34675, saving model to vit_best_model.h5
Epoch 2/100
Epoch 2: saving model to vit_all_epochs.h5

Epoch 2: val_loss improved from 1.34675 to 1.32432, saving model to vit_best_model.h5
Epoch 3/100
Epoch 3: saving model to vit_all_epochs.h5

Epoch 3: val_loss improved from 1.32432 to 1.29373, saving model to vit_best_model.h5
Epoch 4/100
Epoch 4: saving model to vit_all_epochs.h5

Epoch 4: val_loss improved from 1.29373 to 1.27513, saving model to vit_best_model.h5
Epoch 5/100
Epoch 5: saving model to vit_all_epochs.h5

Epoch 5: val_loss improved from 1.27513 to 1.26464, saving model to vit_best_model.h5
Epoch 6/100

KeyboardInterrupt: 

In [None]:
#학습 성능 확인
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

In [None]:
#학습 성능 그래프화
plot_acc_model(acc, val_acc, epochs)
plot_loss_model(loss, val_loss, epochs)

In [None]:
from sklearn.metrics import classification_report,accuracy_score

best_model = keras.models.load_model("vit_epochs_100.h5")
y_pred = best_model.predict(X_test)
y_pred_single_label = np.argmax(y_pred, axis=1)
y_test_single_label = np.argmax(y_test, axis=1)
from sklearn.metrics import classification_report,accuracy_score

class_report = classification_report(y_test_single_label, y_pred_single_label, zero_division=1)
print(class_report)

In [None]:
print(y_pred)

In [None]:
from lime.lime_image import LimeImageExplainer
import matplotlib.pyplot as plt

explainer = LimeImageExplainer()
image_to_explain = X_test[0]
explanation = explainer.explain_instance(X_test[0].astype('double'), model_vit16.predict, top_labels=1, hide_color=0, num_samples=1000)

from skimage.segmentation import mark_boundaries
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=10, hide_rest=False)
plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))

In [None]:
temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=True, num_features=5, hide_rest=True)
plt.imshow(mark_boundaries(temp / 2 + 0.5, mask))

In [None]:
import shap

#shap도 비슷 (빨간색 : 긍정적, 파란색: 부정적)
shap.initjs()
masker = shap.maskers.Image("inpaint_telea", X_test[0].shape)
explainer = shap.Explainer(model_vit16, masker, output_names=classes)
explainer
shap_values = explainer(X_test[:4], outputs=shap.Explanation.argsort.flip[:5])
shap_values.shape
shap.image_plot(shap_values)