In [1]:
import os
import cv2
import random
import shutil
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from PIL import Image
import numpy as np
import pandas as pd

import os
# Print the full path of every file found anywhere beneath /data.
for path in (
    os.path.join(root, name)
    for root, _, names in os.walk('/data')
    for name in names
):
    print(path)

In [2]:
# Root folders of the two dataset partitions; each is expected to hold one
# sub-folder per class.
train_dir = 'data/Training'
test_dir = 'data/Testing'

# Class names are the sub-folder names of the training partition.
# os.listdir returns entries in arbitrary order and also lists stray files
# (e.g. .DS_Store), so keep directories only and sort them: the class -> index
# mapping used for the one-hot labels is then identical on every run.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

In [3]:
resize_size = 256
crop_size = 224

def preprocess_image(image):
    """Resize, centre-crop and normalise a single image.

    The image is bilinearly resized to resize_size x resize_size, the central
    crop_size / resize_size fraction is kept, pixel values are divided by 255
    and each channel is then standardised with a fixed mean and std.

    Args:
        image: image array/tensor accepted by tf.image.resize
            (presumably height x width x 3 -- confirm against the caller).

    Returns:
        The normalised image tensor.
    """
    # Resize to a square of side `resize_size`.
    resized = tf.image.resize(
        image,
        [resize_size, resize_size],
        method=tf.image.ResizeMethod.BILINEAR,
    )
    # Keep only the central region (224 / 256 of each side).
    cropped = tf.image.central_crop(resized, central_fraction=crop_size / resize_size)
    # Scale to [0, 1] assuming 8-bit input -- TODO confirm input range.
    scaled = tf.math.divide(cropped, 255.0)
    # Per-channel statistics (the values commonly quoted for ImageNet RGB channels).
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    return (scaled - mean) / std

In [4]:
from sklearn.preprocessing import LabelEncoder

# Load every image of every class from both partitions into memory.
# X collects the preprocessed images, y the matching class index.
X = []
y = []
for class_idx, class_name in enumerate(classes):
    for data_dir in [train_dir, test_dir]:
        folderPath = os.path.join(data_dir, class_name)
        for file_name in tqdm(os.listdir(folderPath)):
            img = cv2.imread(os.path.join(folderPath, file_name))
            if img is None:
                # cv2.imread returns None for anything it cannot decode
                # (hidden files, corrupt images); skip instead of crashing
                # inside tf.image.resize.
                continue
            # cv2.imread yields BGR channel order, while the per-channel
            # mean/std in preprocess_image are listed in RGB order.
            # NOTE(review): checkpoints trained before this change saw BGR
            # input and should be retrained.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Store plain NumPy arrays so the final np.array() call does not
            # have to convert thousands of eager tensors one by one.
            X.append(np.asarray(preprocess_image(img)))
            y.append(class_idx)
X = np.array(X)
# One-hot encode; pass num_classes explicitly so the label width always
# matches len(classes) even if the last class contributed no images.
y = tf.keras.utils.to_categorical(y, num_classes=len(classes))

  7%|██▊                                      | 27/395 [00:00<00:02, 152.15it/s]

Metal device set to: Apple M2 Pro


100%|████████████████████████████████████████| 395/395 [00:01<00:00, 225.52it/s]
100%|████████████████████████████████████████| 105/105 [00:00<00:00, 290.17it/s]
100%|████████████████████████████████████████| 822/822 [00:03<00:00, 240.16it/s]
100%|████████████████████████████████████████| 115/115 [00:00<00:00, 260.98it/s]
100%|████████████████████████████████████████| 826/826 [00:03<00:00, 244.11it/s]
100%|████████████████████████████████████████| 100/100 [00:00<00:00, 223.33it/s]
100%|████████████████████████████████████████| 827/827 [00:03<00:00, 223.43it/s]
100%|██████████████████████████████████████████| 74/74 [00:00<00:00, 169.10it/s]


In [5]:
# First split: keep 80% of the samples for training, hold out the other 20%.
X_train, xx, y_train, yy = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)
# Second split: halve the hold-out into a test set and a validation set
# (roughly 10% of all samples each).
X_test, X_val, y_test, y_val = train_test_split(
    xx, yy,
    test_size=0.5,
    random_state=42,
)

In [6]:
# Plotting helpers for the training curves (performance graphs).
def plot_acc_model(acc, val_acc, epochs):
    """Plot training and validation accuracy against the epoch axis.

    Args:
        acc: training accuracy values, one per entry of `epochs`.
        val_acc: validation accuracy values, one per entry of `epochs`.
        epochs: x-axis values.
    """
    # Open the figure *before* drawing. Creating it after the plot calls left
    # the curves on whatever figure happened to be current and emitted an
    # extra empty figure.
    plt.figure()
    plt.plot(epochs, acc, 'r', label='Training accuracy')
    plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
    plt.title('Training and validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(loc=0)
    plt.show()
    
def plot_loss_model(loss, val_loss, epochs):
    """Plot training and validation loss against the epoch axis.

    Args:
        loss: training loss values, one per entry of `epochs`.
        val_loss: validation loss values, one per entry of `epochs`.
        epochs: x-axis values.
    """
    # Open the figure *before* drawing. Creating it after the plot calls left
    # the curves on whatever figure happened to be current and emitted an
    # extra empty figure.
    plt.figure()
    plt.plot(epochs, loss, 'r', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend(loc=0)
    plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16, EfficientNetV2L, ResNet50
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger
import os
from vit_keras import vit, utils

# Number of training epochs shared by every model trained below.
epochs = 20

def create_and_compile_model(base_model, output_size, model_name,
                             learning_rate=0.001, patience=100):
    """Attach a softmax classifier to `base_model`, compile it and build its callbacks.

    Every layer of `base_model` is marked trainable, its output is flattened
    and fed to a Dense softmax layer, and the resulting model is compiled with
    legacy SGD and categorical cross-entropy.

    Side effects: creates the directory model_not_pretrained/<model_name>/ if
    it is missing. The returned callbacks write best_model.h5 into that
    directory and training_log_<model_name>.csv into the working directory.

    Args:
        base_model: Keras model exposing `.layers`, `.input` and `.output`.
        output_size: number of units in the softmax output layer.
        model_name: short name used for the checkpoint folder and CSV log.
        learning_rate: SGD learning rate.
        patience: epochs without val_loss improvement before early stopping.
            With the default of 100 and the 20 epochs configured above, the
            early-stopping threshold can never be reached.

    Returns:
        (model, callbacks): the compiled model and the list
        [EarlyStopping, ModelCheckpoint, CSVLogger].
    """
    for layer in base_model.layers:
        layer.trainable = True

    x = Flatten()(base_model.output)
    output = Dense(output_size, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=output)

    optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    model_dir = f"model_not_pretrained/{model_name}/"
    # exist_ok avoids the separate existence check and the race it implies.
    os.makedirs(model_dir, exist_ok=True)

    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, verbose=1, restore_best_weights=True)
    best_model_checkpoint = ModelCheckpoint(f"{model_dir}best_model.h5", monitor='val_loss', save_best_only=True, mode='min', verbose=1)
    csv_logger = CSVLogger(f'training_log_{model_name}.csv', separator=',', append=False)
    return model, [early_stopping, best_model_checkpoint, csv_logger]


def _fit_and_record(base_model, model_name):
    """Compile `base_model` with a 4-way softmax head, train it and return its History.

    The model and callbacks are locals of this helper, which replaces the
    per-model `del` statements. One of those (`del based_model_efficientnet`)
    misspelled the variable name and raised NameError, aborting the cell
    before ResNet50 and ViT were trained.
    """
    model, callbacks = create_and_compile_model(base_model, output_size=4, model_name=model_name)
    return model.fit(X_train, y_train,
                     epochs=epochs,
                     validation_data=(X_val, y_val),
                     verbose=1,
                     callbacks=callbacks)


# The three CNN backbones are trained from randomly initialised weights.
history_vgg = _fit_and_record(
    VGG16(weights=None, include_top=False, input_shape=(224, 224, 3)),
    'vgg',
)

history_efficientnet = _fit_and_record(
    EfficientNetV2L(weights=None, include_top=False, input_shape=(224, 224, 3)),
    'eff',
)

history_resnet = _fit_and_record(
    ResNet50(weights=None, include_top=False, input_shape=(224, 224, 3)),
    'resnet',
)

# NOTE(review): unlike the CNNs above, ViT is built with pretrained=True although
# its checkpoint is written under model_not_pretrained/ -- confirm this is intended.
history_vit = _fit_and_record(
    vit.vit_b16(
        image_size=224,
        classes=4,
        pretrained=True,       # use the pretrained weights
        pretrained_top=False,  # best left unchanged unless there is a specific reason
    ),
    'vit',
)


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



Epoch 1/20


2023-12-10 13:48:01.430631: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


 1/82 [..............................] - ETA: 1:38:12 - loss: 1.6115 - accuracy: 0.3125

In [None]:
import matplotlib.pyplot as plt

# Draw one metric's training and validation curves for every model on a shared axis.
def plot_metrics(ax, histories, model_names, metric_name):
    """Plot `metric_name` and `val_<metric_name>` for each history on `ax`.

    Args:
        ax: axes-like object providing plot/set_title/set_xlabel/set_ylabel/legend.
        histories: objects whose `.history` dict holds the metric series.
        model_names: display names, paired with `histories` by position.
        metric_name: key of the training series; the validation series is
            read from the same key prefixed with 'val_'.
    """
    pretty_name = metric_name.capitalize()
    val_key = f'val_{metric_name}'

    for run, name in zip(histories, model_names):
        curves = run.history
        ax.plot(curves[metric_name], label=f'{name} {pretty_name}')
        ax.plot(curves[val_key], label=f'{name} Validation {pretty_name}')

    ax.set_title(f'{pretty_name} Comparison')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(pretty_name)
    ax.legend()

# Training histories and their display names, paired by position.
model_histories = [history_vit, history_vgg, history_efficientnet, history_resnet]
model_names = ['ViT', 'VGG16', 'EfficientNetV2L', 'ResNet50']

# One row with two panels: accuracy on the left, loss on the right.
# Each panel shows both the training and the validation curve of every model.
fig, axs = plt.subplots(1, 2, figsize=(16, 6))
for panel, metric in zip(axs, ('accuracy', 'loss')):
    plot_metrics(panel, model_histories, model_names, metric)

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Draw the selected (training and/or validation) curves of one metric for every model.
def plot_metrics(ax, histories, model_names, metric_name, train=True, val=True):
    """Plot the training and/or validation series of `metric_name` on `ax`.

    Args:
        ax: axes-like object providing plot/set_title/set_xlabel/set_ylabel/legend.
        histories: objects whose `.history` dict holds the metric series.
        model_names: display names, paired with `histories` by position.
        metric_name: key of the training series; the validation series is
            read from the same key prefixed with 'val_'.
        train: draw the training series when true.
        val: draw the validation series when true.
    """
    pretty_name = metric_name.capitalize()

    # (history key, legend word) for each series that was requested.
    series = []
    if train:
        series.append((metric_name, 'Training'))
    if val:
        series.append((f'val_{metric_name}', 'Validation'))

    for run, name in zip(histories, model_names):
        for key, kind in series:
            ax.plot(run.history[key], label=f'{name} {kind} {pretty_name}')

    ax.set_title(f'{pretty_name} Comparison')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(pretty_name)
    ax.legend()

# Training histories and their display names, paired by position.
model_histories = [history_vit, history_vgg, history_efficientnet, history_resnet]
model_names = ['ViT', 'VGG16', 'EfficientNetV2L', 'ResNet50']

# 2x2 grid: rows are accuracy / loss, columns are training / validation.
fig, axs = plt.subplots(2, 2, figsize=(16, 12))
for row, metric in enumerate(('accuracy', 'loss')):
    # Left column: training curves only.
    plot_metrics(axs[row, 0], model_histories, model_names, metric, train=True, val=False)
    # Right column: validation curves only.
    plot_metrics(axs[row, 1], model_histories, model_names, metric, train=False, val=True)

plt.show()

In [None]:
import matplotlib.pyplot as plt

# Draw the selected (training and/or validation) curves of one metric for every model.
def plot_metrics(ax, histories, model_names, metric_name, train=True, val=True):
    """Plot the training and/or validation series of `metric_name` on `ax`.

    Args:
        ax: axes-like object providing plot/set_title/set_xlabel/set_ylabel/legend.
        histories: objects whose `.history` dict holds the metric series.
        model_names: display names, paired with `histories` by position.
        metric_name: key of the training series; the validation series is
            read from the same key prefixed with 'val_'.
        train: draw the training series when true.
        val: draw the validation series when true.
    """
    title_word = metric_name.capitalize()
    # Candidate series in drawing order, filtered down to the enabled ones.
    wanted = [
        (key, kind)
        for enabled, key, kind in (
            (train, metric_name, 'Training'),
            (val, f'val_{metric_name}', 'Validation'),
        )
        if enabled
    ]

    for record, display_name in zip(histories, model_names):
        for key, kind in wanted:
            ax.plot(record.history[key], label=f'{display_name} {kind} {title_word}')

    ax.set_title(f'{title_word} Comparison')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(title_word)
    ax.legend()

# Same comparison as the previous grid, but with EfficientNetV2L left out.
model_histories = [history_vit, history_vgg, history_resnet]
model_names = ['ViT', 'VGG16', 'ResNet50']

# 2x2 grid: rows are accuracy / loss, columns are training / validation.
fig, axs = plt.subplots(2, 2, figsize=(16, 12))
for row, metric in enumerate(('accuracy', 'loss')):
    # Left column: training curves only.
    plot_metrics(axs[row, 0], model_histories, model_names, metric, train=True, val=False)
    # Right column: validation curves only.
    plot_metrics(axs[row, 1], model_histories, model_names, metric, train=False, val=True)

plt.show()

In [None]:
from sklearn.metrics import classification_report,accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Score each saved CNN checkpoint on the held-out test split and print
# macro precision, macro recall, macro F1 and accuracy (rounded to 3 decimals).
for model_name in ["resnet", "vgg", "eff"]:
    best_model = keras.models.load_model(f"model_not_pretrained/{model_name}/best_model.h5")
    y_pred = best_model.predict(X_test)

    # Collapse probabilities / one-hot rows to integer class indices.
    y_pred_single_label = np.argmax(y_pred, axis=1)
    y_test_single_label = np.argmax(y_test, axis=1)

    precision, recall, f1 = (
        round(score_fn(y_test_single_label, y_pred_single_label, average='macro'), 3)
        for score_fn in (precision_score, recall_score, f1_score)
    )
    accuracy = round(accuracy_score(y_test_single_label, y_pred_single_label), 3)

    print(str(model_name))
    print(precision, recall, f1, accuracy)

In [None]:
from tensorflow.keras import layers, models
from vit_keras import vit, utils

# Evaluate the ViT checkpoint written by this notebook's training cell.
# ViT contains custom layers, so the network is rebuilt in code and only the
# weights are read from the checkpoint.
model_name = "vit"

# Rebuild the network with the same arguments the training cell used
# (classes=4, default head activation). The earlier call omitted classes=4 and
# set a sigmoid head, which does not match the checkpointed architecture.
# Layers are not frozen here: trainability has no effect on predict().
base_model = vit.vit_b16(
    image_size=224,
    classes=4,
    pretrained=True,
    pretrained_top=False,
)

x = layers.Flatten()(base_model.output)
output_size = 4  # number of output classes
output = layers.Dense(output_size, activation='softmax')(x)
model = models.Model(inputs=base_model.input, outputs=output)

# Load the trained weights.
model.load_weights("model_not_pretrained/"+model_name+"/best_model.h5")

y_pred = model.predict(X_test)
# Collapse probabilities / one-hot rows to integer class indices.
y_pred_single_label = np.argmax(y_pred, axis=1)
y_test_single_label = np.argmax(y_test, axis=1)

precision = round(precision_score(y_test_single_label, y_pred_single_label, average='macro'), 3)
recall = round(recall_score(y_test_single_label, y_pred_single_label, average='macro'), 3)
f1 = round(f1_score(y_test_single_label, y_pred_single_label, average='macro'), 3)
accuracy = round(accuracy_score(y_test_single_label, y_pred_single_label), 3)
print(str(model_name))
print(precision, recall, f1, accuracy)

In [None]:
from sklearn.metrics import classification_report,accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Score the CNN checkpoints stored under aug02_model/ on the held-out test
# split and print macro precision, macro recall, macro F1 and accuracy.
for model_name in ["resnet", "vgg", "eff"]:
    best_model = keras.models.load_model(f"aug02_model/{model_name}/best_model.h5")
    y_pred = best_model.predict(X_test)

    # Collapse probabilities / one-hot rows to integer class indices.
    y_pred_single_label = np.argmax(y_pred, axis=1)
    y_test_single_label = np.argmax(y_test, axis=1)
    truth_and_guess = (y_test_single_label, y_pred_single_label)

    precision = round(precision_score(*truth_and_guess, average='macro'), 3)
    recall = round(recall_score(*truth_and_guess, average='macro'), 3)
    f1 = round(f1_score(*truth_and_guess, average='macro'), 3)
    accuracy = round(accuracy_score(*truth_and_guess), 3)

    print(str(model_name))
    print(precision, recall, f1, accuracy)
    
from tensorflow.keras import layers, models
from vit_keras import vit, utils

# Everything else is unchanged; only the ViT model needs special handling.
for model_name in ["vit"]:
    weights_path = "aug02_model/"+model_name+"/best_model.h5"

    if model_name != 'vit':
        # Other models are loaded directly from the saved file.
        model = models.load_model(weights_path)
    else:
        # Define the model in code, then load the weights into it.
        # NOTE(review): this architecture must match whatever produced the
        # aug02_model checkpoint, which is not visible in this notebook -- confirm.
        base_model = vit.vit_b16(image_size=224, activation='sigmoid', pretrained=True)
        for frozen_layer in base_model.layers:
            frozen_layer.trainable = False

        x = layers.Flatten()(base_model.output)
        output_size = 4  # set the output size appropriately
        output = layers.Dense(output_size, activation='softmax')(x)

        model = models.Model(inputs=base_model.input, outputs=output)

        # Load the weights.
        model.load_weights(weights_path)

    y_pred = model.predict(X_test)
    # Collapse probabilities / one-hot rows to integer class indices.
    y_pred_single_label = np.argmax(y_pred, axis=1)
    y_test_single_label = np.argmax(y_test, axis=1)

    precision, recall, f1 = (
        round(score_fn(y_test_single_label, y_pred_single_label, average='macro'), 3)
        for score_fn in (precision_score, recall_score, f1_score)
    )
    accuracy = round(accuracy_score(y_test_single_label, y_pred_single_label), 3)

    print(str(model_name))
    print(precision, recall, f1, accuracy)

In [None]:
from lime.lime_image import LimeImageExplainer
import matplotlib.pyplot as plt
import shap
from skimage.segmentation import mark_boundaries

# NOTE(review): `model_name` is not assigned in this cell; it is whatever the
# last evaluation loop left behind, and "model/" is a different folder from the
# ones used elsewhere in the notebook -- confirm which checkpoint is intended.
best_model = keras.models.load_model("model/"+model_name+"/best_model.h5")

# ---- LIME: explain the first test image ----
image_to_explain = X_test[0]
lime_explainer = LimeImageExplainer()
explanation = lime_explainer.explain_instance(image_to_explain.astype('double'), best_model.predict, top_labels=1, hide_color=0, num_samples=1000)
top_label = explanation.top_labels[0]

# Draw the two LIME views side by side. Two consecutive plt.imshow calls drew
# on the same axes, so only the second view was visible.
# NOTE(review): `temp / 2 + 0.5` assumes pixel values in [-1, 1], but
# preprocess_image applies mean/std normalisation -- confirm the rescaling.
fig, (ax_all, ax_positive) = plt.subplots(1, 2, figsize=(12, 6))

temp, mask = explanation.get_image_and_mask(top_label, positive_only=False, num_features=10, hide_rest=False)
ax_all.imshow(mark_boundaries(temp / 2 + 0.5, mask))
ax_all.set_title('LIME: top 10 features')

temp, mask = explanation.get_image_and_mask(top_label, positive_only=True, num_features=5, hide_rest=True)
ax_positive.imshow(mark_boundaries(temp / 2 + 0.5, mask))
ax_positive.set_title('LIME: top 5 positive features')

plt.show()

# ---- SHAP: explain the first four test images ----
shap.initjs()
# NOTE(review): the masker's first argument is the LIME top label (a class
# index), which looks unintended for a mask value -- confirm.
masker = shap.maskers.Image(explanation.top_labels[0], X_test[0].shape)
explainer = shap.Explainer(best_model, masker, output_names=classes)
shap_values = explainer(X_test[:4], outputs=shap.Explanation.argsort.flip[:5])
shap.image_plot(shap_values)