In [None]:
!pip install scipy==1.10.1 scikit-image==0.19.3 vit_keras==0.1.2

In [None]:
import os
import cv2
import random
import shutil
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from PIL import Image
from sklearn.preprocessing import LabelEncoder
import numpy as np
from vit_keras import vit, utils
import lime
import skimage
import shap
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from lime.lime_image import LimeImageExplainer

In [None]:
# Root folders of the dataset; each is expected to contain one sub-folder per class.
train_dir = '/kaggle/input/dogs-cats-images/dataset/training_set'
test_dir = '/kaggle/input/dogs-cats-images/dataset/test_set'

# Class names are the first two directory entries of the training folder.
# NOTE(review): os.listdir returns entries in arbitrary order, and the position of a
# name in `classes` is used as its label index further down — confirm this order
# matches the one the saved model was trained with.
classes = os.listdir(train_dir)[0:2]
classes

In [None]:
resize_size = 256
crop_size = 224


def preprocess_image(image):
    """Convert a BGR image (as loaded with cv2.imread) into a normalized model input.

    Pipeline: BGR -> RGB, bilinear resize to ``resize_size`` x ``resize_size``,
    central crop keeping ``crop_size / resize_size`` of each side, scaling by
    1/255, then per-channel standardization with the mean/std constants below.

    Returns the result of the TensorFlow ops (a float tensor).
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Resize to a square of side resize_size.
    resized = tf.image.resize(
        rgb,
        [resize_size, resize_size],
        method=tf.image.ResizeMethod.BILINEAR,
    )
    # Keep the central crop_size / resize_size portion (224 of 256 pixels per side).
    cropped = tf.image.central_crop(resized, central_fraction=crop_size / resize_size)
    # Bring pixel values down by a factor of 255.
    scaled = tf.math.divide(cropped, 255.0)

    # Per-channel (R, G, B) mean and standard deviation used for standardization.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    return (scaled - mean) / std


def resize_and_crop(image, resize_size=256, crop_size=224):
    """Produce the display version of an image: RGB, resized, center-cropped, scaled by 1/255.

    Same geometry as ``preprocess_image`` but without the mean/std standardization.

    Args:
        image: BGR image array (callers in this notebook pass the cv2.imread result).
        resize_size: Side length of the intermediate square resize.
        crop_size: Side length kept by the central crop, expressed through the
            fraction ``crop_size / resize_size``.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Bilinear resize to a resize_size x resize_size square.
    square = tf.image.resize(
        rgb,
        [resize_size, resize_size],
        method=tf.image.ResizeMethod.BILINEAR,
    )
    center = tf.image.central_crop(square, central_fraction=crop_size / resize_size)
    return tf.math.divide(center, 255.0)

In [None]:
# Print the version of the `python` executable found on the shell PATH.
# NOTE(review): this may differ from the interpreter the kernel runs — confirm if it matters.
!python --version

In [None]:
# Index every image of both dataset folders as one record: class label, file path and
# a placeholder for the split name. The records are collected in a list and turned into
# a DataFrame once, instead of growing the frame row by row with df.loc[...].
# The iteration order (class -> folder -> os.listdir order) is kept unchanged because
# the array-based split later in the notebook relies on the same ordering.
records = []
for class_name in classes:
    for data_dir in [train_dir, test_dir]:
        folderPath = os.path.join(data_dir, class_name)
        for file_name in tqdm(os.listdir(folderPath)):
            records.append({
                'label': class_name,
                'path': folderPath + "/" + file_name,
                'divide': None,
            })
df = pd.DataFrame(records, columns=['label', 'path', 'divide'])

print(df)

# 80% train, then the remaining 20% halved into validation and test (10% each).
train_df, holdout_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(holdout_df, test_size=0.5, random_state=42)

# .assign returns new frames, so no value is written into a slice of `df`
# (which is what triggers pandas' SettingWithCopyWarning).
train_df = train_df.assign(divide='train')
val_df = val_df.assign(divide='validation')
test_df = test_df.assign(divide='test')

In [None]:
# Stack the three labelled splits back into one frame (train rows first, then
# validation, then test); ignore_index renumbers the rows from 0.
df = pd.concat([train_df, val_df, test_df], ignore_index = True)

In [None]:
# Persist the split assignment to the working directory. The row index is written
# as the first (unnamed) column because index=False is not passed.
df.to_csv('cat_dog_df.csv')

In [None]:
# Load the rows marked as 'test' into three parallel collections:
#   X_test      - standardized model inputs (preprocess_image)
#   Original_X  - un-standardized images for display (resize_and_crop)
#   y_test      - one-hot labels, indexed by position of the label in `classes`
test = df[df['divide'] == 'test']

X_test, y_test, Original_X = [], [], []
for image_path, image_label in zip(test['path'], test['label']):
    bgr_image = cv2.imread(image_path)
    X_test.append(preprocess_image(bgr_image))
    Original_X.append(resize_and_crop(bgr_image))
    y_test.append(image_label)

X_test = np.array(X_test)
Original_X = np.array(Original_X)
y_test = np.array(y_test)
# Map class names to integer indices, then one-hot encode.
y_test = tf.keras.utils.to_categorical([classes.index(label) for label in y_test])

In [None]:
# Evaluate each saved classifier on X_test / y_test and print macro precision,
# recall, F1 and accuracy (rounded to three decimals).
for model_name in ['vgg']:
    weights_path = '/kaggle/input/dog-and-cat-classifier/' + model_name + "_best_model.h5"
    model = keras.models.load_model(weights_path, compile=False)
    model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

    # Collapse probability vectors and one-hot labels to class indices.
    y_pred = model.predict(X_test)
    y_pred_single_label = np.argmax(y_pred, axis=1)
    y_test_single_label = np.argmax(y_test, axis=1)

    precision, recall, f1 = (
        round(score_fn(y_test_single_label, y_pred_single_label, average='macro'), 3)
        for score_fn in (precision_score, recall_score, f1_score)
    )
    accuracy = round(accuracy_score(y_test_single_label, y_pred_single_label), 3)

    print(str(model_name))
    print(model_name, ":", precision, recall, f1, accuracy)

In [None]:
# Load every image of both dataset folders as a standardized model input.
# X collects the preprocessed images, y the matching class indices; the iteration
# order (class -> folder -> os.listdir order) matches the DataFrame built earlier.
# The previous version also ran resize_and_crop on every image and threw the result
# away; that dead work has been removed.
X = []  # images
y = []  # class indices
for class_index, class_name in enumerate(classes):
    for data_dir in [train_dir, test_dir]:
        folderPath = os.path.join(data_dir, class_name)
        for file_name in tqdm(os.listdir(folderPath)):
            img = cv2.imread(os.path.join(folderPath, file_name))
            X.append(preprocess_image(img))
            y.append(class_index)
X = np.array(X)
# One-hot encode the integer class indices.
y = tf.keras.utils.to_categorical(np.array(y))

In [None]:
# Re-create the 80/10/10 split on the in-memory arrays and score each saved
# classifier on the resulting test part.
for model_name in ['vgg']:
    checkpoint = '/kaggle/input/dog-and-cat-classifier/' + model_name + "_best_model.h5"
    model = keras.models.load_model(checkpoint, compile=False)
    model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

    # Same test_size / random_state values as the DataFrame split above.
    X_train, xx, y_train, yy = train_test_split(X, y, test_size=0.2, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(xx, yy, test_size=0.5, random_state=42)

    y_pred = model.predict(X_test)
    y_pred_single_label = np.argmax(y_pred, axis=1)
    y_test_single_label = np.argmax(y_test, axis=1)

    # Macro-averaged scores, each rounded to three decimals.
    macro_scores = [
        round(metric(y_test_single_label, y_pred_single_label, average='macro'), 3)
        for metric in (precision_score, recall_score, f1_score)
    ]
    precision, recall, f1 = macro_scores
    accuracy = round(accuracy_score(y_test_single_label, y_pred_single_label), 3)

    print(str(model_name))
    print(model_name, ":", precision, recall, f1, accuracy)

In [None]:
import matplotlib.pyplot as plt
import cv2

def save_and_close_figure(image, save_path, index_to_use, predicted_class, real):
    """Render ``image`` without axes, save it as a JPEG and close all figures.

    The file is written to ``save_path`` under the name
    ``{index_to_use}_predict_{predicted_class}_real_{real}.jpg``.
    The target directory is not created here; callers create it beforehand.
    """
    file_name = f'{index_to_use}_predict_{predicted_class}_real_{real}.jpg'
    destination = os.path.join(save_path, file_name)

    plt.imshow(image)
    plt.axis('off')
    plt.savefig(destination, bbox_inches='tight', pad_inches=0)
    # Close every open figure so figures do not accumulate across loop iterations.
    plt.close('all')

    
def plot_images(original, temp, mask, model_name, model_type, index_to_use, predicted_class, real):
    """Save three views of one image: as-is, LIME superpixels only, and a red/green overlay.

    Output folders (created on demand) are ``{model_name}/{model_type}/original/``,
    ``.../superpixel/`` and ``.../pos_neg/``; file names come from
    ``save_and_close_figure``.

    Args:
        original: H x W x 3 image. Float images are treated as having range [0, 1],
            integer images as having range [0, 255].
        temp: Image returned by LIME's ``get_image_and_mask``; accepted for
            call-site compatibility but not used.
        mask: H x W array; pixels with ``mask > 0`` are treated as positive evidence,
            pixels with ``mask < 0`` as negative evidence.
        model_name, model_type: Path components of the output folders.
        index_to_use, predicted_class, real: Values embedded in the file names.
    """
    save_paths = {
        'original': f'{model_name}/{model_type}/original/',
        'superpixel': f'{model_name}/{model_type}/superpixel/',
        'pos_neg': f'{model_name}/{model_type}/pos_neg/',
    }
    for save_path in save_paths.values():
        os.makedirs(save_path, exist_ok=True)

    original = np.asarray(original)
    # Largest displayable value for this dtype. Using it (instead of a hard-coded 255)
    # keeps float images inside [0, 1] rather than relying on imshow clipping, and the
    # explicit clip below avoids wrap-around when an integer image is passed.
    peak = 255 if np.issubdtype(original.dtype, np.integer) else 1.0

    save_and_close_figure(original, save_paths['original'], index_to_use, predicted_class, real)

    # Superpixel view: add an alpha channel and make every pixel that is not
    # positive evidence fully transparent.
    alpha = np.full((*original.shape[:-1], 1), peak, dtype=original.dtype)
    superpixel_view = np.concatenate((original, alpha), axis=-1)
    superpixel_view[mask <= 0, -1] = 0
    save_and_close_figure(superpixel_view, save_paths['superpixel'], index_to_use, predicted_class, real)

    # Positive/negative view: saturate the red channel on negative evidence and
    # the green channel on positive evidence.
    overlay = np.zeros(original.shape, dtype=np.float64)
    overlay[mask < 0] = [peak, 0, 0]
    overlay[mask > 0] = [0, peak, 0]
    combined_image = np.clip(original.astype(np.float64) + overlay, 0, peak).astype(original.dtype)
    save_and_close_figure(combined_image, save_paths['pos_neg'], index_to_use, predicted_class, real)

In [None]:
import sys
import io
import contextlib

# Explain the first test images of the base model with LIME and save the figures.
# The count is capped by the size of X_test so a short test set cannot raise IndexError.
num_to_explain = min(50, len(X_test))

# The explainer is constructed with the same arguments for every image, so build it once.
explainer = lime.lime_image.LimeImageExplainer(feature_selection='auto')

for index_to_use in range(num_to_explain):
    selected_image = X_test[index_to_use]
    selected_image = np.expand_dims(selected_image, axis=0)
    prediction = model.predict(selected_image)
    predicted_class = np.argmax(prediction)
    real = np.argmax(y_test[index_to_use])

    # Silence what is printed while LIME repeatedly calls model.predict. The context
    # manager restores sys.stdout even if LIME raises or the cell is interrupted, so
    # later cells never lose their output.
    with contextlib.redirect_stdout(io.StringIO()):
        explanation = explainer.explain_instance(selected_image[0], model.predict, top_labels=1, hide_color=0, num_samples=100)
        temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=10, hide_rest=False)

    plot_images(Original_X[index_to_use], temp, mask, model_name, 'base_model', index_to_use, predicted_class, real)

In [None]:
def preprocess_for_attack(image):
    """Apply only the per-channel mean/std standardization to ``image``.

    Resize and crop are assumed to have been done already, so the input is
    expected to have the color channels on its last axis.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    centered = image - channel_mean
    return centered / channel_std

In [None]:
def ifgsm_attack(model, image, label, epsilon=0.001, num_iter=30, clip_min=0.0, clip_max=1.0):
    """Run an iterative FGSM attack on one image and save a LIME explanation of the result.

    Each step moves the image by ``epsilon`` along the sign of the loss gradient and
    clips it back into ``[clip_min, clip_max]``. The model is always fed the
    mean/std-standardized version of the image (``preprocess_for_attack``).

    Args:
        model: Keras model, called directly for the attack and through
            ``model.predict`` for LIME.
        image: Batched, un-standardized image tensor (the caller in this notebook
            passes a single image with a leading batch axis).
        label: Integer index of the true class.
        epsilon: Step size of one signed-gradient update.
        num_iter: Number of update steps.
        clip_min, clip_max: Value range enforced after every step.

    Returns:
        ``(adv_image, prediction)`` where ``prediction`` is the model output for the
        returned ``adv_image``.

    Note:
        Reads the module-level names ``model_name`` and ``index_to_use`` to build the
        output path, so both must be defined before calling.
    """
    import contextlib
    import io

    adv_image = tf.identity(image)
    for _ in range(num_iter):
        with tf.GradientTape() as tape:
            tape.watch(adv_image)
            step_prediction = model(preprocess_for_attack(adv_image))
            loss = tf.keras.losses.sparse_categorical_crossentropy(label, step_prediction)

        gradient = tape.gradient(loss, adv_image)
        perturbation = epsilon * tf.sign(gradient)
        adv_image = tf.clip_by_value(adv_image + perturbation, clip_min, clip_max)

    # Score the final image. The in-loop prediction belongs to the image *before* the
    # last update (and would not exist at all for num_iter=0).
    prediction = model(preprocess_for_attack(adv_image))

    adv_image_np = adv_image.numpy().squeeze()  # drop size-1 axes (the batch axis)

    def _predict_standardized(images):
        # LIME perturbs the un-standardized image, so apply the same standardization
        # the attack uses before handing the batch to the model.
        return model.predict(preprocess_for_attack(images))

    # Silence output printed during LIME's repeated predict calls; the context manager
    # restores sys.stdout even when an exception is raised.
    with contextlib.redirect_stdout(io.StringIO()):
        explainer = lime.lime_image.LimeImageExplainer(feature_selection='auto')
        explanation = explainer.explain_instance(adv_image_np, _predict_standardized, top_labels=1, hide_color=0, num_samples=100)
        temp, mask = explanation.get_image_and_mask(explanation.top_labels[0], positive_only=False, num_features=10, hide_rest=False)

    # model_name and index_to_use are taken from the notebook's global scope.
    plot_images(adv_image_np, temp, mask, model_name, 'adv', index_to_use, np.argmax(prediction), label)

    return adv_image, prediction

In [None]:
#Le, L. D., Fu, H., Xu, X., Liu, Y., Xu, Y., Du, J., ... & Goh, R. (2022, September). An Efficient Defending Mechanism Against Image Attacking on Medical Image Segmentation Models. In MICCAI Workshop on Resource-Efficient Medical Image Analysis (pp. 65-74). Cham: Springer Nature Switzerland.

In [None]:
# Attack every test image and save the resulting adversarial image. The LIME views of
# each adversarial image are written by ifgsm_attack itself.
attack_dir = f'{model_name}/adv/attack/'
os.makedirs(attack_dir, exist_ok=True)

# NOTE: the loop variable must keep the name `index_to_use` — ifgsm_attack reads it
# (and `model_name`) from the global scope when it builds its output file names.
for index_to_use in range(len(X_test)):
    real_class = np.argmax(y_test[index_to_use])

    image = Original_X[index_to_use]
    image = tf.expand_dims(image, axis=0)  # Add batch dimension

    original_prediction = model(preprocess_for_attack(image))
    adv_image, adv_prediction = ifgsm_attack(model, image, real_class)

    # Top-1 classes as plain Python ints. The previous `{adv_top1_class[0]}` built a
    # set containing an eager Tensor, which is unhashable and raised TypeError.
    original_top1_class = int(tf.argmax(original_prediction, axis=1)[0])
    adv_top1_class = int(tf.argmax(adv_prediction, axis=1)[0])

    save_and_close_figure(adv_image.numpy().squeeze(), attack_dir, index_to_use, adv_top1_class, real_class)

In [None]:
import os
import shutil
import tempfile
from IPython.display import FileLink

# Change the current working directory to '/kaggle/working'
os.chdir('/kaggle/working')

# Define the name of the tar.gz archive
archive_name = 'kaggle_working.tar.gz'

# Drop an archive left over from a previous run so it is not packed into the new one.
if os.path.exists(archive_name):
    os.remove(archive_name)

# Build the archive outside the directory being archived: writing it into '.' while
# '.' is being walked can make the archive include a partial copy of itself.
with tempfile.TemporaryDirectory() as staging_dir:
    staged_archive = shutil.make_archive(
        os.path.join(staging_dir, archive_name.replace('.tar.gz', '')),
        'gztar',
        root_dir='.',
        base_dir='.',
    )
    shutil.move(staged_archive, archive_name)

# Display the link to download the archive
FileLink(archive_name)