In [4]:
pip install lime

Defaulting to user installation because normal site-packages is not writeable
Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting scikit-image>=0.12 (from lime)
  Downloading scikit_image-0.21.0-cp38-cp38-macosx_10_9_x86_64.whl.metadata (14 kB)
Collecting imageio>=2.27 (from scikit-image>=0.12->lime)
  Downloading imageio-2.34.0-py3-none-any.whl.metadata (4.9 kB)
Collecting tifffile>=2022.8.12 (from scikit-image>=0.12->lime)
  Downloading tifffile-2023.7.10-py3-none-any.whl.metadata (31 kB)
Collecting PyWavelets>=1.1.1 (from scikit-image>=0.12->lime)
  Downloading PyWavelets-1.4.1-cp38-cp38-macosx_10_13_x86_64.whl.metadata (1.9 kB)
Collecting lazy_loader>=0.2 (from scikit-image>=0.12->lime)
  Downloading lazy_loader-0.3-py3-none-any.whl.metadata (4.3 kB)
Downloading scikit_image-

In [6]:
# from google.colab import drive
# drive.mount('/content/drive')

import os
import numpy as np
import pandas as pd
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Dropout, add
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
from underthesea import word_tokenize
import shap
from lime import lime_image
from sklearn.decomposition import PCA

def load_dataset(base_path='Inbreast'):
    image_paths = []
    captions = []
    for img_name in os.listdir(f'{base_path}/image'):
        if img_name.endswith('.jpg'):
            image_path = f'{base_path}/image/{img_name}'
            caption_path = f'{base_path}/caption/{img_name.replace(".jpg", ".txt")}'

            with open(caption_path, 'r') as f:
                caption = f.read()

            image_paths.append(image_path)
            captions.append(caption)

    return image_paths, captions

image_paths, captions = load_dataset()

def preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array_expanded = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array_expanded)

resnet = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Tokenization và Padding cho tiếng Việt
tokenizer = Tokenizer(oov_token="<unk>")
captions_tokenized = [word_tokenize(caption) for caption in captions]
tokenizer.fit_on_texts(captions_tokenized)
vocab_size = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(captions_tokenized)
max_length = max(len(s) for s in sequences)
captions_padded = pad_sequences(sequences, maxlen=max_length, padding='post')

def build_model(vocab_size, max_length):
    # Image feature extractor layer
    inputs1 = Input(shape=(2048,))  # ResNet50 output
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)

    # Sequence processor layer
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)

    # Decoder layer
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)

    # Tie it together [image, seq] [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    return model

vocab_size = len(tokenizer.word_index) + 1
model = build_model(vocab_size, max_length)

model.summary()

# PCA cho hình ảnh
pca = PCA(n_components=2048)
image_data = []
for img_path in image_paths:
    preprocessed_img = preprocess_image(img_path)
    features = resnet.predict(preprocessed_img, verbose=0)
    image_data.append(features.flatten())
image_data = np.array(image_data)
image_data_pca = pca.fit_transform(image_data)

image_features = {}
for i, img_path in enumerate(image_paths):
    image_id = img_path.split('/')[-1].split('.')[0]
    image_features[image_id] = image_data_pca[i]

images = list(image_features.keys())

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

def data_generator(captions, image_features, tokenizer, max_length, batch_size):
    X1, X2, y = list(), list(), list()
    n=0
    while 1:
        for i, caption in enumerate(captions):
            n+=1
            image_id = images[i]
            photo = image_features[image_id]
            seq = tokenizer.texts_to_sequences([caption])[0]

            for i in range(1, len(seq)):
                in_seq, out_seq = seq[:i], seq[i]
                in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
                out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]

                X1.append(photo)
                X2.append(in_seq)
                y.append(out_seq)

            if n == batch_size:
                yield [[np.array(X1), np.array(X2)], np.array(y)]
                X1, X2, y = list(), list(), list()
                n=0

batch_size = 1
steps = len(captions) // batch_size

# Sử dụng SHAP và LIME
explainer_shap = shap.Explainer(model.predict, data=(image_data_pca, captions_padded))
explainer_lime = lime_image.LimeImageExplainer()

for i in range(5):
    generator = data_generator(captions, image_features, tokenizer, max_length, batch_size)
    model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)

    # Sử dụng SHAP và LIME
    shap_values = explainer_shap.shap_values((specific_image_pca, specific_caption))
    explanation_lime = explainer_lime.explain_instance(specific_image, model.predict)
    shap.image_plot(shap_values[0], specific_image_pca)
    explanation_lime.show_in_notebook()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_5 (InputLayer)        [(None, 128)]                0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, 2048)]               0         []                            
                                                                                                  
 embedding_1 (Embedding)     (None, 128, 256)             70912     ['input_5[0][0]']             
                                                                                                  
 dropout_2 (Dropout)         (None, 2048)                 0         ['input_4[0][0]']             
                                                                                            

ValueError: n_components=2048 must be between 0 and min(n_samples, n_features)=296 with svd_solver='full'

In [None]:
# Chia dữ liệu thành tập huấn luyện và tập kiểm tra
from sklearn.model_selection import train_test_split

image_paths_train, image_paths_test, captions_train, captions_test = train_test_split(image_paths, captions_padded, test_size=0.2, random_state=42)

# Tạo generator cho tập huấn luyện và tập kiểm tra
batch_size = 32
train_steps = len(captions_train) // batch_size
test_steps = len(captions_test) // batch_size

train_generator = data_generator(captions_train, image_features, tokenizer, max_length, batch_size)
test_generator = data_generator(captions_test, image_features, tokenizer, max_length, batch_size)

# Huấn luyện mô hình
epochs = 10
history = model.fit(train_generator, epochs=epochs, steps_per_epoch=train_steps, validation_data=test_generator, validation_steps=test_steps)

# Đánh giá mô hình trên tập kiểm tra
loss, accuracy = model.evaluate(test_generator, steps=test_steps)
print("Loss on test set:", loss)
print("Accuracy on test set:", accuracy)

# Vẽ biểu đồ loss và accuracy
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()