In [12]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Dropout, add
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
from underthesea import word_tokenize
import shap
from lime import lime_image

In [5]:
def load_dataset(base_path='../dataset'):
    image_paths = []
    captions = []
    for img_name in os.listdir(f'{base_path}/images'):
        if img_name.endswith('.jpg'):
            image_path = f'{base_path}/images/{img_name}'
            caption_path = f'{base_path}/captions/{img_name.replace(".jpg", ".txt")}'

            with open(caption_path, 'r') as f:
                caption = f.read()

            image_paths.append(image_path)
            captions.append(caption)

    return image_paths, captions

image_paths, captions = load_dataset()
print(f'Loaded {len(image_paths)} images and {len(captions)} captions')

Loaded 296 images and 296 captions


In [6]:
def preprocess_image(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array_expanded = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array_expanded)

resnet = ResNet50(weights='imagenet', include_top=False, pooling='avg')

In [20]:
# Tokenization và Padding cho tiếng Việt
tokenizer = Tokenizer(oov_token="<unk>", filters='!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ')
captions_tokenized = [['<start>'] + word_tokenize(caption) + ['<end>'] for caption in captions]
tokenizer.fit_on_texts(captions_tokenized)
vocab_size = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(captions_tokenized)
max_length = max(len(s) for s in sequences)
captions_padded = pad_sequences(sequences, maxlen=max_length, padding='post')

print(f'Vocab size: {vocab_size}, Max length: {max_length}')
# print samples
for i in range(3):
    print(f'Original: {captions[i]}')
    print(f'Tokenized: {captions_tokenized[i]}')
    print(f'Padded: {captions_padded[i]}')
    print()

Vocab size: 279, Max length: 130
Original: Nhũ ảnh ghi lại sự biến dạng của mô đệm của vú trái, có đường kính 15 mm, tổn thương có tiêu chí nghi ngờ trung gian.  Kết quả hình ảnh lành tính BiRads 2.
Tokenized: ['<start>', 'Nhũ', 'ảnh', 'ghi', 'lại', 'sự', 'biến dạng', 'của', 'mô đệm', 'của', 'vú', 'trái', ',', 'có', 'đường kính', '15', 'mm', ',', 'tổn thương', 'có', 'tiêu chí', 'nghi ngờ', 'trung gian', '.', 'Kết quả', 'hình ảnh', 'lành tính', 'BiRads', '2', '.', '<end>']
Padded: [  6  21  22 117 103  59 161  45 162  45   5  42   3   4  51 128  39   3
  63   4 129  32 147   2  20   8  14   9  19   2   7   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0

In [8]:
def build_model(vocab_size, max_length):
    # Image feature extractor layer
    inputs1 = Input(shape=(2048,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)

    # Sequence processor layer
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)

    # Decoder layer
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)

    # Tie it together [image, seq] [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    return model

vocab_size = len(tokenizer.word_index) + 1
model = build_model(vocab_size, max_length)

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 128)]                0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 2048)]               0         []                            
                                                                                                  
 embedding (Embedding)       (None, 128, 256)             70912     ['input_3[0][0]']             
                                                                                                  
 dropout (Dropout)           (None, 2048)                 0         ['input_2[0][0]']             
                                                                                              

In [9]:
image_features = {}
for img_path in image_paths:
    preprocessed_img = preprocess_image(img_path)
    features = resnet.predict(preprocessed_img, verbose=0)
    image_id = img_path.split('/')[-1].split('.')[0]
    image_features[image_id] = features

images = list(image_features.keys())

In [29]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

def data_generator(captions, image_features, tokenizer, max_length, batch_size):
    X1, X2, y = list(), list(), list()
    counter = 0  # Sử dụng biến counter thay vì n để đếm số lượng mẫu trong batch
    while True:
        for i, caption in enumerate(captions):
            image_id = images[i]
            photo = image_features[image_id][0]
            seq = tokenizer.texts_to_sequences([caption])[0]

            for j in range(1, len(seq)):  # Sử dụng biến lặp j thay vì i trong vòng lặp này
                in_seq, out_seq = seq[:j], seq[j]
                in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
                out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]

                X1.append(photo)
                X2.append(in_seq)
                y.append(out_seq)
                counter += 1

                if counter == batch_size:
                    yield [[np.array(X1), np.array(X2)], np.array(y)]
                    X1, X2, y = list(), list(), list()
                    counter = 0  # Đặt lại counter về 0 sau khi đủ kích thước batch

# Sử dụng tên biến khác nhau cho vòng lặp trong hàm data_generator

In [30]:
# from sklearn.model_selection import train_test_split

# train_image_paths, test_image_paths, train_captions, test_captions = train_test_split(image_paths, captions, test_size=0.2, random_state=42)

# print(f'Training set: {len(train_image_paths)} images, {len(train_captions)} captions')
# print(f'Testing set: {len(test_image_paths)} images, {len(test_captions)} captions')

# batch_size = 10
# train_steps = len(train_captions) // batch_size

# for i in range(2):
#     train_generator = data_generator(train_captions, image_features, tokenizer, max_length, batch_size)
#     model.fit_generator(train_generator, epochs=1, steps_per_epoch=train_steps, verbose=1)

# test_steps = len(test_captions) // batch_size
# test_generator = data_generator(test_captions, image_features, tokenizer, max_length, batch_size)
# evaluation = model.evaluate_generator(test_generator, steps=test_steps, verbose=1)

# print("Test Loss:", evaluation)




from sklearn.model_selection import train_test_split

batch_size = 10

# Đảm bảo bạn đã chia dữ liệu thành tập huấn luyện và tập kiểm tra một cách chính xác
train_image_paths, test_image_paths, train_captions, test_captions = train_test_split(image_paths, captions, test_size=0.2, random_state=42)

# Số lượng batch và bước huấn luyện
train_steps = len(train_captions) // batch_size
test_steps = len(test_captions) // batch_size

# Đào tạo mô hình
train_generator = data_generator(train_captions, image_features, tokenizer, max_length, batch_size)
test_generator = data_generator(test_captions, image_features, tokenizer, max_length, batch_size)

model.fit_generator(train_generator, epochs=2, steps_per_epoch=train_steps, validation_data=test_generator, validation_steps=test_steps)

# Sử dụng SHAP và LIME để giải thích dự đoán
X_sample, y_sample = next(data_generator(test_captions, image_features, tokenizer, max_length, 1))
X_sample_image, X_sample_text = X_sample

explainer = shap.DeepExplainer(model, [X_sample_image, X_sample_text])
shap_values = explainer.shap_values([X_sample_image, X_sample_text])

# Hiển thị giải thích SHAP
shap.image_plot(shap_values[0], -X_sample_image)


Epoch 1/2


`Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.


InvalidArgumentError: Graph execution error:

Detected at node 'categorical_crossentropy/softmax_cross_entropy_with_logits' defined at (most recent call last):
    File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/runpy.py", line 193, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/traitlets/config/application.py", line 1075, in launch_instance
      app.start()
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/kernelapp.py", line 739, in start
      self.io_loop.start()
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/tornado/platform/asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.8/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell
      await result
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/ipkernel.py", line 359, in execute_request
      await super().execute_request(stream, ident, parent)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/ipkernel.py", line 446, in do_execute
      res = shell.run_cell(
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/81/p37gc41n4ds1pfphccjpd46w0000gn/T/ipykernel_93850/2409195196.py", line 6, in <module>
      model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/training.py", line 2810, in fit_generator
      return self.fit(
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/training.py", line 1742, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/training.py", line 1338, in train_function
      return step_function(self, iterator)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/training.py", line 1322, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/training.py", line 1303, in run_step
      outputs = model.train_step(data)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/training.py", line 1081, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/training.py", line 1139, in compute_loss
      return self.compiled_loss(
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/engine/compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/losses.py", line 142, in __call__
      losses = call_fn(y_true, y_pred)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/losses.py", line 268, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/losses.py", line 2122, in categorical_crossentropy
      return backend.categorical_crossentropy(
    File "/Users/nghiempt/Library/Python/3.8/lib/python/site-packages/keras/src/backend.py", line 5566, in categorical_crossentropy
      return tf.nn.softmax_cross_entropy_with_logits(
Node: 'categorical_crossentropy/softmax_cross_entropy_with_logits'
logits and labels must be broadcastable: logits_size=[10,277] labels_size=[10,279]
	 [[{{node categorical_crossentropy/softmax_cross_entropy_with_logits}}]] [Op:__inference_train_function_32774]

In [None]:
# Explain predictions using SHAP
explainer = shap.Explainer(model, image_features.values())
shap_values = explainer(image_features.values(), max_evals=2 * len(image_features) + 1)

# Explain predictions using LIME
explainer = lime_image.LimeImageExplainer()
explanation = explainer.explain_instance(image, model.predict, top_labels=5, hide_color=0, num_samples=1000)

In [27]:
# Trích xuất dữ liệu từ generator và chuyển đổi thành mảng NumPy
data_gen = data_generator(train_captions, image_features, tokenizer, max_length, batch_size)
X_sample, y_sample = next(data_gen)
X_sample_image, X_sample_text = X_sample

# Áp dụng SHAP
explainer = shap.DeepExplainer(model, [X_sample_image, X_sample_text])

# Lấy dự đoán cho một ví dụ cụ thể
shap_values = explainer.shap_values([X_sample_image, X_sample_text])

# Hiển thị giải thích SHAP
shap.image_plot(shap_values[0], -X_sample_image)

ValueError: Input 1 of layer "model" is incompatible with the layer: expected shape=(None, 128), found shape=(410, 130)