In [7]:
import numpy as np
import keras.backend as K
from utils.coco_utils import load_coco_data
from keras.models import Model
from keras.layers import Input, Dense, Embedding, LSTM, Reshape, concatenate
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import SparseCategoricalCrossentropy

data = load_coco_data(pca_features=True)

In [2]:
#calculate the length of each caption in the training and validation sets
data["train_captions_lens"] = np.zeros(data["train_captions"].shape[0])
data["val_captions_lens"] = np.zeros(data["val_captions"].shape[0])
for i in range(data["train_captions"].shape[0]) :
    data["train_captions_lens"][i] = np.nonzero(data["train_captions"][i] == 2)[0][0] + 1
for i in range(data["val_captions"].shape[0]) :
    data["val_captions_lens"][i] = np.nonzero(data["val_captions"][i] == 2)[0][0] + 1

In [9]:
max_length = 0
for caption in data["train_captions"]:
    if len(caption) > max_length:
        max_length = len(caption)
for caption in data["val_captions"]:
    if len(caption) > max_length:
        max_length = len(caption)
data["max_length"] = max_length

word_set = set()
for caption in data["train_captions"]:
    for word in caption:
        word_set.add(word)
for caption in data["val_captions"]:
    for word in caption:
        word_set.add(word)
data["vocab_size"] = len(word_set) + 1 # add 1 for the <PAD> token



In [10]:
#define the R-LSTM model
input_image = Input(shape=(data["train_features"].shape[1],))
encoded_image = Dense(256, activation='relu')(input_image)

input_caption = Input(shape=(data["max_length"],))
embedded_caption = Embedding(data["vocab_size"], 256, mask_zero=True)(input_caption)
encoded_caption = LSTM(256)(embedded_caption)

merged = concatenate([encoded_image, encoded_caption])
output = Dense(data["vocab_size"], activation='softmax')(merged)

model = Model(inputs=[input_image, input_caption], outputs=output)
model.compile(loss=SparseCategoricalCrossentropy(), optimizer='adam')

In [11]:
def caption_generator(features, captions, vocab_size, batch_size):
    num_samples = features.shape[0]
    num_batches = num_samples // batch_size
    
    while True:
        for i in range(num_batches):
            batch_features = features[i*batch_size:(i+1)*batch_size]
            batch_captions = captions[i*batch_size:(i+1)*batch_size]
            batch_captions_onehot = to_categorical(batch_captions, num_classes=vocab_size)
            yield ([batch_features, batch_captions], batch_captions_onehot)

batch_size = 128
train_steps_per_epoch = data["train_features"].shape[0] // batch_size
val_steps_per_epoch = data["val_features"].shape[0] // batch_size

train_generator = caption_generator(data["train_features"], data["train_captions"], data["vocab_size"], batch_size)
val_generator = caption_generator(data["val_features"], data["val_captions"], data["vocab_size"], batch_size)


In [13]:
#train the model
model.fit(train_generator, steps_per_epoch=train_steps_per_epoch, epochs=50,
          validation_data=val_generator, validation_steps=val_steps_per_epoch)

Epoch 1/50


InvalidArgumentError: Graph execution error:

Detected at node 'sparse_categorical_crossentropy/remove_squeezable_dimensions/Squeeze' defined at (most recent call last):
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 193, in _run_module_as_main
      "__main__", mod_spec)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\runpy.py", line 85, in _run_code
      exec(code, run_globals)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\ipykernel\kernelapp.py", line 583, in start
      self.io_loop.start()
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\asyncio\base_events.py", line 541, in run_forever
      self._run_once()
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\asyncio\base_events.py", line 1786, in _run_once
      handle._run()
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\asyncio\events.py", line 88, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\ioloop.py", line 688, in <lambda>
      lambda f: self._run_callback(functools.partial(callback, future))
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\ioloop.py", line 741, in _run_callback
      ret = callback()
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\gen.py", line 814, in inner
      self.ctx_run(self.run)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\gen.py", line 775, in run
      yielded = self.gen.send(value)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\ipykernel\kernelbase.py", line 361, in process_one
      yield gen.maybe_future(dispatch(*args))
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\ipykernel\kernelbase.py", line 268, in dispatch_shell
      yield gen.maybe_future(handler(stream, idents, msg))
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\ipykernel\kernelbase.py", line 541, in execute_request
      user_expressions, allow_stdin,
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\tornado\gen.py", line 234, in wrapper
      yielded = ctx_run(next, result)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\ipykernel\ipkernel.py", line 300, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\IPython\core\interactiveshell.py", line 2858, in run_cell
      raw_cell, store_history, silent, shell_futures)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\IPython\core\interactiveshell.py", line 2886, in _run_cell
      return runner(coro)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\IPython\core\interactiveshell.py", line 3063, in run_cell_async
      interactivity=interactivity, compiler=compiler, result=result)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\IPython\core\interactiveshell.py", line 3254, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "<ipython-input-12-469e0d4a696b>", line 3, in <module>
      validation_data=val_generator, validation_steps=val_steps_per_epoch)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\training.py", line 1024, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\training.py", line 1083, in compute_loss
      y, y_pred, sample_weight, regularization_losses=self.losses
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\losses.py", line 278, in call
      y_pred, y_true
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\utils\losses_utils.py", line 200, in squeeze_or_expand_dimensions
      y_true, y_pred = remove_squeezable_dimensions(y_true, y_pred)
    File "c:\Users\asus\AppData\Local\Programs\Python\Python37\lib\site-packages\keras\utils\losses_utils.py", line 139, in remove_squeezable_dimensions
      labels = tf.squeeze(labels, [-1])
Node: 'sparse_categorical_crossentropy/remove_squeezable_dimensions/Squeeze'
Can not squeeze dim[2], expected a dimension of 1, got 1005
	 [[{{node sparse_categorical_crossentropy/remove_squeezable_dimensions/Squeeze}}]] [Op:__inference_train_function_15372]

In [None]:
model.save("reference_lstm_model.h5")