In [1]:
from os.path import join
import h5py as h # ver = 2.9.0
import numpy as np
import pickle
from keras.utils import to_categorical

Using TensorFlow backend.


# 1. Data Exploration

In [2]:
hdf = h.File(join('data_to_keras', 'vist_dataset.hdf5'), 'r')
#hdf.close()

In [3]:
# 'train', 'test', 'val'
print(hdf.keys())

<KeysViewHDF5 ['test', 'train', 'val']>


In [4]:
hdf['train/img'].shape # all image features
# 1024 img features: np.ndarray, float32, shape = 5055, 5, 1024
# - 5055 stories/sequences
# - 5 images per sequence
# - 1024 features per image

(5055, 5, 1024)

In [5]:
print(hdf['train/img'].shape)
print(hdf['val/img'].shape)
print(hdf['test/img'].shape)

(5055, 5, 1024)
(5055, 5, 1024)
(5055, 5, 1024)


In [6]:
# all txt tokens
# txt token indices padded: np.ndarray, int,
print(hdf['train/txt'].shape)
# 0 is the index of the padding token.
print(hdf['train/txt'][0,0,:])

(5055, 5, 73)
[   1   25 2615 1189  103    3   50   35   17 3924  199   20    2    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0]


In [7]:
# net length of each sentences
# sentence length: np.ndarray, int, shape = 5055, 5
print(hdf['train/len_txt'].shape)
print(hdf['train/len_txt'][0,0])

(5055, 5)
13


In [8]:
# storyid for each datasets
# np.ndarray, int, shape 5055
print(hdf['train/storyid'].shape)
print(hdf['train/storyid'][0])

(5055,)
47175


In [9]:
with open(join('data_to_keras', 'modif_vocab_4_plain.pkl'), 'rb') as f:
    modif_vocab_4_plain = pickle.load(f)

In [10]:
vocab_size = len(modif_vocab_4_plain['idx2word'].keys())
print(vocab_size)

11928


In [11]:
token_ids = list(modif_vocab_4_plain['idx2word'].keys())

In [12]:
to_categorical(token_ids[1:])

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [13]:
to_categorical(token_ids[1:]).shape

(11927, 11928)

In [14]:
for i in range(11921,len(modif_vocab_4_plain['idx2word'].keys())):
    print(modif_vocab_4_plain['idx2word'][i])

officiant
vowels
muse
vic
lorrie
elaine
lagunitas


In [15]:
to_categorical(hdf.get('train/txt'))[()].shape

(5055, 5, 73, 11921)

In [16]:
to_categorical(hdf.get('test/txt'))[()].shape

(5055, 5, 73, 11921)

In [17]:
to_categorical(hdf.get('val/txt'))[()].shape

(5055, 5, 73, 11921)

In [18]:
max(modif_vocab_4_plain['idx2word'].keys())

11927

# 2. Keras Seq2Seq Model

__Seq2Seq model configurations__
* LSTM cell
* Encoder LSTM depth = 2
* Decoder LSTM depth = 1


__Optimizer configurartions__
* Loss: (stable) cross-entropy
* Adam optimzer
* `learning_rate = 10**(-3)
* `batch_size = 64`
* `weight decay = 1e-5`

__Reguarization configurations__
* Dropout rate = 0.5

In [19]:
import seq2seq
import tensorflow as tf
import h5py as h # ver = 2.9.0
import numpy as np
from seq2seq.models import AttentionSeq2Seq
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.backend.tensorflow_backend import set_session
from os.path import join

config = tf.ConfigProto()

config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras

In [20]:
batch_size = 64

In [21]:
hdf = h.File(join('data_to_keras', 'vist_dataset.hdf5'), 'r')
X_train = hdf.get('train/img')[()]

In [24]:
Y_train_seq = hdf.get('train/txt')[()][:,0:2,:]

In [25]:
Y_train_seq.shape

(5055, 2, 73)

In [26]:
Y_train_seq = np.reshape(Y_train_seq, (Y_train_seq.shape[0], -1))
Y_train_seq.shape

(5055, 146)

In [27]:
Y_train = to_categorical(Y_train_seq)

In [28]:
print(type(X_train))
print(type(Y_train))
print(X_train.shape)
print(Y_train.shape)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(5055, 5, 1024)
(5055, 146, 11896)


In [29]:
_, output_length, vocab_size = Y_train.shape

In [30]:
model = AttentionSeq2Seq(output_dim=vocab_size, output_length=output_length, batch_input_shape=None,
                     batch_size=None, input_shape=None, input_length=5,
                     input_dim=1024, hidden_dim=256, depth=(2,1),
                     bidirectional=False, unroll=False, stateful=False, dropout=0.5)

In [31]:
#model = AttentionSeq2Seq(input_dim=5, input_length=, hidden_dim=10, output_length=8, output_dim=20, depth=4)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-3, decay=1e-5))

In [32]:
model.fit(X_train, Y_train, batch_size=batch_size, epochs=1, verbose=1)

Epoch 1/1


<keras.callbacks.History at 0x7f0eb15bdc88>