In [2]:

# coding: utf-8

# In[1]:

import pandas as pd
import numpy as np
import pickle

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
import pdb
import time
import gc
from scipy.sparse import csr_matrix, lil_matrix

from gensim.models import KeyedVectors


# In[3]:

def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


# features of the training split (later handed to data_generator as features_dict)
train_features = _read_pickle("./data/features/train_features.pkl")


# In[4]:

# features of the validation split
valid_features = _read_pickle("./data/features/valid_features.pkl")


# In[6]:

# caption tables for both splits; every column is read as a string
train_captions, valid_captions = (
    pd.read_csv(csv_path, dtype = str)
    for csv_path in ("./data/split_lists/train_ids.csv",
                     "./data/split_lists/valid_ids.csv")
)


# In[7]:

# sanity check: does each caption table have as many rows as its feature
# container has entries? (validation is printed first, then training)
print(len(valid_captions) == len(valid_features))
print(len(train_captions) == len(train_features))


# In[8]:

#train_captions.caption = "startseq " + train_captions.caption + " endseq"
#valid_captions.caption = "startseq " + valid_captions.caption + " endseq"
# discard every row that has a missing value in any column
train_captions, valid_captions = train_captions.dropna(), valid_captions.dropna()

# In[9]:

train_captions.head()


# In[10]:

# one tokenizer shared by the training and validation captions
tokenizer = Tokenizer()


# In[11]:

# pool the captions of both splits: training first, validation second
all_captions = np.concatenate(
    [captions.caption.values for captions in (train_captions, valid_captions)]
)


# In[12]:

# learn the vocabulary from the pooled captions (cast to str beforehand)
tokenizer.fit_on_texts(all_captions.astype(str))


# In[13]:

# vocabulary size is the number of known words plus one; the extra slot
# accounts for index 0, which the Keras Tokenizer does not assign to any word
VOCAB_SIZE = vocab_size = len(tokenizer.word_index) + 1


# In[14]:

# persist the fitted tokenizer next to the notebook
with open("./tokenizer.pkl", "wb") as handle:
    pickle.dump(tokenizer, handle)


# In[]:

def encode_and_pad(caption, sequence_length = 15):
    """Turn one caption into a fixed-length vector of word indices.

    The caption is mapped to word indices with the module-level ``tokenizer``.
    The index sequence is then padded at the end, or cut at the end, so that
    the result has exactly ``sequence_length`` entries.

    Args:
        caption: caption text to encode.
        sequence_length: length of the returned vector.

    Returns:
        1-D array of ``sequence_length`` word indices.
    """
    index_sequences = tokenizer.texts_to_sequences([caption])
    fixed_length = pad_sequences(index_sequences,
                                 padding = "post",
                                 truncating = "post",
                                 maxlen = sequence_length)
    # a single caption went in, so hand back the single row
    return fixed_length[0]


# In[16]:

def encode(caption):
    """Return the word indices of ``caption`` according to the module-level ``tokenizer``.

    No padding or truncation is applied; the result is as long as the number
    of tokens the tokenizer recognises in the caption.
    """
    # texts_to_sequences works on a batch; unpack the one-element result
    (word_indices,) = tokenizer.texts_to_sequences([caption])
    return word_indices


# In[17]:


# convert the features of one photo and its caption (looked up in a dataframe
# of captions) into three numpy arrays that can be used as training samples
def consolidate_dataset(photo_id,features_dict, captions_df, sequence_length = 155):
    """Build the next-word training samples for a single photo.

    The caption of ``photo_id`` is tokenized once. For every token position
    ``i >= 1`` one sample is produced: the photo features, the first ``i``
    token ids padded to ``sequence_length``, and the one-hot encoding of
    token ``i`` as the target.

    Working on token ids (instead of whitespace-separated words) keeps every
    target a single class: a word that the tokenizer drops or splits into
    several tokens would otherwise give a target of the wrong shape and make
    the final ``np.array(y)`` ragged.

    Args:
        photo_id: id used to select the caption row in ``captions_df``.
        features_dict: despite the name, the feature vector of this one
            photo (it is appended as-is to the photo inputs).
        captions_df: dataframe with ``photo_id`` and ``caption`` columns; the
            first row matching ``photo_id`` is used.
        sequence_length: padded length of each input token sequence.

    Returns:
        Tuple ``(X_photos, X_captions, y)`` of numpy arrays with one row per
        sample. All three are empty when the caption has fewer than two tokens.

    Raises:
        IndexError: if ``captions_df`` has no row for ``photo_id``.
    """
    current_feature = features_dict
    matching_rows = captions_df.loc[captions_df.photo_id == photo_id]
    current_caption = str(matching_rows.iloc[0]["caption"])
    # tokenize once; prefixes and targets are slices of this id list
    token_ids = encode(current_caption)

    # keep track of the photo features, caption sequences and responses in lists
    X_photos, X_captions, y = [], [], []
    for i in range(1, len(token_ids)):
        # add a copy of the photo features
        X_photos.append(current_feature)
        # input: the first i tokens, padded/truncated at the end exactly like
        # encode_and_pad does for text input
        in_seq = pad_sequences([token_ids[:i]],
                               maxlen = sequence_length,
                               padding = "post",
                               truncating = "post")[0]
        X_captions.append(in_seq)
        # response: one-hot vector of the token that follows the prefix
        out_word = to_categorical([token_ids[i]], num_classes = vocab_size)[0]
        y.append(out_word)
    # return all three
    return np.array(X_photos), np.array(X_captions), np.array(y)
                                                   
# In[28]:

# data generator, intended to be used in a call to model.fit_generator()
def data_generator(features_dict, captions_df, sequence_length = 155):
    """Endlessly yield one batch of training samples per caption row.

    For each row of ``captions_df`` whose ``photo_id`` is a key of
    ``features_dict``, the samples built by ``consolidate_dataset`` are
    yielded as ``([X_photos, X_captions], y)``. After the last row the
    iteration starts over.

    Args:
        features_dict: mapping ``photo_id -> features``; element ``[0]`` of
            each value is used as the feature vector of the photo.
        captions_df: dataframe with ``photo_id`` and ``caption`` columns.
        sequence_length: padded caption length, forwarded to
            ``consolidate_dataset``.

    Yields:
        ``([X_photos, X_captions], y)`` for every row that has features and
        produces at least one sample.

    Raises:
        ValueError: if a complete pass over ``captions_df`` yields nothing
            (otherwise the generator would loop forever without producing a
            batch).
    """
    # loop for ever over images
    while 1:
        yielded_any = False
        for position, photo_id in enumerate(captions_df['photo_id']):
            # if the photo_id is not in the feature dictionary, move on
            if photo_id not in features_dict:
                continue
            current_feature = features_dict[photo_id][0]
            # hand over only this row: avoids re-scanning the whole dataframe
            # for every photo and makes sure a photo_id that occurs several
            # times contributes each of its captions, not just the first one
            caption_row = captions_df.iloc[[position]]
            X_photos, X_captions, y = consolidate_dataset(photo_id, current_feature, caption_row, sequence_length = sequence_length)
            # a caption that is too short produces no samples; an empty batch
            # cannot be fed to the model, so skip it
            if len(y) == 0:
                continue
            yielded_any = True
            yield ([X_photos, X_captions], y)
        if not yielded_any:
            raise ValueError("data_generator produced no samples: no photo_id in "
                             "captions_df has both features and a usable caption")


# In[29]:
# In[30]:

import numpy as np
import pandas as pd
import pickle
from keras import models
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout, RepeatVector, TimeDistributed, Masking
from keras.layers.merge import add, concatenate
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD


# In[31]:

def load_npy(path):
    """Read the array stored in the NumPy file at ``path`` and return it."""
    # the with-block closes the file on exit, no explicit close() is needed
    with open(path, "rb") as npy_file:
        return np.load(npy_file)


# In[32]:

#embedding_matrix = load_npy("./embedding_matrix.npy")



# In[34]:


def define_model(vocab_size, max_length, feature_dim=4096, embedding_dim=300,
                 units=256, dropout_rate=0.5):
    """Build and compile the two-input next-word prediction model.

    One branch projects the photo feature vector to ``units`` dimensions, the
    other embeds the partial caption and runs it through an LSTM. Both are
    added and decoded into a softmax over the vocabulary.

    Args:
        vocab_size: number of classes of the softmax output and number of
            rows of the embedding.
        max_length: length of the (padded) input caption sequence.
        feature_dim: length of the photo feature vector.
        embedding_dim: size of the word embedding.
        units: width of the dense projection, the LSTM and the decoder layer.
        dropout_rate: dropout applied to the photo features and to the
            embedded caption.

    Returns:
        The compiled Keras ``Model`` taking ``[photo_features, caption_ids]``.
    """
    # feature extractor model
    inputs1 = Input(shape=(feature_dim,))
    fe1 = Dropout(dropout_rate)(inputs1)
    fe2 = Dense(units, activation='relu')(fe1)
    # sequence model; mask_zero=True makes the LSTM ignore index-0 padding
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2)
    se2 = Dropout(dropout_rate)(se1)
    se3 = LSTM(units)(se2)
    # decoder model
    decoder1 = add([fe2, se3])
    decoder2 = Dense(units, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    # tie it together [image, seq] [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summarize model; summary() prints by itself and returns None, so it is
    # not wrapped in print() (that would add a stray "None" line)
    model.summary()
   # plot_model(model, to_file='model.png', show_shapes=True)
    return model

# captioning model for the fitted vocabulary and caption inputs of length 155
model = define_model(vocab_size = VOCAB_SIZE, max_length = 155)

# endless sample generator over the training split
gen = data_generator(features_dict = train_features,
                     captions_df = train_captions,
                     sequence_length = 155)
#y, t = next(gen)
#print(y[0].shape)
#print(y[1].shape)
#print(t.shape)


# epochs = 2
# steps = len(train_captions)
# for i in range(0,epochs):
#     # create the data generator
#     generator1 = data_generator(train_features, train_captions, sequence_length=155)
#     # fit for one epoch
#     model.fit(generator1, epochs=1, steps_per_epoch=steps, verbose=1)
#     # save model
#     model.save('./fresh_models/model_' + str(i) + '.h5')


# In[ ]:





False
False
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 155)          0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            (None, 4096)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 155, 300)     1936200     input_2[0][0]                    
__________________________________________________________________________________________________
drop

In [None]:
# continue training from the checkpoint saved as model_1
model = models.load_model("./fresh_models/model_1.h5")
epochs = 4
# one generator step per caption row
steps = len(train_captions)
for i in range(2, epochs):
    # start every epoch with a fresh generator
    generator1 = data_generator(train_features, train_captions, sequence_length=155)
    # train for exactly one epoch
    model.fit(generator1, steps_per_epoch=steps, epochs=1, verbose=1)
    # write a checkpoint named after the epoch index
    checkpoint_path = './fresh_models/model_' + str(i) + '.h5'
    model.save(checkpoint_path)


Epoch 1/1
Epoch 1/1