# Set the inputs for training on features from a different CNN model

In [1]:
# Name of the pickled photo-feature file; resolved through Config before it is loaded.
feature_file_name = 'densenet_features.pkl'

# File-name template for model checkpoints. The {epoch}, {loss} and {val_loss}
# placeholders are filled in when a checkpoint is written.
checkpoint_output_file = (
    'dense-model-ep{epoch:03d}'
    '-loss{loss:.3f}'
    '-val_loss{val_loss:.3f}.h5'
)

# Name of the pickle that receives the training history at the end of the run.
history_file_name = 'dense-model_run_history.pkl'

# You don't need to touch anything below

Just run the cells and wait.

In [2]:

from utils.helpers import Config
from utils.dataprep import load_set, load_photo_features
from utils.dataprep import load_clean_descriptions, get_tokenizer, max_length_desc
from utils.inputprep import create_sequences, data_generator

c = Config()

# Turn the bare file names from the inputs cell into paths via Config.
# NOTE(review): both names are rebound in place, so re-running this cell without
# re-running the inputs cell feeds an already-resolved path back into
# ExtractedFeaturesFilePath - confirm that is harmless.
feature_file_name = c.ExtractedFeaturesFilePath(feature_file_name)
history_file_name = c.ExtractedFeaturesFilePath(history_file_name)

# checkpoint_output_file is not resolved through Config: it is used exactly as
# set in the inputs cell (the former self-assignment here was a no-op).

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# 1. Load the image ids of each split together with their pre-extracted photo features.
# The features are read from feature_file_name, i.e. whichever CNN's feature file
# was selected in the inputs cell.
train = load_set(c.FlickrTextFilePath("Flickr_8k.trainImages.txt"))
train_features = load_photo_features(feature_file_name, train)

dev = load_set(c.FlickrTextFilePath("Flickr_8k.devImages.txt"))
dev_features = load_photo_features(feature_file_name, dev)

# Sanity check: each split should have as many photo features as image ids.
print("Train ids: %i, and dev ids: %i" % (len(train), len(dev)))
print("Train photos: %i, and dev photos: %i" % (len(train_features), len(dev_features)))

Train ids: 6000, and dev ids: 1000
Train photos: 6000, and dev photos: 1000


In [4]:
# Width of a photo feature vector. Read from whichever entry comes first instead of
# a hard-coded image id, so the cell still works when the train split changes.
# NOTE(review): assumes train_features is a dict whose values all share this width - confirm.
next(iter(train_features.values())).shape[1]

1024

In [5]:
# 2. Load the cleaned descriptions of both splits from the shared descriptions file.
train_descriptions, dev_descriptions = (
    load_clean_descriptions(c.ExtractedFeaturesFilePath('descriptions.txt'), image_ids)
    for image_ids in (train, dev)
)

print("Train descriptions: %i, and dev descriptions: %i" % (len(train_descriptions), len(dev_descriptions)))


Train descriptions: 6000, and dev descriptions: 1000


In [6]:
# 3. Tokenize the train and dev sets.

# Get the tokenizer for the path configured in Config, then derive the two sizes
# the model definition needs.
tokenizer = get_tokenizer(c.TokenizerFilePath)
# +1 leaves room for index 0, which the model's Embedding layer masks
# (mask_zero=True); presumably word indices start at 1 - confirm.
vocab_size = len(tokenizer.word_index) + 1
max_length = max_length_desc(train_descriptions)

print("Tokensizer vocalulary size: %i, Description max length: %i " % (vocab_size, max_length))
# TODO: save the tokenizer for later use; it will be needed when translating a yhat vector into a description.


Tokensizer vocalulary size: 7579, Description max length: 34 


# 4. Define and fit the model

In [7]:
from keras.utils import to_categorical, plot_model
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Embedding, Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint


In [8]:
def define_model(vocab_size, max_length, input_dim=4096, units=256, dropout_rate=0.5):
    """Build, compile and summarize the captioning model.

    The model has two inputs, a photo feature vector and a sequence of word
    indices, whose encodings are added together and decoded into a softmax
    over the vocabulary.

    Args:
        vocab_size: Number of rows in the embedding table and number of
            softmax outputs.
        max_length: Length of the word-index input sequence.
        input_dim: Width of the photo feature vector.
        units: Width shared by the image projection, the word embedding, the
            LSTM and the decoder layer. Both branches must have the same
            width because their outputs are added. Defaults to 256, the
            value that used to be hard-coded.
        dropout_rate: Dropout rate applied at the start of each branch.
            Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        The compiled Keras model taking ``[photo_features, word_sequence]``.

    Side effects:
        Prints the model summary.
    """
    # NOTE: layers are created in the same order as before so that Keras'
    # automatically generated layer names stay the same.

    # Feature extractor branch: regularize, then project the photo features.
    inputs1 = Input(shape=(input_dim,))
    fe1 = Dropout(dropout_rate)(inputs1)
    fe2 = Dense(units, activation='relu')(fe1)

    # Sequence branch: embed the word indices (index 0 is masked) and encode
    # the sequence with an LSTM.
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, units, mask_zero=True)(inputs2)
    se2 = Dropout(dropout_rate)(se1)
    se3 = LSTM(units)(se2)

    # Decoder: merge both encodings and predict a distribution over the vocabulary.
    decoder1 = add([fe2, se3])
    decoder2 = Dense(units, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)

    # Tie it together: [image, seq] -> [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    model.summary()
    return model


In [9]:
# Infer the photo-feature width from the loaded data instead of a hard-coded image id,
# so this cell keeps working when the train split or the feature file changes.
# NOTE(review): assumes train_features is a dict whose values all share this width - confirm.
input_dim = next(iter(train_features.values())).shape[1]
model = define_model(vocab_size, max_length, input_dim)


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 34)           0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            (None, 1024)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 34, 256)      1940224     input_2[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)  

In [10]:
# One generator per split; both are consumed by the training call
# (train for fitting, dev for validation).
train_data_generator = data_generator(
    train_descriptions,
    train_features,
    tokenizer,
    max_length,
)
dev_data_generator = data_generator(
    dev_descriptions,
    dev_features,
    tokenizer,
    max_length,
)

In [11]:
# Checkpoint callback: after each epoch, write the model to disk only when
# val_loss has reached a new minimum.
filepath = checkpoint_output_file  # file-name template chosen in the inputs cell
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [12]:
 # Fit the model for 20 epochs, validating on the dev generator after each epoch.
 # One epoch takes as many generator steps as there are described photos in the split.
 history = model.fit_generator(
     train_data_generator,
     steps_per_epoch=len(train_descriptions),
     epochs=20,
     validation_data=dev_data_generator,
     validation_steps=len(dev_descriptions),
     callbacks=[checkpoint],
     verbose=2,  # 1: progress, 2: one line per epoch
 )

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
 - 410s - loss: 4.5242 - val_loss: 3.9614

Epoch 00001: val_loss improved from inf to 3.96140, saving model to dense-model-ep001-loss4.541-val_loss3.961.h5
Epoch 2/20
 - 418s - loss: 3.7572 - val_loss: 3.7858

Epoch 00002: val_loss improved from 3.96140 to 3.78582, saving model to dense-model-ep002-loss3.776-val_loss3.786.h5
Epoch 3/20
 - 536s - loss: 3.4994 - val_loss: 3.7347

Epoch 00003: val_loss improved from 3.78582 to 3.73468, saving model to dense-model-ep003-loss3.519-val_loss3.735.h5
Epoch 4/20
 - 541s - loss: 3.3468 - val_loss: 3.7347

Epoch 00004: val_loss did not improve from 3.73468
Epoch 5/20
 - 405s - loss: 3.2441 - val_loss: 3.7465

Epoch 00005: val_loss did not improve from 3.73468
Epoch 6/20
 - 403s - loss: 3.1670 - val_loss: 3.7631

Epoch 00006: val_loss did not improve from 3.73468
Epoch 7/20
 - 403s - loss: 3.1060 - val_loss: 3.7761

Epoch 00007: val_loss did not improve from 3.73468
Epoch 8/20
 - 402s - lo

In [18]:
# Persist the result of the training run to history_file_name.
import pickle

# NOTE(review): this pickles the object returned by fit_generator as a whole, not
# just its `history.history` dict of per-epoch metrics - confirm that readers of
# this file need the full object before slimming it down.
with open(history_file_name, mode="wb") as history_out:
    pickle.dump(history, history_out)



In [16]:
import os 



In [17]:
# Scratch check of the kernel's working directory; the checkpoint file names
# are relative, so the .h5 files are written under this directory.
os.getcwd()

'/data/home/mutaz/notebooks/dl_at3'