In [83]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras
from nltk.corpus import stopwords
import string
import json
from time import time
import pickle
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.preprocessing import image
from keras.models import Model, load_model
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Input, Dense, Dropout, Embedding, LSTM
from keras.layers.merge import add



In [71]:
# Load the saved Keras model that predict_caption() queries for the next word.
model = load_model('model_weights/model_9.h5')
# NOTE(review): _make_predict_function() is a private Keras method; presumably it
# builds the predict function eagerly — confirm it exists in the installed Keras.
model._make_predict_function()


In [72]:
# Instantiate ResNet50 with ImageNet weights for 224x224x3 inputs, then print its
# layer-by-layer summary.
model_temp=ResNet50(weights="imagenet",input_shape=(224,224,3))
model_temp.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_6[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [73]:
# Feature extractor: a new model that shares ResNet50's input but stops at the
# second-to-last layer, i.e. the final layer of model_temp is dropped.
model_resnet = Model(inputs=model_temp.input, outputs=model_temp.layers[-2].output)
model_resnet._make_predict_function()


In [74]:
def preprocess_image(img):
    """Load an image file and turn it into a single-item batch for ResNet50.

    Parameters
    ----------
    img : path (or other source accepted by ``image.load_img``) of the picture.

    Returns
    -------
    The array produced by ``preprocess_input`` after the image has been loaded
    at 224x224, converted to an array, and given a leading batch axis.
    """
    loaded = image.load_img(img, target_size=(224, 224))
    # Add the batch axis in front, then apply the ResNet50 normalisation.
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    return preprocess_input(batch)

In [75]:
def encode_image(img):
    """Compute the ResNet feature vector for one image.

    The image is run through ``preprocess_image`` and then ``model_resnet``;
    the prediction is reshaped to ``(1, n)`` where ``n`` is the size of the
    prediction's second axis.
    """
    batch = preprocess_image(img)
    activations = model_resnet.predict(batch)
    n_features = activations.shape[1]
    return activations.reshape(1, n_features)

In [76]:
# Restore the vocabulary lookups used by predict_caption().
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open("word_to_idx.pxl", "rb") as vocab_file:
    word_to_idx = pickle.load(vocab_file)

with open("idx_to_word.pxl", "rb") as reverse_vocab_file:
    idx_to_word = pickle.load(reverse_vocab_file)

In [78]:
def predict_caption(photo, max_len=35):
    """Greedily decode a caption for an encoded image.

    Starting from the token ``startseq``, the partial caption is mapped to
    indices via ``word_to_idx``, post-padded to ``max_len`` and fed to
    ``model`` together with ``photo``. The highest-scoring index is turned
    back into a word via ``idx_to_word`` and appended. Decoding stops when
    ``endseq`` is produced or after ``max_len`` steps.

    Parameters
    ----------
    photo : image features passed as the first model input (the notebook
        passes the result of ``encode_image``).
    max_len : int, default 35
        Maximum number of decoding steps and the padded sequence length.
        NOTE(review): presumably this must equal the sequence length the
        model was trained with — confirm before overriding the default.

    Returns
    -------
    str
        The generated words joined by single spaces, without the
        ``startseq`` marker and without a trailing ``endseq`` marker.
    """
    in_text = "startseq"
    for _ in range(max_len):
        # Words missing from the vocabulary are skipped rather than raising.
        sequence = [word_to_idx[w] for w in in_text.split() if w in word_to_idx]
        sequence = pad_sequences([sequence], maxlen=max_len, padding='post')

        ypred = model.predict([photo, sequence])
        # Greedy choice: take the index with the highest score.
        word = idx_to_word[ypred.argmax()]
        in_text += ' ' + word

        if word == 'endseq':
            break

    # Drop the leading "startseq".
    tokens = in_text.split()[1:]
    # Only strip the last token when it really is the end marker; if the loop
    # ran out of steps first, the last token is a genuine caption word.
    if tokens and tokens[-1] == 'endseq':
        tokens = tokens[:-1]

    return ' '.join(tokens)

In [79]:
# Encode the sample image into the feature vector expected by predict_caption().
enc = encode_image('static/image.jpg')

In [80]:
# Generate the caption for the encoded sample image; the cell displays the result.
predict_caption(enc)

'two children are playing in water fountain'