In [1]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import argparse
from keras.models import Model, load_model
from keras.applications.xception import Xception
from tensorflow.keras.utils import pad_sequences
from pickle import load
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.preprocessing.text import Tokenizer





In [2]:
with open("tokenizer.p", "rb") as f:
    tokenizer = load(f)
max_length = 39

caption_model = load_model('model.h5') 

In [9]:

def extract_features(filename, model):
        try:
            image = Image.open(filename)
        except:
            print("ERROR: Couldn't open image! Make sure the image path and extension is correct")
        image = image.resize((299,299))
        image = np.array(image)
        # for images that has 4 channels, we convert them into 3 channels
        if image.shape[2] == 4: 
            image = image[..., :3]
        image = np.expand_dims(image, axis=0)
        image = image/127.5
        image = image - 1.0
        feature = model.predict(image)
        return feature
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
         if index == integer:
             return word
    return None
def generate_desc(model, tokenizer, photo, max_length):
    in_text = 'start'
    for i in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        pred = model.predict([photo,sequence], verbose=3)
        pred = np.argmax(pred)
        word = word_for_id(pred, tokenizer)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'end':
            break
    return in_text


In [10]:
import glob
images = glob.glob(f'test/*.jpg')
DenseNet201_model = DenseNet201(include_top=False, pooling="avg")

images

['test\\COCO_val2014_000000000042.jpg',
 'test\\COCO_val2014_000000000073.jpg',
 'test\\COCO_val2014_000000000074.jpg',
 'test\\COCO_val2014_000000000133.jpg',
 'test\\COCO_val2014_000000000136.jpg',
 'test\\COCO_val2014_000000000139.jpg',
 'test\\COCO_val2014_000000000143.jpg',
 'test\\COCO_val2014_000000000164.jpg',
 'test\\COCO_val2014_000000000192.jpg',
 'test\\COCO_val2014_000000000196.jpg',
 'test\\COCO_val2014_000000000208.jpg',
 'test\\COCO_val2014_000000000241.jpg',
 'test\\COCO_val2014_000000000257.jpg',
 'test\\COCO_val2014_000000000283.jpg',
 'test\\COCO_val2014_000000000285.jpg',
 'test\\COCO_val2014_000000000294.jpg',
 'test\\COCO_val2014_000000000328.jpg',
 'test\\COCO_val2014_000000000338.jpg',
 'test\\COCO_val2014_000000000357.jpg',
 'test\\COCO_val2014_000000000359.jpg',
 'test\\COCO_val2014_000000000360.jpg',
 'test\\COCO_val2014_000000000387.jpg',
 'test\\COCO_val2014_000000000395.jpg',
 'test\\COCO_val2014_000000000397.jpg',
 'test\\COCO_val2014_000000000400.jpg',


In [12]:
from tqdm import tqdm
res = [] 
count = 0
l = len(images)
for img_path in (images):    
  

    photo = extract_features(img_path, DenseNet201_model)
    img = Image.open(img_path)
    description = generate_desc(caption_model, tokenizer, photo, max_length)

    description = description.replace('start','')
    description = description.replace('endseq','')
    description = description.replace('is','')

    tags = description.split(' ')
    tags = [x for x in tags if x != '']

    tags = list(set(tags))
    
    stringTags =  ' '.join(tags)
    imgName = img_path.split("\\")[-1]
    res.append((imgName, stringTags))
    
    count += 1 
    if count % 10 ==10:
        print(count)





In [13]:
with open('output.txt', 'w') as f:
    f.write("image,tags\n")
    for item in res:
        f.write(f"{item[0]}, {item[1]}\n")

In [14]:
res

[('COCO_val2014_000000000042.jpg', 'dog man holding are'),
 ('COCO_val2014_000000000073.jpg', 'young man are playing'),
 ('COCO_val2014_000000000074.jpg', 'bicycle man walks'),
 ('COCO_val2014_000000000133.jpg', 'man floor holding'),
 ('COCO_val2014_000000000136.jpg', 'dog white man holding'),
 ('COCO_val2014_000000000139.jpg', 'table woman room are sitting man'),
 ('COCO_val2014_000000000143.jpg', 'white'),
 ('COCO_val2014_000000000164.jpg', 'holding woman are man young'),
 ('COCO_val2014_000000000192.jpg', 'player baseball'),
 ('COCO_val2014_000000000196.jpg', 'woman are sitting young man'),
 ('COCO_val2014_000000000208.jpg', 'dog are woman playing'),
 ('COCO_val2014_000000000241.jpg', 'woman are sitting young man'),
 ('COCO_val2014_000000000257.jpg', 'man street people are'),
 ('COCO_val2014_000000000283.jpg', 'hand man holding'),
 ('COCO_val2014_000000000285.jpg', 'dog brown'),
 ('COCO_val2014_000000000294.jpg', 'hand man young holding'),
 ('COCO_val2014_000000000328.jpg', 'holding

In [None]:
with open('output.txt', 'w') as f:
    f.write("image,tags\n")
    for item in res:
        f.write(f"{item[0]}, {item[1]}\n")