In [28]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import argparse
from keras.models import Model, load_model
from keras.applications.xception import Xception
from tensorflow.keras.utils import pad_sequences
from pickle import load


import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\owais\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\owais\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [29]:
def txtToTags(caption):    
    # Tokenize the caption
    tokens = word_tokenize(caption)

    # Tag each token with its part of speech
    tagged_tokens = pos_tag(tokens)

    # Extract specific tokens based on their part of speech
    nouns = [token[0] for token in tagged_tokens if token[1].startswith("N")]
    verbs = [token[0] for token in tagged_tokens if token[1].startswith("V")]
    adjectives = [token[0] for token in tagged_tokens if token[1].startswith("J")]

    # Combine extracted tokens into meaningful phrases
    noun_phrases = []
    current_phrase = []
    for token in tagged_tokens:
        if token[1].startswith("N"):
            current_phrase.append(token[0])
        elif current_phrase:
            # Combine consecutive nouns into noun phrases
            noun_phrases.append(" ".join(current_phrase))
            current_phrase = []
    if current_phrase:
        noun_phrases.append(" ".join(current_phrase))

    verb_phrases = verbs

    adjective_phrases = []
    current_phrase = []
    for token in tagged_tokens:
        if token[1].startswith("J"):
            current_phrase.append(token[0])
        elif current_phrase:
            # Combine consecutive adjectives into adjective phrases
            adjective_phrases.append(" ".join(current_phrase))
            current_phrase = []
    if current_phrase:
        adjective_phrases.append(" ".join(current_phrase))

    # Combine all extracted phrases into a single list of tokens
    extracted_tokens =  verb_phrases + adjective_phrases + noun_phrases
    
    
    return extracted_tokens

In [30]:
def extract_features(filename, model):
        try:
            image = Image.open(filename)
        except:
            print("ERROR: Couldn't open image! Make sure the image path and extension is correct")
        image = image.resize((299,299))
        image = np.array(image)
        # for images that has 4 channels, we convert them into 3 channels
        if image.shape[2] == 4: 
            image = image[..., :3]
        image = np.expand_dims(image, axis=0)
        image = image/127.5
        image = image - 1.0
        feature = model.predict(image)
        return feature
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
         if index == integer:
             return word
    return None
def generate_desc(model, tokenizer, photo, max_length):
    in_text = 'start'
    for i in range(max_length):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=max_length)
        pred = model.predict([photo,sequence], verbose=0)
        pred = np.argmax(pred)
        word = word_for_id(pred, tokenizer)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'end':
            break
    return in_text


In [31]:
import glob
images = glob.glob(f'test/*.jpg')

max_length = 32
tokenizer = load(open("tokenizer.p","rb"))
model = load_model('models/model_9.h5')
xception_model = Xception(include_top=False, pooling="avg")
res = []

In [32]:
for img_path in images:


    photo = extract_features(img_path, xception_model)
    img = Image.open(img_path)
    description = generate_desc(model, tokenizer, photo, max_length)

    tags = txtToTags(description)

    tags = [x for x in tags if x != '']

    tags = list(set(tags))
    
    stringTags =  ' '.join(tags)
    
    stringTags = stringTags.replace('start', '')
    stringTags = stringTags.replace('end', '')
    
    imgName = img_path.split("\\")[-1]
    res.append((imgName, stringTags))





In [33]:
res

[('COCO_val2014_000000000042.jpg', 'air  holding  baby man is'),
 ('COCO_val2014_000000000073.jpg', ' red riding  man bike helmet is shirt'),
 ('COCO_val2014_000000000074.jpg', 'ball red playing children are grass '),
 ('COCO_val2014_000000000133.jpg',
  'red ball  red boy playing  is young shirt'),
 ('COCO_val2014_000000000136.jpg', ' red  man sitting bench is shirt'),
 ('COCO_val2014_000000000139.jpg', ' playing children are bed'),
 ('COCO_val2014_000000000143.jpg', 'dog  running grass  is black'),
 ('COCO_val2014_000000000164.jpg', ' red  man sitting bench is shirt'),
 ('COCO_val2014_000000000192.jpg',
  'red  baseball player uniform playing is white baseball '),
 ('COCO_val2014_000000000196.jpg', '  man sitting bench is black shirt'),
 ('COCO_val2014_000000000208.jpg', 'are playing children grass '),
 ('COCO_val2014_000000000241.jpg', '  woman sitting bench is black shirt'),
 ('COCO_val2014_000000000257.jpg', 'front ocean  red standing  man is shirt'),
 ('COCO_val2014_000000000283.

In [34]:
with open('output8kOldModel.txt', 'w') as f:
    f.write("image,tags\n")
    for item in res:
        f.write(f"{item[0]}, {item[1]}\n")