# Image-to-tag (I2T) and tag-to-image (T2I) search

In [None]:
%matplotlib inline
import os.path as op


import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from nltk.corpus import stopwords
import numpy as np
from pycocotools.coco import COCO
import seaborn as sns

from retrieval_pipelines import absolute_coco_path, tag_to_image_search, image_to_tag_search, most_common_tags
from text_processing import create_caption_dataframe

### Coco API loading

In [None]:
# Point the COCO API at the instance annotations of the selected split.
data_dir = "dataset"
data_type = "train2014"
annotation_name = "instances_{0}.json".format(data_type)
ann_file = op.join(data_dir, "annotations", annotation_name)
coco = COCO(ann_file)

### Pre-calculated features loading

In [None]:
# Load the pre-computed arrays from disk; both projection matrices are kept
# in transposed form for the rest of the notebook.
W_visual = np.load("W1_all_2_view.npy").T
W_text = np.load("W2_all_2_view.npy").T
V = np.load("data/V_final.npy")
T = np.load("T.npy")
img_ids = np.load("data/visual_img_ids_final.npy")

In [None]:
# D is a diagonal matrix holding the fourth power of each loaded eigenvalue.
top_eigenvals = np.load("top_eigvals_all_2_view.npy")
D = np.diag(np.power(top_eigenvals, 4))

## Tag to image search

In [None]:
# Project the visual features V with W_visual; the result is transposed back
# so that each row corresponds to one row of V.
database_images = W_visual.dot(V.T).T

In [None]:
# Query tag and the file used to cache its features
# (whitespace in the tag is replaced by dashes in the file name).
tag = "night"
tag_slug = "-".join(tag.split())
feature_path = "{0}_features.npy".format(tag_slug)

In [None]:
# Compute and cache the word2vec features of `tag` when no cached file exists.
features_are_cached = op.exists(feature_path)
if not features_are_cached:
    # Imported lazily: these are only needed on a cache miss.
    import gensim

    from text_processing import sentence2vec

    print('\nLoading word2vec model ...')
    model_path = op.join('models', 'GoogleNews-vectors-negative300.bin')
    # NOTE(review): newer gensim releases expose this loader on KeyedVectors;
    # confirm against the installed gensim version.
    model = gensim.models.Word2Vec.load_word2vec_format(model_path, binary=True)
    tag_features = sentence2vec(tag, model)
    # Drop the reference to the model as soon as the features are computed.
    del model
    np.save(feature_path, tag_features)

In [None]:
# Read the tag's features back from the .npy file at feature_path.
tag_features = np.load(feature_path)

In [None]:
# Retrieve 5 images for the tag with the "similarity" distance, then resolve
# each returned id to a path through the COCO handle.
retrieved_img_ids = tag_to_image_search(
    tag_features,
    W_text,
    database_images,
    img_ids,
    coco,
    D,
    n_images=5,
    distance="similarity",
)
img_paths = [
    absolute_coco_path(int(retrieved_id), coco)
    for retrieved_id in retrieved_img_ids
]

In [None]:
# Show every retrieved image in its own numbered figure.
sns.set_style("dark")
for figure_number, path in enumerate(img_paths):
    plt.figure(figure_number)
    plt.imshow(mpimg.imread(path))

# Save images for all tags of interest

In [None]:
# For every tag of interest: make sure its word2vec features are cached,
# retrieve the 5 closest images (euclidean distance) and save each of them as
# "<tag>_<rank>.png" inside im_dir.
import os

tags_of_interest = [
    "dog skateboard", "red", "yellow", "cute rabbit", "cheerleader girls",
    "sail", "ocean", "ship sunset", "ship", "giraffe", "sunset", "beautiful",
    "proud", "jump", "jumping", "water", "mess", "crowd", "party",
]
im_dir = op.join("report", "euclidean_2_view")
# savefig does not create missing directories, so make sure the target exists.
if not op.isdir(im_dir):
    os.makedirs(im_dir)

# The plotting style does not depend on the tag: set it once.
sns.set_style("dark")

# The word2vec model is loaded at most once, on the first cache miss, instead
# of being reloaded for every tag whose features are not cached yet.
word2vec_model = None
for tag in tags_of_interest:
    tag_slug = "-".join(tag.split())
    feature_path = "{0}_features.npy".format(tag_slug)
    if not op.exists(feature_path):
        if word2vec_model is None:
            # Imported lazily: only needed when features must be computed.
            import gensim

            from text_processing import sentence2vec

            print('\nLoading word2vec model ...')
            model_path = op.join('models', 'GoogleNews-vectors-negative300.bin')
            word2vec_model = gensim.models.Word2Vec.load_word2vec_format(
                model_path, binary=True)
        tag_features = sentence2vec(tag, word2vec_model)
        np.save(feature_path, tag_features)
    tag_features = np.load(feature_path)
    retrieved_img_ids = tag_to_image_search(
        tag_features, W_text, database_images, img_ids, coco, D,
        n_images=5, distance="euclidean")
    img_paths = [absolute_coco_path(int(img_id), coco) for img_id in retrieved_img_ids]
    for im_idx, img_path in enumerate(img_paths):
        plt.figure(im_idx)
        # Figures 0-4 are reused for every tag: clear the previous tag's image
        # so that image artists do not pile up on the same axes.
        plt.clf()
        plt.imshow(mpimg.imread(img_path))
        output_name = "{0}_{1}.png".format(tag_slug, im_idx + 1)
        plt.savefig(op.join(im_dir, output_name))

# Release the model (if it was loaded) once all tags have been processed.
del word2vec_model

## Image to tag search

In [None]:
# Project the textual features T with W_text; the result is transposed back
# so that each row corresponds to one row of T.
database_captions = W_text.dot(T.T).T

In [None]:
# Query image and the file used to cache its visual features
# (named after the part of the image name before the first dot).
image_name = "giraffe.jpg"
image_path = op.join(data_dir, "test", image_name)
image_stem = image_name.split(".")[0]
visual_feature_path = "{0}_visual_features.npy".format(image_stem)

In [None]:
# Display the query image in a fresh figure.
plt.figure()
query_image = mpimg.imread(image_path)
plt.imshow(query_image)

In [None]:
# Compute and cache the VGG19 features of the query image, unless the cache
# file at visual_feature_path already exists.
if not op.exists(visual_feature_path):
    # Heavy dependencies are imported only when features must be computed.
    from keras.applications import vgg19
    from keras.optimizers import SGD

    from image_processing import process_image
    from vgg import compute_nn_features

    img_mat = process_image(image_path)
    net = vgg19.VGG19()
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    net.compile(optimizer=sgd, loss='categorical_crossentropy')
    visual_features = compute_nn_features([img_mat], net)
    # Save to the exact path tested by the guard above rather than re-deriving
    # the file name, so the cache check and the cache write cannot drift apart.
    # (np.save only appends ".npy" when the name does not already end with it.)
    np.save(visual_feature_path, visual_features)
    del net, sgd

In [None]:
# Load the cached features and flatten them to a 1-D vector whose length is
# the second dimension of W_visual.
visual_features = np.load(visual_feature_path).reshape(W_visual.shape[1])

In [None]:
# Image-to-tag search for the query image, asking for n_tags tags.
n_tags = 4
annotations = image_to_tag_search(
    visual_features.T,
    W_visual,
    database_captions,
    img_ids,
    coco,
    D,
    n_tags=n_tags,
    expanding_factor=10,
)

In [None]:
# English stop words from the NLTK stopwords corpus, kept as a set.
stops = set(stopwords.words('english'))

In [None]:
# Call most_common_tags on the retrieved annotations; the result is not
# assigned, so it is only available as this cell's output.
most_common_tags(annotations, n_tags, stops)

In [None]:
# Run the image-to-tag search on several test images and print the most
# common tags found for each of them. The visual feature file of every image
# is expected to exist already: this loop only loads it, it never computes it.
images_of_interest = ["giraffe.jpg", "boat.jpg", "desk.jpg", "teddy_bear.jpg", "elephant_drawing.jpg"]

# Loop-invariant values are built once instead of once per image.
n_tags = 4
stops = set(stopwords.words('english'))
n_visual_dims = W_visual.shape[1]

for image_name in images_of_interest:
    visual_feature_path = "{0}_visual_features.npy".format(image_name.split(".")[0])
    visual_features = np.load(visual_feature_path).reshape(n_visual_dims)
    annotations = image_to_tag_search(
        visual_features.T, W_visual, database_captions, img_ids, coco, D,
        n_tags=n_tags, expanding_factor=10)
    common_tags = most_common_tags(annotations, n_tags, stops)
    print("{0}: {1}".format(image_name, common_tags))

## Precision recall (PR) curve for T2I
Since caption evaluation is not available in the Python MS COCO API, we consider a retrieved image correct when the tag appears in its caption.

In [None]:
# Build the caption dataframe with the project helper; later cells index it by
# image id with .loc and read its "caption" column.
df_caption = create_caption_dataframe()

In [None]:
# Tags evaluated in the PR curves and, for each of them, the file used to
# cache its features (same naming scheme as for the single-tag query).
tags = "giraffe snowboard boat snow water".split()
features_paths = [
    "{0}_features.npy".format("-".join(evaluated_tag.split()))
    for evaluated_tag in tags
]

We pre-compute the features of all tags so that the word2vec forward pass does not have to be run every time.

In [None]:
# Make sure the features of every evaluated tag are cached on disk.
# The word2vec model is loaded lazily, at most once, on the first cache miss;
# an explicit None sentinel replaces the NameError-based existence check, so a
# failure while loading the model surfaces as itself instead of being followed
# by a secondary NameError.
word2vec_model = None
for tag, tag_feature_path in zip(tags, features_paths):
    if op.exists(tag_feature_path):
        continue
    if word2vec_model is None:
        # Imported here so that gensim is only required when something
        # actually has to be computed.
        import gensim
        from text_processing import sentence2vec

        print("Loading Word2Vec model")
        model_path = op.join('models', 'GoogleNews-vectors-negative300.bin')
        word2vec_model = gensim.models.Word2Vec.load_word2vec_format(
            model_path, binary=True)
    np.save(tag_feature_path, sentence2vec(tag, word2vec_model))

if word2vec_model is None:
    # Every tag was already cached: the model was never loaded.
    print("its all good man")
# Release the model (if it was loaded) before the evaluation cells run.
del word2vec_model

To compute the recall, we first need, for each tag, the number of database images whose caption contains that tag.

In [None]:
# For each tag, count the captions of the database images that contain every
# word of the tag (plain substring test on the caption).
counter = {tag: 0 for tag in tags}
for caption in df_caption.loc[img_ids]["caption"]:
    for tag in tags:
        if all(sub_tag in caption for sub_tag in tag.split()):
            counter[tag] += 1

In [None]:
# Display the per-tag counts computed above.
counter

In [None]:
# Retrieval sizes at which precision and recall are measured, and one
# zero-initialised result array per tag for each metric and distance.
max_n_images = 70000
numbers_of_images = [1, 200, 500, 800, 900, 1000, 1200, 1500, 1700, 2000, 2500, 3000, 3400, 3700, 5000, 10000, 25000, 40000, 50000, 70000]
n_measure_points = len(numbers_of_images)
precisions_cosine = {tag: np.zeros(n_measure_points) for tag in tags}
recalls_cosine = {tag: np.zeros(n_measure_points) for tag in tags}
precisions_euclidean = {tag: np.zeros(n_measure_points) for tag in tags}
recalls_euclidean = {tag: np.zeros(n_measure_points) for tag in tags}

In [None]:
# Precision / recall (in %) of the tag-to-image search with the default
# distance of tag_to_image_search, for every tag and retrieval size.
for tag, tag_feature_path in zip(tags, features_paths):
    tag_features = np.load(tag_feature_path)
    sub_tags = tag.split()
    for i, n_images in enumerate(numbers_of_images):
        retrieved_img_ids = tag_to_image_search(
            tag_features, W_text, database_images, img_ids, coco, D,
            n_images=n_images)
        retrieved_captions = df_caption.loc[retrieved_img_ids]["caption"]
        # A retrieved image is correct when its caption contains every word
        # of the tag.
        correct_number = sum(
            all(sub_tag in caption for sub_tag in sub_tags)
            for caption in retrieved_captions
        )
        precisions_cosine[tag][i] = correct_number*100 / n_images
        recalls_cosine[tag][i] = correct_number*100 / counter[tag]

In [None]:
# Precision / recall (in %) of the tag-to-image search with the euclidean
# distance, for every tag and retrieval size.
for tag, tag_feature_path in zip(tags, features_paths):
    tag_features = np.load(tag_feature_path)
    sub_tags = tag.split()
    for i, n_images in enumerate(numbers_of_images):
        retrieved_img_ids = tag_to_image_search(
            tag_features, W_text, database_images, img_ids, coco, D,
            n_images=n_images, distance="euclidean")
        retrieved_captions = df_caption.loc[retrieved_img_ids]["caption"]
        # A retrieved image is correct when its caption contains every word
        # of the tag.
        correct_number = sum(
            all(sub_tag in caption for sub_tag in sub_tags)
            for caption in retrieved_captions
        )
        precisions_euclidean[tag][i] = correct_number*100 / n_images
        recalls_euclidean[tag][i] = correct_number*100 / counter[tag]

In [None]:
# One figure per tag, with the cosine and euclidean precision-recall curves
# drawn on shared axes fixed to [0, 100] x [0, 100].
sns.set_style("darkgrid")
for figure_number, tag in enumerate(tags):
    plt.figure(figure_number)
    plt.title("Precision-Recall curve for {0}".format(tag))
    plt.plot(recalls_cosine[tag], precisions_cosine[tag],
             label="cosine distance")
    plt.plot(recalls_euclidean[tag], precisions_euclidean[tag],
             label="euclidean distance")
    plt.ylabel("Precision (%)")
    plt.xlabel("Recall (%)")
    plt.legend(loc=3)
    plt.axis([0, 100, 0, 100])
    plt.show()