In [1]:
import html
import json
from collections import Counter

import numpy as np
import toolz
import tqdm
from IPython.display import Image, HTML

In [2]:
# Load the list of image records (each carries 'split', 'cocoid' and 'sentences',
# which later cells read). A context manager closes the file handle promptly
# instead of leaving it to the garbage collector.
with open('/Users/kcarnold/src/ImageCaptioning.pytorch/data/dataset_coco.json') as dataset_file:
    images = json.load(dataset_file)['images']

In [4]:
def _load_annotation_images(path):
    """Return the 'images' list from one COCO annotations JSON file, closing the file."""
    with open(path) as annotations_file:
        return json.load(annotations_file)['images']


# Map image id -> hosted image URL across the train2017 and val2017 annotation
# files. Train is read first, so a val entry wins if an id appears in both.
id2url = {
    img['id']: img['coco_url']
    for annotations_path in (
        '/Data/COCO/annotations/captions_train2017.json',
        '/Data/COCO/annotations/captions_val2017.json',
    )
    for img in _load_annotation_images(annotations_path)
}

In [5]:
# Tally how many image records fall into each dataset split.
Counter(toolz.pluck('split', images))

Counter({'restval': 30504, 'test': 5000, 'train': 82783, 'val': 5000})

In [6]:
# Bucket the image records by their 'split' field, keep a handle on the
# validation bucket, and display its size.
images_by_split = toolz.groupby(lambda record: record['split'], images)
valid_images = images_by_split['val']
len(valid_images)

5000

In [9]:
def coco_url(cocoid):
    """Return the image URL stored for `cocoid` in the `id2url` lookup table.

    Raises:
        KeyError: if `cocoid` has no entry in `id2url`.
    """
    # Alternative kept for reference (not used): synthesize the URL as
    #     f'http://images.cocodataset.org/train2017/{cocoid:012d}.jpg'
    url = id2url[cocoid]
    return url

In [10]:
# Spot check: resolve the URL of the first validation image.
coco_url(valid_images[0]['cocoid'])

'http://images.cocodataset.org/train2017/000000184613.jpg'

In [87]:
def show_images(indices):
    """Build an HTML fragment showing validation images with their captions.

    Args:
        indices: iterable of positions into ``images_by_split['val']``.

    Returns:
        A string of HTML: one inline-block ``<div>`` per requested image,
        holding the ``<img>`` tag followed by one ``<div>`` per raw caption.
        The blocks are joined with newlines; an empty `indices` gives ``''``.
    """
    val_images = images_by_split['val']

    def render(idx):
        record = val_images[idx]
        # Captions are free text; escape them so characters such as '<' or '&'
        # are displayed literally instead of being parsed as markup.
        captions = '\n'.join(
            '<div>{}</div>'.format(html.escape(sent['raw']))
            for sent in record['sentences']
        )
        return '<div style="display: inline-block;"><img src="{}">{}</div>'.format(
            html.escape(coco_url(record['cocoid']), quote=True), captions)

    return '\n'.join(render(idx) for idx in indices)


In [9]:
import h5py

  from ._conv import register_converters as _register_converters


In [30]:
# Open the per-image feature store read-only. The mode is passed explicitly
# because older h5py releases default to read/write when none is given, which
# can create or modify the file; the visible cells only read from it.
img_data_file = h5py.File('/Users/kcarnold/code/textrec/models-aside/feats_by_imgid.h5', 'r')

In [31]:
def get_vec_for_image(cocoid):
    """Return the axis-0 mean of the features stored for one image.

    The dataset is looked up in `img_data_file` under ``str(cocoid)``, read
    fully into memory, and averaged over its first axis.
    """
    dataset_key = str(cocoid)
    features = img_data_file[dataset_key][:]
    return features.mean(axis=0)

In [32]:
# One pooled feature vector per image record; row i corresponds to images[i].
mean_features = np.array([
    get_vec_for_image(record['cocoid'])
    for record in tqdm.tqdm_notebook(images)
])




In [None]:
from sklearn.metrics import pairwise_distances

In [None]:
# Dense all-pairs distance matrix over the rows of mean_features, using
# pairwise_distances' default metric since none is passed.
# NOTE(review): mean_features has one row per entry of `images`; the split
# counts shown earlier total 123,287 images, so this matrix would hold ~1.5e10
# entries (tens of GB or more). Confirm that is feasible, or compute a single
# query row the way the later `pdists2` cell does.
pdists = pairwise_distances(mean_features)

In [None]:
def show_similar_images(valid_idx, n=10):
    """Return HTML ``<img>`` tags for the `n` images nearest to one query image.

    Args:
        valid_idx: row of the precomputed `pdists` matrix to use as the query.
        n: number of nearest neighbours to include.

    Returns:
        A newline-joined string of ``<img>`` tags, nearest first.

    `pdists` is computed from `mean_features`, whose rows follow the order of
    `images`, so neighbour indices are resolved against `images`. Looking them
    up in the validation subset would return unrelated images or raise
    IndexError for indices beyond its length.
    """
    nearest = np.argsort(pdists[valid_idx])[:n]
    return '\n'.join(
        '<img src="{}">'.format(coco_url(images[idx]['cocoid']))
        for idx in nearest)
HTML(show_similar_images(1))

In [18]:
HTML(show_similar_images(3))

In [58]:
# Index the image records by their 'cocoid' (a later duplicate id would win).
id2img = dict(zip(toolz.pluck('cocoid', images), images))

In [69]:
# query_words = 'red bird bowl'
# query_words = 'kite green field'
# query_words = 'kitchen island'
# query_words = 'kitten red scarf'
# query_words = 'bluebird branch'
# query_words = 'woman tennis'
# query_words = 'crosswalk traffic light'
def find_matching_imgs(query_words, limit=10):
    """Show the images whose captions best match a whitespace-separated query.

    Each query word is matched case-insensitively as a *substring* of every
    caption, so a stem such as ``'bicyc'`` matches "bicycle". An image scores
    one point per (word, caption) pair that matches. Images scoring zero are
    dropped and the rest are ranked by descending score, with ties keeping
    their order in `images`.

    Args:
        query_words: query string; split on whitespace.
        limit: maximum number of images to show.

    Returns:
        An ``HTML`` display object with one ``<img>`` tag per selected image.
    """
    # Captions are lower-cased before matching, so the query must be too;
    # otherwise a capitalised query word could never match anything.
    words = query_words.lower().split()

    scored_images = []
    for img in images:
        # Lower-case each caption once rather than once per query word.
        captions = [sent['raw'].lower() for sent in img['sentences']]
        score = sum(1 for word in words for caption in captions if word in caption)
        if score > 0:
            scored_images.append((img, score))

    # list.sort is stable (also with reverse=True), so equal scores keep dataset order.
    scored_images.sort(key=lambda pair: pair[1], reverse=True)
    return HTML('\n'.join(
        '<img src="{}">'.format(coco_url(img['cocoid']))
        for img, _ in scored_images[:limit]))
# a plate with pancakes with bananas and bacon with a cute coffee mug behind it
find_matching_imgs('pancake banana bacon')

In [18]:
# a red bird and green bird about to eat out of a dish
find_matching_imgs('red green bird eat dish bowl')

In [23]:
# a man is watching a field of sheep eat grass.
find_matching_imgs('person watch sheep field eat grass', limit=50)

In [81]:
# Distances from one query image's pooled feature vector to every row of
# mean_features, then show the 10 closest entries of `images`.
# NOTE(review): `stimulus_images` is not defined in any visible cell of this
# notebook, so this cell depends on leftover kernel state and will raise
# NameError on a fresh Restart & Run All. Define it (or load it) above.
pdists2=pairwise_distances([get_vec_for_image(stimulus_images[8]['cocoid'])], mean_features)[0]
HTML('\n'.join(
        '<img src="{}">'.format(coco_url(images[idx]['cocoid']))
        for idx in np.argsort(pdists2)[:10]))

In [25]:
# a man is standing with a very large colorful kite.
find_matching_imgs('man colorful kite')

In [27]:
# a black cat with big green eyes with something red behind its head.
find_matching_imgs('black cat red', limit=50)

In [29]:
# a small white and blue bird perched on a branch of a fir tree.
find_matching_imgs('white blue bird branch', limit=50)

In [70]:
# a young girl in a red top and black skirt playing tennis.
find_matching_imgs('girl red black tennis')

In [71]:
# a yellow light hanging above a pedestrian crossing sign.
find_matching_imgs('yellow light pedestrian')

In [72]:
#breakfast that consists of pancakes banana bacon blackberry with maple syrup and a smiley mug in the background
find_matching_imgs('pancake banana bacon blackberry')

In [74]:
#tri colored bird eating fruits from a bowl
find_matching_imgs('bird eat bowl')

In [75]:
#flock of sheep grazing in an open field
find_matching_imgs('sheep field')

In [78]:
# man with kite with a bike and roller blader in the background
find_matching_imgs('kite bicyc blade')

In [80]:
# kitchen with marble counter tops and wooden cabinets and wooden chairs
find_matching_imgs('kitchen marble wood chair')

In [84]:
# black kitty with a red knitted scarf
find_matching_imgs('cat scarf')

In [88]:
# a bluejay perched on a tree looking right
find_matching_imgs('blue jay tree right')

In [89]:
# a girl wearing red and black clothes playing tennis
find_matching_imgs('girl woman red black tennis')

In [90]:
# a yellow pedestrian crossing sign sitting below a signal light that is yellow.
find_matching_imgs('yellow sign light')

In [91]:
# a green field has a small group of white sheep grazing on it
find_matching_imgs('green white sheep')

In [92]:
# a man is in a field holding a kite to the ground
find_matching_imgs('kite ground')

In [93]:
# the lights are on in a green kitchen with dark cabinets
find_matching_imgs('green kitchen')

In [94]:
#a black cat looks to the left
find_matching_imgs('black cat')

In [95]:
#a girl reacts to a bouncing tennis ball while holding her racket up
find_matching_imgs('girl woman tennis')

In [96]:
# a lone sheep by itself in a green field with other sheep around it
find_matching_imgs('sheep green')

In [97]:
#a man in blue pants setting up a multi-colored kite on a grass field.
find_matching_imgs('man blue pants kite color grass field')

In [100]:
# a green colored kitchen with a tiled floor. there is an island in the middle
find_matching_imgs('green green kitchen tile island', limit=20)

In [101]:
#a blue jay looking to the left on a fir branch.
find_matching_imgs('blue jay bird fir tree branch')

In [102]:
# a female tennis player with a red shirt about to hit a tennis ball.
find_matching_imgs('female woman girl tennis red')

In [105]:
#a man stands next to a large kite on a grass lawn
find_matching_imgs('man person kite grass green lawn', limit=40)

In [106]:
#a kitchen has dark wood cabinets and stainless steel appliances
find_matching_imgs('kitchen dark wood cabinet stainless steel appl')

In [107]:
#a blue and white bird sits on a tree branch
find_matching_imgs('blue white bird tree branch')