In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
os.environ["CHAINER_TYPE_CHECK"] = "0" #to disable type check. 
import chainer 
#Check che below is False if you disabled type check
#print(chainer.functions.Linear(1,1).type_check_enable) 

import os


import chainer.functions as F
from chainer import cuda
from chainer import Function, FunctionSet, Variable, optimizers, serializers

import argparse
import numpy as np
import cPickle as cpickle
import pickle
import random
import unicodecsv as csv
from tqdm import tqdm
import cv2

In [2]:
MEAN_VALUES = np.array([104, 117, 123]).reshape((3,1,1))
gpu_id=-1
#Load Caffe Model
cnn_model_place="../chainer_caption_generation/data/bvlc_googlenet_caffe_chainer.pkl"
with open(cnn_model_place, 'r') as f:
    func = pickle.load(f)
if gpu_id>= 0:
    func.to_gpu()
print "done"

done


In [3]:
def extract_img_feature(img_file):
    img=cv2.imread(img_file,cv2.IMREAD_COLOR)
    img=cv2.resize(img,(224,224))
    img=img.transpose(2, 0, 1)-MEAN_VALUES
    x_batch = np.array([img], dtype=np.float32)
    if gpu_id >=0:
        x = Variable(cuda.to_gpu(x_batch), volatile=True)
    else:
        x = Variable(x_batch, volatile=True)
    image_feature_chainer, = func(inputs={'data': x}, outputs=['pool5/7x7_s1'],
                  disable=['loss1/ave_pool', 'loss2/ave_pool','loss3/classifier'],
                  train=False)
    return image_feature_chainer.data.reshape(1024)

In [90]:
img_file="../sample_images/whole.png"
whole=extract_img_feature(img_file)
img_file="../sample_images/man_top.png"
man_top=extract_img_feature(img_file)
img_file="../sample_images/bicycle.png"
bicycle=extract_img_feature(img_file)
img_file="../sample_images/potted_plant.png"
potted_plant=extract_img_feature(img_file)
img_file="../sample_images/man_right.png"
man_right=extract_img_feature(img_file)
img_file="../sample_images/tree.png"
tree=extract_img_feature(img_file)
img_file="../sample_images/road.png"
road=extract_img_feature(img_file)
img_file="../sample_images/sky.png"
sky=extract_img_feature(img_file)
img_file="../sample_images/right_gate.png"
right_gate=extract_img_feature(img_file)

In [5]:
#Relation Model Preparation
gpu_id=-1

with open('../work/index2relation.pkl', 'r') as f:
    index2relation=pickle.load(f)
relation2index = dict((v, k) for k, v in index2relation.iteritems())

image_feature_dim=1024#image feature dimention per image
n_units = 1024  # number of units per layer
vocab_size=len(relation2index)

model_rel = chainer.FunctionSet()
model_rel.img_feature2vec=F.Linear(3*image_feature_dim, n_units)#parameter  W,b
model_rel.h1=F.Linear(n_units, n_units)#hidden unit,#parameter  W,b
model_rel.out=F.Linear(n_units, vocab_size)#parameter  W,b

model_dir='../experiment1/'
model_place=model_dir+'/relation_model%d.chainer'%0
serializers.load_hdf5(model_place, model_rel)#load modeldir

#To GPU
if gpu_id >= 0:
    model_rel.to_gpu()

#Define Newtowork (Forward)
def forward_rel(x_data):
    train=False
    x = Variable(x_data, volatile= not train)
    l1 = F.relu(model_rel.img_feature2vec(x))
    l2 = F.relu(model_rel.h1(l1))
    y = model_rel.out(l2)
    return F.softmax(y)

In [97]:
#Attribute Model Preparation

with open('../work/index2attribute.pkl', 'r') as f:
    index2attribute=pickle.load(f)
attribute2index = dict((v, k) for k, v in index2attribute.iteritems())

#Model Preparation
image_feature_dim=1024#image feature dimention per image
n_units = 128  # number of units per layer
vocab_size=len(attribute2index)

model_attr = chainer.FunctionSet()
model_attr.img_feature2vec=F.Linear(2*image_feature_dim, n_units)#parameter  W,b
model_attr.bn_feature=F.BatchNormalization(n_units)#parameter  sigma,gamma
model_attr.h1=F.Linear(n_units, n_units)#hidden unit,#parameter  W,b
model_attr.bn1=F.BatchNormalization(n_units)#parameter  gamma,beta
model_attr.out=F.Linear(n_units, vocab_size)#parameter  W,b

model_dir='../experiment2a/'
model_place=model_dir+'/attribute_model%d.chainer'%2
serializers.load_hdf5(model_place, model_attr)#load modeldir

#To GPU
if gpu_id >= 0:
    model_attr.to_gpu()

#Define Newtowork (Forward)
def forward_attr(x_data):
    train=True
    x = Variable(x_data,volatile=not train)
    feature_input = F.relu(model_attr.bn_feature(model_attr.img_feature2vec(x)))
    l1 = F.relu(model_attr.bn1(model_attr.h1(feature_input)))
    y = model_attr.out(l1)
    return F.softmax(y)

# attribute

In [98]:
def get_attribtue_score(feature1,feature2):
    vec=np.ones([2*image_feature_dim],dtype=np.float32)
    vec[0:image_feature_dim]=feature1
    vec[image_feature_dim:2*image_feature_dim]=feature2
    x_batch=np.array([vec],dtype=np.float32)
    dist=forward_attr(x_batch).data[0]
    return dist

In [99]:
score=get_attribtue_score(whole,man_top)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


In [132]:
score=get_attribtue_score(whole,bicycle)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


In [131]:
score=get_attribtue_score(whole,potted_plant)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


In [65]:
score=get_attribtue_score(whole,man_right)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


In [101]:
score=get_attribtue_score(whole,sky)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


In [102]:
score=get_attribtue_score(whole,road)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


In [103]:
score=get_attribtue_score(whole,tree)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


In [107]:
score=get_attribtue_score(whole,right_gate)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2attribute[index]))
    rank+=1

#1 	 0.6025316194% 	 white
#2 	 0.4626981448% 	 black
#3 	 0.3531251568% 	 blue
#4 	 0.2986895852% 	 visible
#5 	 0.2916357713% 	 green
#6 	 0.2750195330% 	 red
#7 	 0.2717502182% 	 here
#8 	 0.2491382649% 	 small
#9 	 0.2459737472% 	 large
#10 	 0.2394111594% 	 brown


# relation

In [108]:
def get_relation_score(feature1,feature2,feature3):
    vec=np.ones([3*image_feature_dim],dtype=np.float32)
    vec[0:image_feature_dim]=feature1
    vec[image_feature_dim:2*image_feature_dim]=feature2
    vec[2*image_feature_dim:3*image_feature_dim]=feature3
    x_batch=np.array([vec],dtype=np.float32)
    dist=forward_rel(x_batch).data[0]
    return dist

In [129]:
score=get_relation_score(whole,man_top,bicycle)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2relation[index]))
    rank+=1

#1 	 22.3912894726% 	 on
#2 	 11.6773158312% 	 riding
#3 	 5.6106146425% 	 is on a
#4 	 3.2452397048% 	 sitting on
#5 	 1.9956924021% 	 on a
#6 	 1.9871929660% 	 is riding a
#7 	 1.7613338307% 	 is on
#8 	 1.6828905791% 	 is riding
#9 	 1.6633620486% 	 pushing
#10 	 1.5282982029% 	 on the


In [130]:
score=get_relation_score(whole,man_right,road)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2relation[index]))
    rank+=1

#1 	 62.9625380039% 	 on
#2 	 7.0634752512% 	 on the
#3 	 5.4050892591% 	 in
#4 	 3.5611666739% 	 is on
#5 	 3.1290087849% 	 walking on
#6 	 2.5529829785% 	 is on the
#7 	 2.2439014167% 	 crossing
#8 	 1.2864099815% 	 near
#9 	 1.1378210038% 	 on top of
#10 	 0.9088567458% 	 standing on


In [117]:
score=get_relation_score(whole,bicycle,road)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2relation[index]))
    rank+=1

#1 	 70.2488660812% 	 on
#2 	 10.5984918773% 	 on the
#3 	 3.9970472455% 	 in
#4 	 2.5582181290% 	 near
#5 	 2.2435607389% 	 is on the
#6 	 1.5789568424% 	 is on
#7 	 0.6154546048% 	 by
#8 	 0.5153512117% 	 sitting on
#9 	 0.4893499427% 	 on top of
#10 	 0.4531901795% 	 parked on


In [118]:
score=get_relation_score(whole,bicycle,sky)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2relation[index]))
    rank+=1

#1 	 49.6615231037% 	 on
#2 	 11.6530567408% 	 in
#3 	 5.3632650524% 	 on the
#4 	 3.6487247795% 	 near
#5 	 2.4069139734% 	 is on the
#6 	 2.2162707523% 	 in the
#7 	 1.9974531606% 	 is on
#8 	 1.0195360519% 	 by
#9 	 0.9591570124% 	 are on the
#10 	 0.8994709700% 	 of


In [104]:
score=get_relation_score(whole,man_right,potted_plant)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2relation[index]))
    rank+=1

#1 	 12.1479392052% 	 has
#2 	 10.7314974070% 	 wearing
#3 	 9.4092175364% 	 carrying
#4 	 8.8208362460% 	 with
#5 	 7.2387009859% 	 holding
#6 	 2.9198935255% 	 has a
#7 	 2.4595797062% 	 on
#8 	 2.1050551906% 	 is holding
#9 	 2.0192829892% 	 holds
#10 	 1.9276527688% 	 in


In [124]:
score=get_relation_score(whole,potted_plant,right_gate)
top_k = 10
top_index=np.argsort(score)[::-1][:top_k]
rank=1
for index in top_index:
    print('#%d \t %4.10f%% \t %s' % (rank, score[index]*100, index2relation[index]))
    rank+=1

#1 	 24.1783142090% 	 on
#2 	 14.4534766674% 	 in
#3 	 6.4995221794% 	 of
#4 	 4.9632608891% 	 near
#5 	 4.5189391822% 	 on the
#6 	 3.5161804408% 	 behind
#7 	 2.6009608060% 	 is on
#8 	 2.4088174105% 	 in front of
#9 	 2.1800706163% 	 by
#10 	 1.7690252513% 	 next to
