# Tutorial code for ad-hoc video search

In [1]:
import os
import torch
import numpy as np

from model import get_model
from bigfile import BigFile
from evaluation import compute_sim
from common import ROOT_PATH as rootpath

### Load a checkpoint to initialize the model

In [2]:
# Checkpoint to restore; it is read below for the keys 'best_perf',
# 'config' and 'model'.
resume_file = './runs_0/model_best.pth.tar'

# Without CUDA, remap the stored tensors onto the CPU; with CUDA, keep
# torch.load's default placement (map_location=None).
map_location = None if torch.cuda.is_available() else 'cpu'
checkpoint = torch.load(resume_file, map_location=map_location)

config = checkpoint['config']
best_perf = checkpoint['best_perf']

# Look up the 'w2vvpp' model constructor, build the model from the stored
# config, then load the saved state into it.
model_factory = get_model('w2vvpp')
model = model_factory(config)
model.load_state_dict(checkpoint['model'])

load_message = "=> loaded checkpoint '{}' (best_perf {})".format(resume_file, best_perf)
print(load_message)

# Show the structure of the visual and textual sub-networks.
for subnet in (model.vis_net, model.txt_net):
    print(subnet)

=> loaded checkpoint './runs_0/model_best.pth.tar' (best_perf 0.519786344882)
VisTransformNet(
  (fc1): Linear(in_features=2048, out_features=2048, bias=True)
  (activation): Tanh()
)
MultiScaleTxtNet(
  (encoder): MultiScaleTxtEncoder(
    (rnn_encoder): GruTxtEncoder(
      (we): Embedding(11286, 500)
      (rnn): GRU(500, 1024, batch_first=True)
    )
    (w2v_encoder): W2VTxtEncoder()
    (bow_encoder): BoWTxtEncoder()
  )
  (transformer): TxtTransformNet(
    (fc1): Linear(in_features=12671, out_features=2048, bias=True)
    (activation): Tanh()
  )
)


### Embed video features

In [3]:
# Locate the pre-computed video features:
#   <rootpath>/<video_collection>/FeatureData/<feat_name>
video_collection = 'tv2016train'
feat_name = 'mean_pyresnext-101_rbps13k,flatten0_output,os'
vid_feat_dir = os.path.join(rootpath, video_collection, 'FeatureData', feat_name)
vid_feat_file = BigFile(vid_feat_dir)

# Read the feature vector of every name listed in the BigFile.
# NOTE(review): read() hands back its own list of names (`renamed`), presumably
# because the returned order can differ from the requested one -- confirm.
videoset = vid_feat_file.names
renamed, vectors = vid_feat_file.read(videoset)
nr_videos = len(renamed)

# Embed the videos one feature vector at a time; element 0 of each
# embed_vis() result is converted to numpy and the results are stacked.
embedded_videos = []
for feat_vec in vectors:
    embedded_videos.append(model.embed_vis(feat_vec)[0].numpy())
vis_vecs = np.array(embedded_videos)

[BigFile] 200x2048 instances loaded from /home/chaoxi/VisualSearch/tv2016train/FeatureData/mean_pyresnext-101_rbps13k,flatten0_output,os


### Embed the query sentence

In [4]:
# Free-text query used for the search.
sent = 'a dog is playing with a cat'

# Embed the query with model.embed_txt, then convert the result to numpy.
txt_embedding = model.embed_txt(sent)
sent_vec = txt_embedding.numpy()

### Compute similarity and rank the videos

In [5]:
# Cosine similarity between the sentence embedding and the video embeddings.
# Only element 0 of the result is used -- presumably the row for the single
# query sentence; TODO confirm against compute_sim.
query_sims = compute_sim(sent_vec, vis_vecs, measure='cosine')[0]

# Pair each similarity with the video name at the same position.
scored_videos = []
for idx, score in enumerate(query_sims):
    scored_videos.append((renamed[idx], score))

# Rank by similarity, highest first, and show the five best matches.
ranklist = sorted(scored_videos, key=lambda pair: pair[1], reverse=True)

print(ranklist[:5])

cosine_sim execution time: 0.007

[('tv2016train_video92', 0.39867592), ('tv2016train_video128', 0.3559459), ('tv2016train_video147', 0.35549513), ('tv2016train_video26', 0.26383188), ('tv2016train_video14', 0.25476596)]
