In [1]:
# %load demo.py
from __future__ import division

import os
import sys
from shutil import copyfile
import numpy as np
from skimage import io
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score

import torch
from torch.nn import *
import torch.nn.functional as F
from torch import optim
import torchvision.models as models
from torch.utils.data import DataLoader

from models.model import ImgSpecModel
from dataloaderraw import ImageSpecDataset

ImgSpecModel(
  (spec_layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(4, 8, kernel_size=(3, 3), stride=(1, 1))
    (5): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
    (9): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (reduction): Sequential(
    (0): Linear(in_features=14400, out_features=2048, bias=True)
  )
  (classifier): Sequential(
    (0): Linear(in_features=4096, out_features=512, bi

In [8]:
def run_demo(root_dir, img_name, demo_out_dir, model_path=None, threshold=0.5):
    """Match the objects of one test image to spectrograms and export the pairs.

    Every row of ``<root_dir>/test_split.tsv`` whose second tab-separated
    column equals ``img_name`` contributes its first column as an object id.
    All (image-object, spectrogram-object) combinations of those ids are
    scored with ``ImgSpecModel``; the score is the softmax output at class
    index 1 (presumably the "match" class -- confirm against training code).
    Pairs are then accepted greedily from the highest score down, using each
    image object and each spectrogram at most once and only when the score is
    at least ``threshold``. For the k-th accepted pair, the object's image is
    copied to ``<demo_out_dir>/k.jpg`` and the matched sound to
    ``<demo_out_dir>/k.wav``.

    Args:
        root_dir: Dataset root containing ``test_split.tsv`` and the
            ``test/img_feats``, ``test/spec``, ``test/img`` and ``test/wav``
            sub-directories.
        img_name: Value looked up in the second column of ``test_split.tsv``.
        demo_out_dir: Directory receiving the numbered ``.jpg``/``.wav`` files;
            created if it does not exist.
        model_path: Checkpoint holding the model ``state_dict``. Defaults to
            the checkpoint under ``save_50_epoch_f1_score`` used originally.
        threshold: Minimum score for a pair to be exported.

    Returns:
        None. Results are written to ``demo_out_dir``.
    """
    if model_path is None:
        # os.path.join keeps the default usable on non-Windows systems too.
        model_path = os.path.join('save_50_epoch_f1_score',
                                  'model_best_acc_33_0.586962471491.pth')

    # Collect the object ids that belong to the requested image.
    obj_list = []
    with open(os.path.join(root_dir, 'test_split.tsv'), 'r') as split_file:
        for line in split_file:
            # Strip the line terminator so a two-column row still compares
            # equal to img_name; rows without a second column are skipped.
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) > 1 and fields[1] == img_name:
                obj_list.append(fields[0])

    if not obj_list:
        # Nothing to match: avoid loading the model and touching the output dir.
        return

    # combined model
    imgspec_model = ImgSpecModel()
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    imgspec_model.load_state_dict(
        torch.load(model_path, map_location=torch.device('cpu')))
    # Inference mode: without this the BatchNorm2d layers would normalise with
    # the statistics of each single-sample batch and update their running stats.
    imgspec_model.eval()

    # Load every feature vector / spectrogram once instead of once per pair.
    test_dir = os.path.join(root_dir, 'test')
    feats_by_obj, spec_by_obj = {}, {}
    for obj in obj_list:
        if obj in feats_by_obj:
            continue
        stem = obj.split('.')[0]
        feats_path = os.path.join(test_dir, 'img_feats', stem + '.npy')
        feats_by_obj[obj] = torch.from_numpy(np.load(feats_path)).float().unsqueeze(0)
        spec_path = os.path.join(test_dir, 'spec', stem + '.png')
        spec_img = torch.from_numpy(io.imread(spec_path, as_gray=True) / 255.0)
        # Add batch and channel dimensions in front of the 2-D spectrogram.
        spec_by_obj[obj] = spec_img.unsqueeze(0).unsqueeze(1).float()

    # Score every (image object, spectrogram object) combination.
    img_spec_list_w_scores = []
    with torch.no_grad():
        for img in obj_list:
            for spec in obj_list:
                logits = imgspec_model(feats_by_obj[img], spec_by_obj[spec])
                pred = F.softmax(logits, dim=1).cpu().numpy()
                img_spec_list_w_scores.append((img, spec, pred[0][1]))

    # Highest score first; the sort is stable so ties keep enumeration order.
    img_spec_list_w_scores.sort(key=lambda pair: -pair[2])

    if not os.path.isdir(demo_out_dir):
        os.makedirs(demo_out_dir)

    used_img, used_spec = set(), set()
    file_cnt = 0
    for img, spec, score in img_spec_list_w_scores:
        if score >= threshold and img not in used_img and spec not in used_spec:
            file_cnt += 1
            used_img.add(img)
            used_spec.add(spec)
            out_img = os.path.join(demo_out_dir, '{}.jpg'.format(file_cnt))
            out_wav = os.path.join(demo_out_dir, '{}.wav'.format(file_cnt))
            in_img = os.path.join(test_dir, 'img', '{}.jpg'.format(img))
            # Export the sound of the *matched* spectrogram; using the image's
            # own id here would make the matching step have no effect.
            in_wav = os.path.join(test_dir, 'wav', '{}.wav'.format(spec))
            copyfile(in_img, out_img)
            copyfile(in_wav, out_wav)

In [10]:
# Demo inputs: dataset root, the image whose objects are matched,
# and the folder that receives the exported files.
root_dir = 'small_dataset_objects'
img_name = 'COCO_val2014_000000332653.jpg'
demo_out_dir = 'new_dir1'

# Run the matching demo; its results are files written under demo_out_dir.
run_demo(
    root_dir=root_dir,
    img_name=img_name,
    demo_out_dir=demo_out_dir,
)