In [107]:
import sys
import os
from os.path import splitext, join, exists
import json

import scipy.io as sio
import numpy as np
import torch
import torch.nn as nn

from torch.autograd import Variable
import torch.nn.functional as F

## Load data

In [30]:
def load(filename):
    ''' read the 'data' variable stored in a MATLAB-format file

    The path is used verbatim (no '.mat' extension is appended) and
    singleton dimensions of the loaded arrays are squeezed out.
    '''
    contents = sio.loadmat(filename, squeeze_me=True, appendmat=False)
    return contents['data']

def normalize_rows(mat, ord=2):
    ''' scale every row of a 2-d matrix to unit norm

    Row norms that are numerically zero are swapped for machine epsilon
    (see zeros_to_eps) so the division never hits an exact zero.
    '''
    assert mat.ndim == 2
    # keepdims gives an (n_rows, 1) column that broadcasts over the columns
    row_norms = np.linalg.norm(mat, ord=ord, axis=1, keepdims=True)
    return mat / zeros_to_eps(row_norms)

def zeros_to_eps(mat):
    ''' overwrite (near-)zero entries of a float array with machine epsilon

    The array is modified in place and the same object is returned.
    Entries are matched with np.isclose, so values within its default
    tolerance of zero are replaced as well.
    '''
    tiny = np.finfo(mat.dtype).eps
    near_zero = np.isclose(mat, 0.)
    mat[near_zero] = tiny
    return mat

In [6]:
# Load the tag annotations and the word2vec embeddings. The context
# managers close both file handles as soon as parsing is done instead of
# leaving them open for the lifetime of the kernel.
with open("../data/coco_noun.tags", 'r') as tags_file:
    anno = json.load(tags_file)
with open("../data/coco_noun.word2vec", 'r') as vecs_file:
    vecs = json.load(vecs_file)

In [7]:
# Infer the embedding dimensionality from the first word that actually has
# a vector (words without an embedding are stored as None).
# The sentinel must be None so that the check below can fire: with the
# previous -1 sentinel the error was unreachable and `[0] * vec_dim` would
# silently have produced empty vectors.
vec_dim = None
for w, vec in vecs.items():
    if vec is not None:
        vec_dim = len(vec)
        break
if vec_dim is None:
    raise RuntimeError("couldn't set embeddings dimensionality")

In [23]:
# Build per-split dictionaries of image features (X) and tag embeddings (Y).
# NOTE(review): this cell looks like an unfinished debugging session and does
# not run on a fresh kernel as written -- see the notes below.
X, Y = {}, {}
for set_ in [k for k in anno.keys() if k != "tags"]:
    # NOTE(review): `imid_list` is not defined anywhere in the visible
    # notebook; presumably it is the list of image ids of `set_` left over
    # from a deleted cell -- confirm and define it here.
    for i, imid in enumerate(imid_list):
        # set image features
        #fname = splitext(anno[set_][imid]["file_name"])[0] + ".dat"
        # NOTE(review): with the assignment above commented out, `fname` is
        # whatever value is left in the kernel namespace, not the file of `imid`.
        if fname in os.listdir("../data/" + set_):
            print(fname)
            # NOTE(review): this unconditional `break` leaves the inner loop,
            # so every statement below it in this branch is unreachable.
            break
            x = load(join("../data/", set_, fname))
            if i == 0:
                # allocate the feature matrix once the feature size is known
                n_samples = len(imid_list)
                n_dim = len(x)
                X[set_] = np.empty((n_samples, n_dim), dtype=np.float32)
            X[set_][i] = normalize_rows(x.reshape(1, -1)).squeeze()

            # set word embeddings (OOV tags are set to the zero vector)
            tags = anno[set_][imid]["tags"]
            y = [[0]*vec_dim if vecs[w] is None else vecs[w] for w in tags]
            # NOTE(review): Y[set_] is never created in this cell, so this
            # assignment would raise KeyError if it were ever reached.
            Y[set_][i] = normalize_rows(np.array(y, dtype=np.float32))

COCO_train2014_000000111189.dat


In [47]:
# Single-sample sanity check: load one image feature vector, L2-normalise
# it, and build the row-normalised embedding matrix of its tags.
x = normalize_rows(
    load(join("../data/", "train2014", "COCO_train2014_000000111189.dat")).reshape(1, -1)
).squeeze()
n_dim = len(x)
tags = anno["train2014"]["111189"]["tags"]
# tags without an embedding (stored as None) become zero vectors
y = normalize_rows(np.array(
    [vecs[w] if vecs[w] is not None else [0] * vec_dim for w in tags],
    dtype=np.float32,
))

In [53]:
# Shape check: feature vector size, number of tag rows, embedding size.
x.shape, len(y), y[0].shape

((4096,), 11, (300,))

In [54]:
# Wrap the numpy data as torch tensors: one tensor for the image features
# and one tensor per row of the tag-embedding matrix.
X_train = torch.from_numpy(x)
Y_train = list(map(torch.from_numpy, y))

In [62]:
# Same shape check as before, now on the torch tensors.
X_train.size(), len(Y_train), Y_train[0].size()

(torch.Size([4096]), 11, torch.Size([300]))

In [74]:
# Input sizes of the two modalities: image features (X) and tag embeddings (Y).
dim_X = X_train.size(0)
dim_Y = Y_train[0].size(0)
dim_X, dim_Y

(4096, 300)

## Model

In [129]:
### Bilinear
class Bilinear(nn.Module):
    ''' bilinear compatibility score between X and every element of Y

    Wraps a single-output nn.Bilinear layer and exposes the projections
    of either input through the layer's weight matrix.
    '''
    def __init__(self, in1_features, in2_features, bias=True):
        super().__init__()
        self.bilin = nn.Bilinear(in1_features, in2_features, 1, bias=bias)

    def forward(self, X, Y):
        ''' return a list holding the score of X against each y in Y
        '''
        scores = []
        for y in Y:
            scores.append(self.bilin(X, y))
        return scores

    @staticmethod
    def _as_rows(T):
        ''' promote a 1-d tensor to a single-row matrix, leave others as is
        '''
        return T.unsqueeze(0) if T.ndimension() == 1 else T

    def _weight_matrix(self):
        ''' weight slice of the only output unit of the bilinear layer
        '''
        assert self.bilin.weight.size()[0] == 1
        return self.bilin.weight[0]

    def project_x(self, X):
        ''' multiply X (as rows) by the weight matrix
        '''
        return torch.mm(self._as_rows(X), self._weight_matrix())

    def project_y(self, Y):
        ''' multiply Y (as rows) by the transposed weight matrix
        '''
        return torch.mm(self._as_rows(Y), self._weight_matrix().transpose(1, 0))


class ModelEbay(nn.Module):
    ''' MLP scoring the compatibility of a feature vector x with a vector y

    Both inputs are mapped to `in2_features` dimensions, concatenated,
    passed through one hidden layer and reduced to a single score.

    Parameters
    ----------
    in1_features : size of the last dimension of x
    in2_features : size of the last dimension of y (also the hidden size)
    bias : whether the linear layers learn an additive bias
    '''
    def __init__(self, in1_features, in2_features, bias=True):
        super().__init__()
        # `bias` used to be accepted but silently ignored; it is now
        # forwarded to every layer (the default keeps the old behaviour).
        self.fc1_1 = nn.Linear(in1_features, in2_features, bias=bias)
        self.fc1_2 = nn.Linear(in2_features, in2_features, bias=bias)
        self.fc2 = nn.Linear(2 * in2_features, in2_features, bias=bias)
        self.fc3 = nn.Linear(in2_features, 1, bias=bias)

    def forward_m(self, x, y):
        ''' score one (x, y) pair; leading batch dimensions are allowed
        as long as x and y share them
        '''
        # torch.tanh replaces the deprecated F.tanh
        x = torch.tanh(self.fc1_1(x))
        y = torch.tanh(self.fc1_2(y))
        # Concatenate along the feature axis: identical to dim=0 for 1-d
        # inputs, but also correct for batched inputs.
        # NOTE(review): tanh is applied a second time to activations that
        # already went through tanh; kept to preserve the numeric behaviour,
        # confirm whether it is intended.
        ccat = torch.tanh(torch.cat((x, y), dim=-1))
        hidden_l = torch.tanh(self.fc2(ccat))
        return self.fc3(hidden_l)

    def forward(self, X, Y):
        ''' return a list with the score of X against every y in Y
        '''
        return [self.forward_m(X, y) for y in Y]

In [130]:
# Instantiate both scoring models on the image / tag-embedding dimensions.
model = Bilinear(dim_X, dim_Y, bias=True)
model_ebay = ModelEbay(dim_X, dim_Y, bias=True)

In [131]:
# Tensors carry autograd state themselves since PyTorch 0.4, so the
# deprecated Variable wrapper is not needed to feed the model.
X = X_train
Y = list(Y_train)
out = model(X, Y)

In [132]:
# Score the sample against each of its tag embeddings with the MLP model.
model_ebay(X, Y)



[tensor([-0.0186], grad_fn=<ThAddBackward>),
 tensor([-0.0200], grad_fn=<ThAddBackward>),
 tensor([-0.0254], grad_fn=<ThAddBackward>),
 tensor([-0.0182], grad_fn=<ThAddBackward>),
 tensor([-0.0133], grad_fn=<ThAddBackward>),
 tensor([-0.0172], grad_fn=<ThAddBackward>),
 tensor([-0.0225], grad_fn=<ThAddBackward>),
 tensor([-0.0251], grad_fn=<ThAddBackward>),
 tensor([-0.0152], grad_fn=<ThAddBackward>),
 tensor([-0.0172], grad_fn=<ThAddBackward>),
 tensor([-0.0164], grad_fn=<ThAddBackward>)]