In [1]:
import numpy as np
import torch 
import torch.nn as nn
from torch.autograd import Variable
import sys
sys.path.append('..')
from function.CDSSM import CDSSM

In [2]:
# Hyper-parameters for the CDSSM experiment. The section/equation numbers in
# the comments are the ones this notebook cites for each value.

# Letter n-gram size. See section 3.2.
LETTER_GRAM_SIZE = 3
# Word window size. See section 3.2.
WINDOW_SIZE = 3
# Number of distinct letter n-grams; determined from data. See section 3.2.
TOTAL_LETTER_GRAMS = 30000
# Width of one input row: one letter-gram slot per window position. See equation (1).
WORD_DEPTH = TOTAL_LETTER_GRAMS * WINDOW_SIZE
# For quick tests, uncomment the override below to shrink the input width.
# WORD_DEPTH = 1000

# Dimensionality of the max-pooling layer. See section 3.4.
K = 300
# Dimensionality of latent semantic space. See section 3.5.
L = 128
# Number of random unclicked documents serving as negative examples for a
# query. See section 4.
J = 4
# We only consider one time step for convolutions.
FILTER_LENGTH = 1

In [3]:
# Seed both RNGs so the synthetic data below is identical on every
# "Restart & Run All". The torch seed is set before the model is built so
# that any torch-based weight initialisation is repeatable too
# (NOTE(review): assumes CDSSM initialises its weights through torch's
# global RNG -- confirm in function/CDSSM.py).
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Model under test, imported from function.CDSSM.
model = CDSSM()

# Build a random data set: one query and one positive document per sample.
# Every item is a (1, sequence_length, WORD_DEPTH) array of uniform noise.
sample_size = 10
# Negative sampling below draws J *other* documents without replacement.
assert sample_size > J, "sample_size must exceed J to draw J distinct negatives"

l_Qs = []
pos_l_Ds = []
for i in range(sample_size):
    query_len = np.random.randint(1, 10)  # 1..9 rows per query
    l_Qs.append(np.random.rand(1, query_len, WORD_DEPTH))

    doc_len = np.random.randint(50, 500)  # 50..499 rows per document
    pos_l_Ds.append(np.random.rand(1, doc_len, WORD_DEPTH))

# neg_l_Ds[j][i] is the j-th negative document for query i: the positive
# document of some other, randomly chosen sample.
neg_l_Ds = [[] for _ in range(J)]
for i in range(sample_size):
    possibilities = [k for k in range(sample_size) if k != i]
    negatives = np.random.choice(possibilities, J, replace=False)
    for j in range(J):
        negative = negatives[j]
        neg_l_Ds[j].append(pos_l_Ds[negative])

# Till now, we have made a complete numpy dataset.
# Convert every array to a float32 torch tensor. No Variable wrapper is used:
# it is deprecated and plain tensors already support autograd.
for i in range(sample_size):
    l_Qs[i] = torch.from_numpy(l_Qs[i]).float()
    pos_l_Ds[i] = torch.from_numpy(pos_l_Ds[i]).float()
    for j in range(J):
        # Still a numpy array here: the negatives lists hold references to
        # the original arrays, not to the tensors stored in pos_l_Ds above.
        neg_l_Ds[j][i] = torch.from_numpy(neg_l_Ds[j][i]).float()

print(l_Qs)

[tensor([[[0.7719, 0.5952, 0.3380,  ..., 0.6842, 0.3784, 0.5043]]]), tensor([[[0.1956, 0.2491, 0.0389,  ..., 0.0647, 0.9800, 0.0256]]]), tensor([[[0.1704, 0.1121, 0.3303,  ..., 0.6818, 0.3006, 0.8866],
         [0.9256, 0.2644, 0.0980,  ..., 0.9889, 0.0590, 0.3664],
         [0.9273, 0.7133, 0.8850,  ..., 0.4930, 0.7116, 0.3867]]]), tensor([[[0.9761, 0.8202, 0.9695,  ..., 0.8877, 0.0716, 0.8191],
         [0.9016, 0.3685, 0.7577,  ..., 0.6744, 0.7739, 0.9567],
         [0.5761, 0.6867, 0.1673,  ..., 0.0224, 0.5788, 0.3242],
         [0.9802, 0.9776, 0.5479,  ..., 0.2728, 0.4950, 0.8213],
         [0.8105, 0.9779, 0.2721,  ..., 0.7983, 0.8425, 0.7611]]]), tensor([[[0.4109, 0.0367, 0.9263,  ..., 0.4761, 0.5365, 0.2707],
         [0.8978, 0.2935, 0.6286,  ..., 0.3003, 0.3253, 0.5348],
         [0.5265, 0.8162, 0.9035,  ..., 0.5007, 0.1539, 0.1473],
         [0.4413, 0.8208, 0.4211,  ..., 0.1664, 0.1403, 0.1087],
         [0.1015, 0.5834, 0.3137,  ..., 0.3862, 0.6189, 0.3068]]]), tensor([[

In [4]:
# Inspect the positive-document tensors (one list entry per sample).
# NOTE(review): this prints the whole list of tensors; printing only the
# shapes would keep the cell output short -- consider when re-running.
print(pos_l_Ds)

[tensor([[[0.5708, 0.7644, 0.1324,  ..., 0.2642, 0.1962, 0.2701],
         [0.4554, 0.0116, 0.5475,  ..., 0.2553, 0.6002, 0.0449],
         [0.3571, 0.3287, 0.9599,  ..., 0.6617, 0.5453, 0.4007],
         ...,
         [0.7401, 0.8183, 0.2672,  ..., 0.5452, 0.2661, 0.9806],
         [0.6413, 0.2781, 0.0268,  ..., 0.5527, 0.3323, 0.9378],
         [0.1357, 0.1159, 0.7272,  ..., 0.2785, 0.1863, 0.5405]]]), tensor([[[0.8473, 0.8970, 0.0822,  ..., 0.4396, 0.9142, 0.7407],
         [0.8292, 0.6225, 0.2579,  ..., 0.5940, 0.9087, 0.8846],
         [0.2497, 0.3991, 0.7954,  ..., 0.8565, 0.6039, 0.6156],
         ...,
         [0.6991, 0.8019, 0.2040,  ..., 0.4890, 0.1135, 0.8505],
         [0.1158, 0.5832, 0.4240,  ..., 0.3957, 0.7209, 0.4201],
         [0.8636, 0.1134, 0.7481,  ..., 0.3783, 0.2085, 0.1796]]]), tensor([[[5.7849e-01, 7.5563e-01, 9.9759e-01,  ..., 1.7658e-01,
          4.5016e-02, 1.8533e-01],
         [1.3614e-01, 8.3894e-01, 6.6943e-03,  ..., 3.7970e-01,
          1.9550e-01, 

In [5]:
# Inspect the negative-document tensors: a list of J lists, each holding one
# tensor per sample.
# NOTE(review): this prints every tensor in the nested list; printing only the
# shapes would keep the cell output short -- consider when re-running.
print(neg_l_Ds)

[[tensor([[[0.0740, 0.2594, 0.4058,  ..., 0.5785, 0.8132, 0.2842],
         [0.6928, 0.2494, 0.5399,  ..., 0.7170, 0.8761, 0.7443],
         [0.1372, 0.3375, 0.1056,  ..., 0.3792, 0.2584, 0.2988],
         ...,
         [0.1430, 0.7439, 0.8683,  ..., 0.5727, 0.2850, 0.8287],
         [0.9348, 0.3720, 0.1158,  ..., 0.7883, 0.0501, 0.4236],
         [0.5244, 0.3617, 0.4766,  ..., 0.4534, 0.6024, 0.9065]]]), tensor([[[0.6771, 0.2023, 0.0947,  ..., 0.4376, 0.5273, 0.5843],
         [0.4970, 0.6356, 0.3389,  ..., 0.7694, 0.2824, 0.1631],
         [0.8810, 0.5980, 0.2562,  ..., 0.2072, 0.5039, 0.8875],
         ...,
         [0.9243, 0.1403, 0.8902,  ..., 0.4373, 0.0111, 0.0300],
         [0.3782, 0.4285, 0.5587,  ..., 0.2084, 0.8003, 0.4182],
         [0.6633, 0.9954, 0.7371,  ..., 0.9106, 0.8207, 0.4462]]]), tensor([[[0.9867, 0.2654, 0.8371,  ..., 0.9730, 0.2689, 0.8249],
         [0.7523, 0.5160, 0.4905,  ..., 0.6485, 0.7410, 0.2874],
         [0.8033, 0.3651, 0.2308,  ..., 0.9353, 0.5457

In [6]:
# Loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

# Target for every sample: the similarity with the positive document sits at
# index 0 of the model output, so the "correct class" is always 0.
# CrossEntropyLoss expects the class index as a long tensor of shape (batch,).
y = torch.zeros(1, dtype=torch.long)

# One SGD step per sample (batch size 1).
for i in range(sample_size):
    # Score the query against its positive document and its J negatives.
    y_pred = model(l_Qs[i], pos_l_Ds[i], [neg_l_Ds[j][i] for j in range(J)])
    print(y_pred)
    print(y)
    # The model returns J + 1 scores with trailing singleton dimensions;
    # CrossEntropyLoss needs them as (batch, classes) = (1, J + 1).
    # reshape replaces the deprecated non-in-place Tensor.resize.
    loss = criterion(y_pred.reshape(1, J + 1), y)
    # print(i, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

tensor([[[0.1496]],

        [[0.1735]],

        [[0.1439]],

        [[0.1705]],

        [[0.1384]]], grad_fn=<SqueezeBackward1>)
tensor([0])
tensor([[[0.0437]],

        [[0.0244]],

        [[0.0193]],

        [[0.0102]],

        [[0.0281]]], grad_fn=<SqueezeBackward1>)
tensor([0])
tensor([[[0.1433]],

        [[0.1357]],

        [[0.1709]],

        [[0.1167]],

        [[0.1671]]], grad_fn=<SqueezeBackward1>)
tensor([0])
tensor([[[0.0492]],

        [[0.1206]],

        [[0.0564]],

        [[0.0800]],

        [[0.0502]]], grad_fn=<SqueezeBackward1>)
tensor([0])
tensor([[[0.1041]],

        [[0.1197]],

        [[0.1513]],

        [[0.1904]],

        [[0.1218]]], grad_fn=<SqueezeBackward1>)
tensor([0])
tensor([[[0.0881]],

        [[0.2245]],

        [[0.2558]],

        [[0.1500]],

        [[0.1810]]], grad_fn=<SqueezeBackward1>)
tensor([0])
tensor([[[0.1294]],

        [[0.2131]],

        [[0.2910]],

        [[0.2449]],

        [[0.1528]]], grad_fn=<SqueezeBackward1