In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from importlib import reload 

In [2]:
from deeprank.dataset import DataLoader, PairGenerator, ListGenerator

In [3]:
# Build the dataset loader from the fold-1 LETOR'07 model config file.
loader = DataLoader(
    './config/letor07_mp_fold1.model'
)

[./data/letor/r5w/word_dict.txt]
	Word dict size: 193367
[./data/letor/r5w/qid_query.txt]
	Data size: 1692
[./data/letor/r5w/docid_doc.txt]
	Data size: 65323
[./data/letor/r5w/embed_wiki-pdc_d50_norm]
	Embedding size: 109282
Generate numpy embed: (193368, 50)


In [4]:
import json

# Parse the JSON model config. The context manager closes the file handle
# promptly instead of leaving it to the garbage collector.
with open('./config/letor07_mp_fold1.model') as config_file:
    letor_config = json.load(config_file)

# Use the GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Data directory named by the config; the relation files are read from here.
Letor07Path = letor_config['data_dir']

# Copy the loader-derived settings into the shared config in one step.
letor_config.update({
    'fill_word': loader._PAD_,
    'embedding': loader.embedding,
    'feat_size': loader.feat_size,
    'vocab_size': loader.embedding.shape[0],
    'embed_dim': loader.embedding.shape[1],
})

# Pair generator over the train relation file of the configured fold.
pair_gen = PairGenerator(
    rel_file=Letor07Path + '/relation.train.fold%d.txt' % letor_config['fold'],
    config=letor_config,
    device=device,
)

[./data/letor/r5w/relation.train.fold1.txt]
	Instance size: 47828
Pair Instance Count: 325439


In [6]:
from deeprank import select_module
from deeprank import rank_module

# Re-import both deeprank modules so edits made on disk are picked up without
# restarting the kernel (select_module is reloaded first, then rank_module).
select_module, rank_module = reload(select_module), reload(rank_module)

In [7]:
# Selection stage: build the IdentityNet from the shared config, switch it to
# training mode, then rebind the name to whatever `.to(device)` returns.
select_net = select_module.IdentityNet(
    config=letor_config,
    device=device,
)
select_net.train()
select_net = select_net.to(device)

In [8]:
# First MatchPyramid configuration: one (8, 2, 10) conv entry and a
# 50-unit fully connected layer.
letor_config['simmat_channel'] = 1
letor_config['conv_params'] = [(8, 2, 10)]
letor_config['fc_params'] = [50]
letor_config['dpool_size'] = [3, 10]
letor_config['lr'] = 0.001
letor_config['finetune_embed'] = False

# Build the ranker on the same device the pair generator was created with,
# matching the other MatchPyramid cell in this notebook. Without this the
# model stays on the CPU while the training loop runs against `device`.
rank_net = rank_module.MatchPyramidNet(config=letor_config, device=device)
rank_net = rank_net.to(device)

# Initialise the embedding table from the loader's embedding matrix.
rank_net.embedding.weight.data.copy_(torch.from_numpy(loader.embedding))
rank_net.train()
optimizer = optim.Adam(rank_net.parameters(), lr=letor_config['lr'])

In [9]:
# Second MatchPyramid configuration: one (8, 3, 3) conv entry and a
# 200-unit fully connected layer. This rebinds `rank_net` and `optimizer`.
letor_config.update({
    'simmat_channel': 1,
    'conv_params': [(8, 3, 3)],
    'fc_params': [200],
    'dpool_size': [3, 10],
    'lr': 0.001,
    'finetune_embed': False,
})

# Construct the ranker and place it on the target device in one expression.
rank_net = rank_module.MatchPyramidNet(
    config=letor_config,
    device=device,
).to(device)

# Initialise the embedding table from the loader's embedding matrix.
rank_net.embedding.weight.data.copy_(
    torch.from_numpy(loader.embedding)
)
rank_net.train()
optimizer = optim.Adam(
    rank_net.parameters(),
    lr=letor_config['lr'],
)

In [10]:
import time

# Run 50 pairwise training steps and report the elapsed wall-clock time.
start_t = time.time()
for i in range(50):
    # The sixth value of the batch is not used in this loop. It is bound to
    # `batch_feats` rather than `F` so that the notebook-level alias
    # `F` (torch.nn.functional) is not overwritten with batch data.
    X1, X1_len, X2, X2_len, Y, batch_feats = pair_gen.get_batch(
        data1=loader.query_data, data2=loader.doc_data)
    # Selection stage first, then the ranker scores the selected inputs.
    X1, X2, X1_len, X2_len = select_net(X1, X2, X1_len, X2_len)
    output = rank_net(X1, X2, X1_len, X2_len)
    loss = rank_net.pair_loss(output, Y)
    print(loss)
    # Standard step: clear stale gradients, backpropagate, update weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
end_t = time.time()
print('Time Cost: %s s' % (end_t-start_t))

tensor(0.9918, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9925, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9784, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9941, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9757, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9591, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9590, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.8981, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9909, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9288, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9889, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.8583, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9804, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9684, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9864, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.7860, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.8849, device='cuda:0', grad_fn=<MeanBackward1>)
tensor(0.9801, device='cuda:0',