In [1]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from importlib import reload 

In [2]:
from deeprank.dataset import DataLoader, PairGenerator, ListGenerator

In [3]:
# Build the dataset loader from the fold-1 LETOR07 config file. Per the cell
# output, construction reads the word dict, query/doc tables and embeddings.
config_path = './config/letor07_mp_fold1.model'
loader = DataLoader(config_path)

[./data/letor/r5w/word_dict.txt]
	Word dict size: 193367
[./data/letor/r5w/qid_query.txt]
	Data size: 1692
[./data/letor/r5w/docid_doc.txt]
	Data size: 65323
[./data/letor/r5w/embed_wiki-pdc_d50_norm]
	Embedding size: 109282
Generate numpy embed: (193368, 50)


In [4]:
import json

# Parse the JSON model config into a dict. The file is opened in a `with`
# block so the handle is closed deterministically instead of being left to
# the garbage collector (the original `open(...).read()` never closed it).
with open('./config/letor07_mp_fold1.model') as config_file:
    letor_config = json.load(config_file)

In [5]:
# Data directory named by the loaded config; the relation files live under it.
Letor07Path = letor_config['data_dir']

# Copy loader-derived values into the shared config dict in one step.
# vocab_size / embed_dim are the row and column counts of the embedding matrix.
letor_config.update({
    'fill_word': loader._PAD_,
    'embedding': loader.embedding,
    'feat_size': loader.feat_size,
    'vocab_size': loader.embedding.shape[0],
    'embed_dim': loader.embedding.shape[1],
})

# Training relation file for the fold number stored in the config.
train_rel_file = Letor07Path + '/relation.train.fold%d.txt' % (letor_config['fold'])
pair_gen = PairGenerator(rel_file=train_rel_file, config=letor_config)

[./data/letor/r5w/relation.train.fold1.txt]
	Instance size: 47828
Pair Instance Count: 325439


In [18]:
from deeprank import select_module, rank_module

# Re-execute both modules (select_module first, then rank_module) and rebind
# the names to what reload() returns, so later cells use the reloaded code.
select_module, rank_module = reload(select_module), reload(rank_module)

In [19]:
# Instantiate the selection network from the shared config dict.
select_net = select_module.IdentityNet(config=letor_config)
# Trailing expression: its return value is what the cell displays (`IdentityNet()`).
# NOTE(review): presumably this is nn.Module.train(), i.e. it switches the
# network to training mode — confirm against deeprank.select_module.
select_net.train()

IdentityNet()

In [22]:
# Hyper-parameters for this MatchPyramid run, written into the shared config
# dict that the network is constructed from.
# NOTE(review): the meaning of each conv_params tuple entry and of dpool_size
# is defined inside rank_module.MatchPyramidNet — confirm there.
letor_config['simmat_channel'] = 1
letor_config['conv_params'] = [(8, 2, 10)]
letor_config['fc_params'] = [50]
letor_config['dpool_size'] = [3, 10]
letor_config['lr'] = 0.001
letor_config['finetune_embed'] = False

rank_net = rank_module.MatchPyramidNet(config=letor_config)

# Initialise the embedding layer from the loader's numpy embedding matrix.
# The in-place copy runs under no_grad() rather than through `.data`: `.data`
# hides the write from autograd's version tracking and is discouraged, while
# no_grad() is the supported way to overwrite a parameter's values.
with torch.no_grad():
    rank_net.embedding.weight.copy_(torch.from_numpy(loader.embedding))

rank_net.train()
optimizer = optim.Adam(rank_net.parameters(), lr=letor_config['lr'])

In [24]:
# Second hyper-parameter setting: overwrites the same config keys, then
# rebuilds `rank_net` and `optimizer` from scratch, replacing any earlier ones.
# NOTE(review): the meaning of each conv_params tuple entry and of dpool_size
# is defined inside rank_module.MatchPyramidNet — confirm there.
letor_config['simmat_channel'] = 1
letor_config['conv_params'] = [(8, 3, 3)]
letor_config['fc_params'] = [200]
letor_config['dpool_size'] = [3, 10]
letor_config['lr'] = 0.001
letor_config['finetune_embed'] = False

rank_net = rank_module.MatchPyramidNet(config=letor_config)

# Initialise the embedding layer from the loader's numpy embedding matrix.
# The in-place copy runs under no_grad() rather than through `.data`: `.data`
# hides the write from autograd's version tracking and is discouraged, while
# no_grad() is the supported way to overwrite a parameter's values.
with torch.no_grad():
    rank_net.embedding.weight.copy_(torch.from_numpy(loader.embedding))

rank_net.train()
optimizer = optim.Adam(rank_net.parameters(), lr=letor_config['lr'])

In [25]:
# Pairwise training: 300 mini-batch updates of rank_net, printing each loss.
for i in range(300):
    # The sixth value returned by get_batch is not used in this loop. It is
    # bound to `batch_feats` instead of `F` so that the notebook-level alias
    # `import torch.nn.functional as F` is not overwritten by batch data.
    X1, X1_len, X2, X2_len, Y, batch_feats = pair_gen.get_batch(
        data1=loader.query_data, data2=loader.doc_data)
    # Pass the batch through the selection network before ranking.
    X1, X2, X1_len, X2_len = select_net(X1, X2, X1_len, X2_len)
    output = rank_net(X1, X2, X1_len, X2_len)
    loss = rank_net.pair_loss(output, Y)
    print(loss)
    # Clear stale gradients, back-propagate this batch's loss, apply the update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

tensor(1.0182, grad_fn=<MeanBackward1>)
tensor(0.9874, grad_fn=<MeanBackward1>)
tensor(1.0077, grad_fn=<MeanBackward1>)
tensor(1.0147, grad_fn=<MeanBackward1>)
tensor(1.0233, grad_fn=<MeanBackward1>)
tensor(0.9954, grad_fn=<MeanBackward1>)
tensor(1.0098, grad_fn=<MeanBackward1>)
tensor(1.0121, grad_fn=<MeanBackward1>)
tensor(0.9943, grad_fn=<MeanBackward1>)
tensor(0.9896, grad_fn=<MeanBackward1>)
tensor(0.9940, grad_fn=<MeanBackward1>)
tensor(0.9946, grad_fn=<MeanBackward1>)
tensor(0.9935, grad_fn=<MeanBackward1>)
tensor(0.9920, grad_fn=<MeanBackward1>)
tensor(0.9933, grad_fn=<MeanBackward1>)
tensor(1.0051, grad_fn=<MeanBackward1>)
tensor(0.9876, grad_fn=<MeanBackward1>)
tensor(0.9943, grad_fn=<MeanBackward1>)
tensor(0.9998, grad_fn=<MeanBackward1>)
tensor(1.0017, grad_fn=<MeanBackward1>)
tensor(0.9990, grad_fn=<MeanBackward1>)
tensor(1.0003, grad_fn=<MeanBackward1>)
tensor(0.9896, grad_fn=<MeanBackward1>)
tensor(0.9995, grad_fn=<MeanBackward1>)
tensor(0.9936, grad_fn=<MeanBackward1>)


tensor(0.9236, grad_fn=<MeanBackward1>)
tensor(1.0288, grad_fn=<MeanBackward1>)
tensor(0.8799, grad_fn=<MeanBackward1>)
tensor(0.9722, grad_fn=<MeanBackward1>)
tensor(0.9304, grad_fn=<MeanBackward1>)
tensor(0.9420, grad_fn=<MeanBackward1>)
tensor(0.9243, grad_fn=<MeanBackward1>)
tensor(0.8609, grad_fn=<MeanBackward1>)
tensor(0.9439, grad_fn=<MeanBackward1>)
tensor(0.9483, grad_fn=<MeanBackward1>)
tensor(0.9407, grad_fn=<MeanBackward1>)
tensor(0.8079, grad_fn=<MeanBackward1>)
tensor(0.9699, grad_fn=<MeanBackward1>)
tensor(0.8709, grad_fn=<MeanBackward1>)
tensor(0.9189, grad_fn=<MeanBackward1>)
tensor(0.9790, grad_fn=<MeanBackward1>)
tensor(0.8862, grad_fn=<MeanBackward1>)
tensor(0.9377, grad_fn=<MeanBackward1>)
tensor(0.9103, grad_fn=<MeanBackward1>)
tensor(0.9422, grad_fn=<MeanBackward1>)
tensor(0.8297, grad_fn=<MeanBackward1>)
tensor(0.9230, grad_fn=<MeanBackward1>)
tensor(1.0309, grad_fn=<MeanBackward1>)
tensor(0.9123, grad_fn=<MeanBackward1>)
tensor(1.0415, grad_fn=<MeanBackward1>)
