In [1]:
import keras
from os.path import join
import os
from bs4 import BeautifulSoup as BS
from constants import MAX_SEQUENCE_LENGTH, MAX_NUM_WORDS, EMBEDDING_DIM
from keras.preprocessing.sequence import pad_sequences
import numpy as np
np.random.seed(1337)
def quote_title_abstract(xml_path):
    with open(xml_path, 'r') as f:
        data = f.read()
    soup = BS(data)
    title, abstract = soup.find('title').text, soup.find('abstract').text
    return title.strip(), abstract.strip()

# text preprocessing
data_path = join('./','kaggle/')
xml_dir = join(data_path, 't2-doc')
xml_list = [f for f in os.listdir(xml_dir) if f.endswith('.xml')]
# print(len(xml_list))


texts = []

for xml in xml_list:
    path = join(xml_dir,xml)
    title, abstract = quote_title_abstract(path)
    text = title + '' + abstract
    texts.append(text)
#     texts.append(title)
#     texts.append(abstract)
print('read all %d xml files.' % len(xml_list))
tokenizer = keras.preprocessing.text.Tokenizer(num_words=MAX_NUM_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~ ',
                                   lower=True, split=' ', char_level=False, oov_token=None)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
sequences = tokenizer.texts_to_sequences(texts)
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
xml_id_map = {}
for i,xml in enumerate(xml_list):
    node_id = int(xml.replace('.xml',''))
    xml_id_map[node_id] = data[i,:]


print('Preparing embedding matrix.')
embeddings_index = {}
# with open(os.path.join('./','glove', 'glove.6B.%dd.txt' % EMBEDDING_DIM), 'r', encoding='utf8') as f:
with open(os.path.join('./','glove', 'glove.6B.%dd.txt' % EMBEDDING_DIM), 'r') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
# prepare embedding matrix
num_words = min(MAX_NUM_WORDS, len(word_index)) + 1
embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
for word, i in word_index.items():
    if i > MAX_NUM_WORDS:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

print('done')

Using TensorFlow backend.


read all 17500 xml files.
Found 82709 unique tokens.
Preparing embedding matrix.
done


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import xavier_normal_
from constants import D_MODEL, STACKED_NUM,DK, DV, H, P_DROP, D_FF, MAX_SEQUENCE_LENGTH, MAX_NUM_WORDS, EMBEDDING_DIM
# environment
with_gpu = torch.cuda.is_available()
# with_gpu = False
device = torch.device("cuda:0" if with_gpu else "cpu")

def positional_encoding(pos):
    assert D_MODEL % 2 == 0
    pos = torch.tensor(pos, dtype=torch.float32, requires_grad=False)
    pe = torch.zeros([1,D_MODEL], dtype=torch.float32, requires_grad=False)
    for i in range(D_MODEL//2):
        a = torch.tensor(10000, dtype=torch.float32, requires_grad=False)
        b = torch.tensor(2.*i/float(D_MODEL), dtype=torch.float32, requires_grad=False)
        c = pos / torch.pow(a, b)
        pe[0, 2*i] = torch.sin(c)
        pe[0, 2*i+1] = torch.cos(c)
    return pe
def get_pos_mat(length):
    if length > MAX_SEQUENCE_LENGTH:
        print('sequence length reach PE_MAT_CACHE. %d ' % length)
        ret = torch.cat([positional_encoding(i) for i in range(length)], dim=0).to(device)
        ret.requires_grad = False
        global PE_CACHE_MATRIX
        PE_CACHE_MATRIX = ret
        return ret
    else:
        return PE_CACHE_MATRIX[:length]
    
PE_CACHE_MATRIX = torch.cat([positional_encoding(i) for i in range(0,MAX_SEQUENCE_LENGTH)], dim=0).to(device)
PE_CACHE_MATRIX.requires_grad = False

# construct neuron network

def scaled_dot_attention(Q, K, V, mask=None):
    assert Q.size()[-1] == K.size()[-1]
    dk = torch.tensor(K.size()[-1], dtype=torch.float32, requires_grad=False).to(device)
    out = torch.matmul(Q,K.t()) / torch.sqrt(dk) 
    if mask is not None:
        out = out.masked_fill_(mask, -float('inf'))
        
    return torch.matmul(F.softmax(out, dim=-1), V)
                            
class Transformer(nn.Module):

    def __init__(self, layer_num, dk, dv, dm, h, emb_matrix):
        super(Transformer, self).__init__()
        
        self.emb = Word_Embedding(emb_matrix)
        
        self.emb_drop = nn.Dropout(P_DROP)
        
        self.encoder = Stack_Encoder(layer_num, dk, dv, dm, h)
        self.decoder = Stack_Decoder(layer_num, dk, dv, dm, h)
        self.summary_decoder = Stack_Decoder(2, dk, dv, dm, h)
        
        self.summary_weight = nn.Parameter(torch.FloatTensor(1, dm))
        torch.nn.init.xavier_uniform_(self.summary_weight)
        
        self.output_linear = nn.Linear(dm, 1)

    def forward(self, Q, K):
        
#         encoder
        K = self.emb(K)
#         print(K.size(), get_pos_mat(MAX_SEQUENCE_LENGTH).size())
        K = K + get_pos_mat(MAX_SEQUENCE_LENGTH)
        K = self.emb_drop(K)
        
        en_out = self.encoder(K)
        
#         decoder
        Q = self.emb(Q)
        seq_len, d = Q.size()
        
        Q = Q + get_pos_mat(MAX_SEQUENCE_LENGTH)
        Q = self.emb_drop(Q)
        
        de_out = self.decoder(Q, en_out)
        
        
        summary = self.summary_decoder(self.summary_weight, de_out)
        out = self.output_linear(summary)
        out = torch.sigmoid(out)
        

        return out

class Word_Embedding(nn.Module):
    def __init__(self, emb_matrix):
        super(Word_Embedding, self).__init__()
        self.emb = nn.Embedding(MAX_NUM_WORDS, EMBEDDING_DIM, padding_idx=0)
        self.emb.weight = nn.parameter.Parameter(torch.FloatTensor(emb_matrix))
        self.emb.weight.requires_grad_(False)
        
        self.linear = nn.Linear(EMBEDDING_DIM, D_MODEL, bias=False)
        


    def forward(self, x):
        x = self.emb(x)
        x = self.linear(x)
        return x
    
class Stack_Encoder(nn.Module):
    """
    Stacked Encoder
    """
    def __init__(self, layer_num, dk, dv, dm, h):
        super(Stack_Encoder, self).__init__()
        self.encoders = nn.ModuleList([Encoder(dk, dv, dm, h) for i in range(layer_num)])

    def forward(self, K):
        # ModuleList can act as an iterable, or be indexed using ints
        for lay in self.encoders:
            K = lay(K)
        return K               
class Encoder(nn.Module):
    def __init__(self, dk, dv, dm, h):
        super(Encoder, self).__init__()
#         attention residual block
        self.multi_head_attention_layer = Multi_Head_attention_layer(dk, dv, dm, h)
        self.attention_norm_lay = nn.LayerNorm([dm,])
        self.att_drop = nn.Dropout(P_DROP)
#         feed forward residual block
        self.fcn = PositionwiseFeedForward(D_MODEL, D_FF)
        self.linear_drop = nn.Dropout(P_DROP)
        self.ff_norm_lay = nn.LayerNorm([dm, ])
        

    def forward(self, K):
#         attention
        attention_out = self.multi_head_attention_layer(K, K, K)
        attention_out = self.att_drop(attention_out)
        att_out = self.attention_norm_lay(K + attention_out)
#         feed forward
        linear_out = self.fcn(att_out)
        linear_out = self.linear_drop(linear_out)
        out = self.ff_norm_lay(att_out + linear_out)
        out = att_out + linear_out
    
        return out
class Stack_Decoder(nn.Module):
    """
    Stacked Encoder
    """
    def __init__(self, layer_num, dk, dv, dm, h):
        super(Stack_Decoder, self).__init__()
        self.decoders = nn.ModuleList([Decoder(dk, dv, dm, h) for i in range(layer_num)])
        
        
    def forward(self, Q, encoder_out):
        # ModuleList can act as an iterable, or be indexed using ints
        Q_len, d = Q.size()
        for lay in self.decoders:
            Q = lay(Q, encoder_out, mask=None)
        return Q           

class Decoder(nn.Module):
    def __init__(self, dk, dv, dm, h):
        super(Decoder, self).__init__()
#         query attention residual block
        self.Q_attention_lay = Multi_Head_attention_layer(dk, dv, dm, h)
        self.Q_attention_norm_lay = nn.LayerNorm([dm, ])
        self.Q_att_drop = nn.Dropout(P_DROP)
    
#         query key attention residual block
        self.QK_attention_lay = Multi_Head_attention_layer(dk, dv, dm, h)
        self.QK_attention_norm_lay = nn.LayerNorm([dm, ])
        self.QK_att_drop = nn.Dropout(P_DROP)
        
    
#         feed forward residual block
        self.fcn = PositionwiseFeedForward(D_MODEL, D_FF)
        self.ff_norm_lay = nn.LayerNorm([dm, ])
        self.linear_drop = nn.Dropout(P_DROP)
        

    def forward(self, Q, encoder_out, mask):
#         query attention
        Q_attention_out = self.Q_attention_lay(Q, Q, Q, mask)
        Q_attention_out = self.Q_att_drop(Q_attention_out)
        Q_att_out = self.Q_attention_norm_lay(Q + Q_attention_out)
#         query key attention
        QK_attention_out = self.QK_attention_lay(Q_att_out, encoder_out, encoder_out)
        QK_attention_out = self.QK_att_drop(QK_attention_out)
        QK_att_out = self.QK_attention_norm_lay(Q_att_out + QK_attention_out)
        
#         feed forward
        linear_out = self.fcn(QK_att_out)
        out = self.ff_norm_lay(QK_att_out + linear_out)
        return out

class Multi_Head_attention_layer(nn.Module):
    def __init__(self, dk, dv, dm, h):
        super(Multi_Head_attention_layer, self).__init__()
        self.Q_linears = nn.ModuleList([nn.Linear(dm, dk) for i in range(h)])
        self.K_linears = nn.ModuleList([nn.Linear(dm, dk) for i in range(h)])
        self.V_linears = nn.ModuleList([nn.Linear(dm, dv) for i in range(h)])
        self.output_linear = nn.Linear(h*dv, dm)
                            

    def forward(self, Q_input, K_input, V_input, mask=None):
        buf = []
        for Q_linear, K_linear, V_linear in zip(self.Q_linears, self.K_linears, self.V_linears):
            Q = Q_linear(Q_input)
            K = K_linear(K_input)
            V = V_linear(V_input)
            buf.append(scaled_dot_attention(Q, K, V, mask))
            
        buf = torch.cat(buf,dim=-1)
        out = self.output_linear(buf)
        
        return out      
class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model, d_ff):
        super(PositionwiseFeedForward, self).__init__()
        self.cnn1 = nn.Conv1d(d_model, d_ff, 1)
        self.cnn2 = nn.Conv1d(d_ff, d_model, 1)
                            

    def forward(self, x):
        seq_len,_ = x.size()
        x = x.unsqueeze(0)
        x = x.permute(0,2,1)
        x = self.cnn1(x)
        x = F.relu(x)
        x = self.cnn2(x)
        x = x.permute(0,2,1)
        x = x.squeeze(0)
        
        return x      
    
# encoder = Stack_Encoder(6, 64,64,20,8)
# # print net
Q = torch.randint(10000,[MAX_SEQUENCE_LENGTH,], dtype=torch.long).to(device)
V = torch.randint(10000,[MAX_SEQUENCE_LENGTH,], dtype=torch.long).to(device)
Q_fea = torch.rand([D_MODEL,]).to(device)
K_fea = torch.rand([D_MODEL,]).to(device)
net = Transformer(STACKED_NUM, DK, DV, D_MODEL, H, embedding_matrix).to(device)
print(Q.dtype)
o = net(Q, V)
# print t
print(o.size())
# print o
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(count_parameters(net))

torch.int64
torch.Size([1, 1])
5583361


In [3]:
tmp_m = torch.load('./best_loss.pt')
net.decoder.load_state_dict(tmp_m.decoder.state_dict())
net.encoder.load_state_dict(tmp_m.encoder.state_dict())
# torch.nn.init.xavier_uniform_(net.output_linear.weight)
print 'load weight done'

load weight done


In [4]:

links = np.genfromtxt(join(data_path,'t2-train.txt'), dtype=np.int32)
idx_map = {node:idx for idx, node in enumerate(list(set(links.flatten().tolist())))}
N = links.shape[0]
adj_mat = np.zeros([N,N], dtype=np.uint8)
for i in range(links.shape[0]):
    src, dst = links[i].tolist()
    adj_mat[idx_map[src], idx_map[dst]] = 1




In [6]:
import numpy as np
import scipy.sparse as sp
from tqdm import tqdm

def positive_bootsrap_generator(edges, xml_id_map):
    num_edge = len(edges)
        
    while True:
        for idx in np.random.permutation(num_edge):
            src, dst = edges[idx, :]
            Q = xml_id_map[dst]
            K = xml_id_map[src]
            yield Q, K
def negative_bootsrap_generator(adj_mat, links, idx_map, xml_id_map, training_node_list, neighbor_link_rate=0.8):
    
    
    exist_node_list = xml_id_map.keys()
    exist_N = len(training_node_list)
    N = adj_mat.shape[0]
    
#     adj mat
    links = np.array(list(map(idx_map.get, links.flatten())),
                     dtype=np.int32).reshape(links.shape)
    
    adj_sp = sp.coo_matrix((np.ones(links.shape[0]), (links[:, 0], links[:, 1])),
                        shape=(N, N),
                        dtype=np.uint8)
    adj_sp_2 = (sp.coo_matrix.dot(adj_sp,adj_sp) + adj_sp).tocoo()
    
    rev_map = {v:k for k,v in idx_map.items()}
    adj_map = {i:[] for i in range(N)}
    with tqdm(total=len(adj_sp_2.row)) as pbar:
        for i,j,v in zip(adj_sp_2.row, adj_sp_2.col, adj_sp_2.data):
            if adj_mat[i, j] != 1 and v == 1:
                adj_map[i].append(j)
            pbar.update(1)
#             print i,N
                
    while True:
        src = training_node_list[np.random.randint(exist_N)]
        
#         choose neighbor link
        if np.random.rand(1) <= neighbor_link_rate:
        
            i = idx_map[src]
            high = len(adj_map[i])
            while high == 0:
                src = training_node_list[np.random.randint(exist_N)]
                i = idx_map[src]
                high = len(adj_map[i])
                
            idx = np.random.randint(high)
            dst = adj_map[i][idx]
            dst = rev_map[dst]
        else:
            dst = training_node_list[np.random.randint(exist_N)]
            while adj_mat[idx_map[src], idx_map[dst]] == 1:
                dst = training_node_list[np.random.randint(exist_N)]
        Q = xml_id_map[dst]
        K = xml_id_map[src]
        yield Q, K

def val_data(edges, xml_id_map):
    Q, K = [],[]
    
    for idx in range(edges.shape[0]):
        src, dst = edges[idx, :]
        q = xml_id_map[dst]
        k = xml_id_map[src]
        
        Q.append(q)
        K.append(k)
        
    Q = np.vstack(Q)
    K = np.vstack(K)
    
    return Q, K
    
N = links.shape[0]
idx = np.random.permutation(N)
train_idx = idx[N//10:]
val_idx = idx[:N//10]

pos_G = positive_bootsrap_generator(links[train_idx,:], xml_id_map)
training_node_list = list(set(links[train_idx,:].flatten().tolist()))
neg_G = negative_bootsrap_generator(adj_mat, links, idx_map, xml_id_map, training_node_list)
val_Q, val_K = val_data(links[val_idx,:], xml_id_map)
q,k = next(pos_G)
print(q.shape,k.shape)
q,k = next(neg_G)
print(q.shape,k.shape)
print(val_Q.shape,val_K.shape)


((150,), (150,))


100%|██████████| 581426/581426 [00:01<00:00, 338265.54it/s]

((150,), (150,))
((8668, 150), (8668, 150))





In [7]:
from collections import deque
import time
def dump_log(model, n_iter, loss, acc, val_loss, val_acc, log_file_stream, tmp_model_path):
    log_text = '%.7d<split>%.5f<split>%.5f<split>%.5f<split>%.5f\n' % (n_iter, loss, acc, val_loss, val_acc)
    log_file_stream.write(log_text)
    if n_iter % 100 == 0 :
        log_file_stream.flush()
        torch.save(model, tmp_model_path)

acc_q = deque(maxlen=1000)
loss_q = deque(maxlen=1000)
val_acc_q = deque(maxlen=1000)
val_loss_q = deque(maxlen=1000)
criterion = nn.BCELoss()
# 
model = net
model.cuda()
optimizer = torch.optim.Adam(model.parameters(),lr=0.0001)
#
interval = 100
t = time.time()
print 'start training.'
best_acc  = 0
best_loss = float('inf')
for i in range(1,1000000):
    with open('text_log.txt', 'a') as f:
        
        optimizer.zero_grad()
        model.train()
    #     positive
        q,k = next(pos_G)
        q,k = torch.LongTensor(q), torch.LongTensor(k)
        output = model(q.cuda(), k.cuda())
        acc = 1 if output.flatten().item() > 0.5 else 0
        acc_q.append(acc)
        pos_loss = criterion(output, torch.FloatTensor([[1]]).cuda() )

#         negative
        q,k = next(neg_G)
        q,k = torch.LongTensor(q), torch.LongTensor(k)
        
        output = model(q.cuda(), k.cuda())
        acc = 1 if output.flatten().item() < 0.5 else 0
        acc_q.append(acc)
        neg_loss = criterion(output, torch.FloatTensor([[0]]).cuda())
        loss = pos_loss + neg_loss
        loss_q.append(loss.item())
        loss.backward()
        optimizer.step()
    #     val
        model.eval()
        with torch.no_grad():
            val_i = i % val_Q.shape[0]
            q,k = val_Q[val_i,:], val_K[val_i,:]
            q,k = torch.LongTensor(q), torch.LongTensor(k)
        
            output = model(q.cuda(), k.cuda())
            val_acc = 1 if output.flatten().item() > 0.5 else 0
            val_acc_q.append(val_acc)

            val_loss = criterion(output, torch.FloatTensor([[1]]).cuda() )
            val_loss_q.append(val_loss.item())

        model.eval()
        acc = float(np.mean(acc_q))
        loss = float(np.mean(loss_q))
        val_acc = float(np.mean(val_acc_q))
        val_loss = float(np.mean(val_loss_q))

        if i % interval == 0:
            print('iter: {:04d}'.format(i+1),
                  'loss_train: {:.4f}'.format(loss),
                  'acc_train: {:.4f}'.format(acc),
                  'loss_val: {:.4f}'.format(val_loss),
                  'acc_val: {:.4f}'.format(val_acc),
                  'time: {:.4f}s'.format((time.time() - t)))
        if i > 100:
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model, './text_best_acc.pt')
                with open('./text_best.txt', 'a') as g:
                    g.write('best acc at %d with %.5f\n' % (i+1, best_acc))

            if val_loss < best_loss:
                best_loss = val_loss
                torch.save(model, './text_best_loss.pt')
                with open('./text_best.txt', 'a') as g:
                    g.write('best loss at %d with %.5f\n' % (i+1, best_loss))
            
        dump_log(model, i+1, loss, acc, val_loss, val_acc, f, './text_tmp.pt')


# Train model
print("Optimization Finished!")
# print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

start training.


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


('iter: 0101', 'loss_train: 1.4029', 'acc_train: 0.4650', 'loss_val: 0.7021', 'acc_val: 0.5300', 'time: 19.3228s')
('iter: 0201', 'loss_train: 1.4047', 'acc_train: 0.4375', 'loss_val: 0.6961', 'acc_val: 0.5650', 'time: 49.0655s')
('iter: 0301', 'loss_train: 1.3993', 'acc_train: 0.4583', 'loss_val: 0.6951', 'acc_val: 0.5233', 'time: 70.0715s')
('iter: 0401', 'loss_train: 1.3996', 'acc_train: 0.4662', 'loss_val: 0.6947', 'acc_val: 0.5300', 'time: 90.4702s')
('iter: 0501', 'loss_train: 1.3966', 'acc_train: 0.4800', 'loss_val: 0.6946', 'acc_val: 0.5140', 'time: 109.6129s')
('iter: 0601', 'loss_train: 1.3945', 'acc_train: 0.4850', 'loss_val: 0.6933', 'acc_val: 0.5367', 'time: 130.0137s')
('iter: 0701', 'loss_train: 1.3940', 'acc_train: 0.5020', 'loss_val: 0.6926', 'acc_val: 0.5457', 'time: 151.5763s')
('iter: 0801', 'loss_train: 1.3940', 'acc_train: 0.4990', 'loss_val: 0.6923', 'acc_val: 0.5463', 'time: 170.6986s')
('iter: 0901', 'loss_train: 1.3936', 'acc_train: 0.4970', 'loss_val: 0.6921'

('iter: 7201', 'loss_train: 1.3881', 'acc_train: 0.4990', 'loss_val: 0.6921', 'acc_val: 0.5370', 'time: 1431.3805s')
('iter: 7301', 'loss_train: 1.3883', 'acc_train: 0.5050', 'loss_val: 0.6920', 'acc_val: 0.5440', 'time: 1451.8062s')
('iter: 7401', 'loss_train: 1.3882', 'acc_train: 0.5030', 'loss_val: 0.6915', 'acc_val: 0.5730', 'time: 1472.3266s')
('iter: 7501', 'loss_train: 1.3882', 'acc_train: 0.4950', 'loss_val: 0.6919', 'acc_val: 0.5570', 'time: 1493.2076s')
('iter: 7601', 'loss_train: 1.3876', 'acc_train: 0.5040', 'loss_val: 0.6919', 'acc_val: 0.5700', 'time: 1514.2270s')
('iter: 7701', 'loss_train: 1.3869', 'acc_train: 0.5120', 'loss_val: 0.6913', 'acc_val: 0.5990', 'time: 1536.3868s')
('iter: 7801', 'loss_train: 1.3865', 'acc_train: 0.5220', 'loss_val: 0.6914', 'acc_val: 0.5940', 'time: 1557.6656s')
('iter: 7901', 'loss_train: 1.3868', 'acc_train: 0.5170', 'loss_val: 0.6913', 'acc_val: 0.6040', 'time: 1577.9057s')
('iter: 8001', 'loss_train: 1.3879', 'acc_train: 0.5270', 'loss_

('iter: 14201', 'loss_train: 1.3870', 'acc_train: 0.4900', 'loss_val: 0.6933', 'acc_val: 0.4970', 'time: 2880.2474s')
('iter: 14301', 'loss_train: 1.3871', 'acc_train: 0.4800', 'loss_val: 0.6934', 'acc_val: 0.5040', 'time: 2909.9726s')
('iter: 14401', 'loss_train: 1.3872', 'acc_train: 0.4800', 'loss_val: 0.6934', 'acc_val: 0.4990', 'time: 2929.1637s')
('iter: 14501', 'loss_train: 1.3870', 'acc_train: 0.4890', 'loss_val: 0.6933', 'acc_val: 0.5150', 'time: 2948.4127s')
('iter: 14601', 'loss_train: 1.3872', 'acc_train: 0.4930', 'loss_val: 0.6934', 'acc_val: 0.5110', 'time: 2967.6139s')
('iter: 14701', 'loss_train: 1.3872', 'acc_train: 0.5040', 'loss_val: 0.6930', 'acc_val: 0.5380', 'time: 2986.8131s')
('iter: 14801', 'loss_train: 1.3872', 'acc_train: 0.5160', 'loss_val: 0.6933', 'acc_val: 0.5290', 'time: 3006.0529s')
('iter: 14901', 'loss_train: 1.3871', 'acc_train: 0.5140', 'loss_val: 0.6928', 'acc_val: 0.5690', 'time: 3025.4974s')
('iter: 15001', 'loss_train: 1.3864', 'acc_train: 0.5210

('iter: 21201', 'loss_train: 1.3868', 'acc_train: 0.4850', 'loss_val: 0.6934', 'acc_val: 0.5320', 'time: 4340.1002s')
('iter: 21301', 'loss_train: 1.3866', 'acc_train: 0.4950', 'loss_val: 0.6934', 'acc_val: 0.5360', 'time: 4363.6942s')
('iter: 21401', 'loss_train: 1.3870', 'acc_train: 0.5100', 'loss_val: 0.6940', 'acc_val: 0.5080', 'time: 4387.4436s')
('iter: 21501', 'loss_train: 1.3845', 'acc_train: 0.5400', 'loss_val: 0.6932', 'acc_val: 0.4950', 'time: 4411.2327s')
('iter: 21601', 'loss_train: 1.3796', 'acc_train: 0.5600', 'loss_val: 0.6905', 'acc_val: 0.4950', 'time: 4434.8402s')
('iter: 21701', 'loss_train: 1.3758', 'acc_train: 0.5810', 'loss_val: 0.6903', 'acc_val: 0.4470', 'time: 4459.5475s')
('iter: 21801', 'loss_train: 1.3761', 'acc_train: 0.5810', 'loss_val: 0.6884', 'acc_val: 0.4260', 'time: 4483.4564s')
('iter: 21901', 'loss_train: 1.3749', 'acc_train: 0.5860', 'loss_val: 0.6871', 'acc_val: 0.4190', 'time: 4508.4862s')
('iter: 22001', 'loss_train: 1.3723', 'acc_train: 0.5780

('iter: 28201', 'loss_train: 1.3580', 'acc_train: 0.5660', 'loss_val: 0.6849', 'acc_val: 0.5710', 'time: 6041.9623s')
('iter: 28301', 'loss_train: 1.3582', 'acc_train: 0.5620', 'loss_val: 0.6853', 'acc_val: 0.5750', 'time: 6066.0473s')
('iter: 28401', 'loss_train: 1.3605', 'acc_train: 0.5440', 'loss_val: 0.6838', 'acc_val: 0.5770', 'time: 6089.9737s')
('iter: 28501', 'loss_train: 1.3587', 'acc_train: 0.5710', 'loss_val: 0.6848', 'acc_val: 0.5720', 'time: 6114.0787s')
('iter: 28601', 'loss_train: 1.3655', 'acc_train: 0.5650', 'loss_val: 0.6877', 'acc_val: 0.5660', 'time: 6138.2190s')
('iter: 28701', 'loss_train: 1.3695', 'acc_train: 0.5740', 'loss_val: 0.6813', 'acc_val: 0.5750', 'time: 6162.1740s')
('iter: 28801', 'loss_train: 1.3702', 'acc_train: 0.5760', 'loss_val: 0.6831', 'acc_val: 0.5690', 'time: 6186.2228s')
('iter: 28901', 'loss_train: 1.3727', 'acc_train: 0.5850', 'loss_val: 0.6838', 'acc_val: 0.5620', 'time: 6209.9967s')
('iter: 29001', 'loss_train: 1.3648', 'acc_train: 0.5840

('iter: 35201', 'loss_train: 1.3663', 'acc_train: 0.5640', 'loss_val: 0.6786', 'acc_val: 0.3950', 'time: 7619.7122s')
('iter: 35301', 'loss_train: 1.3688', 'acc_train: 0.5700', 'loss_val: 0.6772', 'acc_val: 0.4030', 'time: 7639.5431s')
('iter: 35401', 'loss_train: 1.3710', 'acc_train: 0.5760', 'loss_val: 0.6770', 'acc_val: 0.4060', 'time: 7660.1917s')
('iter: 35501', 'loss_train: 1.3703', 'acc_train: 0.5790', 'loss_val: 0.6731', 'acc_val: 0.4120', 'time: 7684.1300s')
('iter: 35601', 'loss_train: 1.3675', 'acc_train: 0.5830', 'loss_val: 0.6688', 'acc_val: 0.4230', 'time: 7708.7637s')
('iter: 35701', 'loss_train: 1.3663', 'acc_train: 0.5760', 'loss_val: 0.6731', 'acc_val: 0.4200', 'time: 7732.6731s')
('iter: 35801', 'loss_train: 1.3628', 'acc_train: 0.5830', 'loss_val: 0.6715', 'acc_val: 0.4250', 'time: 7756.7825s')
('iter: 35901', 'loss_train: 1.3636', 'acc_train: 0.5750', 'loss_val: 0.6737', 'acc_val: 0.4200', 'time: 7780.0941s')
('iter: 36001', 'loss_train: 1.3625', 'acc_train: 0.5760

KeyboardInterrupt: 

In [None]:
# import numpy as np
# batch_size = 128
# # xml_id_map[113].shape
# def positive_bootsrap_generator(edges, xml_id_map):
#     num_edge = len(edges)
        
#     while True:
#         for idx in np.random.permutation(num_edge):
#             src, dst = edges[idx, :]
#             Q = xml_id_map[dst]
#             K = xml_id_map[src]
#             yield Q, K
# def negative_bootsrap_generator(adj_mat, idx_map, xml_id_map, training_node_list):
#     exist_node_list = xml_id_map.keys()
#     exist_N = len(training_node_list)
        
#     while True:
#         src = training_node_list[np.random.randint(exist_N)]
#         dst = training_node_list[np.random.randint(exist_N)]
#         while adj_mat[idx_map[src], idx_map[dst]] == 1:
#             dst = training_node_list[np.random.randint(exist_N)]
#         Q = xml_id_map[dst]
#         K = xml_id_map[src]
#         yield Q, K
# def val_data(edges, xml_id_map):
#     Q, K = [],[]
    
#     for idx in range(edges.shape[0]):
#         src, dst = edges[idx, :]
#         q = xml_id_map[dst]
#         k = xml_id_map[src]
#         Q.append(q)
#         K.append(k)
#     Q = np.vstack(Q)
#     K = np.vstack(K)
    
#     return Q, K
    
# N = edges.shape[0]
# idx = np.random.permutation(N)
# train_idx = idx[N//10:]
# val_idx = idx[:N//10]

# pos_G = positive_bootsrap_generator(edges[train_idx,:], xml_id_map)
# training_node_list = list(set(edges[train_idx,:].flatten().tolist()))
# neg_G = negative_bootsrap_generator(adj_mat, idx_map, xml_id_map, training_node_list)
# val_Q, val_K = val_data(edges[val_idx,:], xml_id_map)
# q,k = next(pos_G)
# print(q.shape,k.shape)
# q,k = next(neg_G)
# print(q.shape,k.shape)
# print(val_Q.shape,val_K.shape)
# # 