In [1]:

import os
import sys
import argparse
import math
import numpy as np
import timeit
import torch
import torch.utils.data as data_utils
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import src.general.global_variables as gv
from src.general.utils import cc_path

sys.path.append(gv.PROJECT_PATH)


sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname('data_loader.py'), os.path.pardir)))
from src.data.data_loader import DataLoader

# def load_data(data_path, max_length, vocab_size, batch_size=64):
#     X_trn, Y_trn, X_tst, Y_tst, vocabulary, vocabulary_inv = data_helpers.load_data(data_path, max_length=max_length,
#                                                                                     vocab_size=vocab_size)
#     Y_trn = Y_trn[0:].toarray()
#     Y_trn = np.insert(Y_trn, 101, 0, axis=1)
#     Y_trn = np.insert(Y_trn, 102, 0, axis=1)
#     Y_tst = Y_tst[0:].toarray()
#
#     train_data = data_utils.TensorDataset(torch.from_numpy(X_trn).type(torch.LongTensor),
#                                           torch.from_numpy(Y_trn).type(torch.LongTensor))
#     test_data = data_utils.TensorDataset(torch.from_numpy(X_tst).type(torch.LongTensor),
#                                          torch.from_numpy(Y_tst).type(torch.LongTensor))
#     train_loader = data_utils.DataLoader(train_data, batch_size, drop_last=False, shuffle=True)
#     test_loader = data_utils.DataLoader(test_data, batch_size, drop_last=False)
#     return train_loader, test_loader, vocabulary, X_tst, Y_tst, X_trn, Y_trn

def precision_k(pred, label, k=[1, 3, 5]):
    batch_size = pred.shape[0]

    precision = []
    for _k in k:
        p = 0
        for i in range(batch_size):
            p += label[i, pred[i, :_k]].mean()
        precision.append(p * 100 / batch_size)

    return precision


def ndcg_k(pred, label, k=[1, 3, 5]):
    batch_size = pred.shape[0]

    ndcg = []
    for _k in k:
        score = 0
        rank = np.log2(np.arange(2, 2 + _k))
        for i in range(batch_size):
            l = label[i, pred[i, :_k]]
            n = l.sum()
            if (n == 0):
                continue

            dcg = (l / rank).sum()
            label_count = label[i].sum()
            norm = 1 / np.log2(np.arange(2, 2 + np.min((_k, label_count))))
            norm = norm.sum()
            score += dcg / norm

        ndcg.append(score * 100 / batch_size)

    return ndcg


# input data_path
# data_path = '/data/rcv1_raw_text.p'
sequence_length = 500
batch_size = 64

def load_glove_embeddings(path, embedding_dim):
    """Loading the glove embeddings"""
    with open(path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        embeddings = np.zeros((len(lines)+1, embedding_dim))
        word_idx_dict = {}
        for index, line in tqdm(enumerate(lines), desc='vocab building: '):
            values = line.split()
            word = values[0]
            word_idx_dict[word] = index + 1
            if index:
                vector = np.array(values[1:], dtype='float32')
                if vector.shape[-1] != embedding_dim:
                    raise Exception('Dimension not matching.')
                embeddings[index] = vector

    return torch.from_numpy(embeddings).float(), word_idx_dict




# load glove
pretrain = 'glove'
embedding_dim = 128
# input word2vec file path
file_path = cc_path(f'data/processed/canary/word_embeddings_{pretrain}.txt')
embedding_weights, word_idx_dict = load_glove_embeddings(file_path, embedding_dim)






vocab building: : 456009it [00:23, 19505.22it/s]


In [2]:
word_idx_dict

{'chi19mk': 1,
 'rs7839488': 2,
 'manufactories': 3,
 'li0': 4,
 'httpsclinicaltrialsgovct2shownct03924414': 5,
 'n396': 6,
 'nodiflora': 7,
 'sequestration': 8,
 '3qter': 9,
 'bov': 10,
 'tetracyclineresponsive': 11,
 'mthfd2': 12,
 'borylative': 13,
 '8477': 14,
 'ssrt3': 15,
 'interleukin12': 16,
 'peti': 17,
 't1273se': 18,
 'literal': 19,
 'ibfds': 20,
 'glutmtx': 21,
 '56178': 22,
 'm0mφs': 23,
 'rotationflipping': 24,
 '10061014': 25,
 'enthesitisrelated': 26,
 'caftaric': 27,
 '10700': 28,
 'pseudopods': 29,
 'grown': 30,
 'cox5b': 31,
 'thirdmolar': 32,
 'vcfeval': 33,
 '34secolupane': 34,
 'microrna148a3p': 35,
 'productionchondroitin': 36,
 'wrightcoombs': 37,
 'estiage': 38,
 'hksar': 39,
 '8027': 40,
 'pomderived': 41,
 'attainment': 42,
 '6induced': 43,
 'solutionbased': 44,
 'alinduced': 45,
 '103108': 46,
 'mobileevent': 47,
 '07287': 48,
 'prokka': 49,
 '09600993': 50,
 '118442': 51,
 'cerrobend': 52,
 'lrrk2nmc': 53,
 'alfalfaleucaena': 54,
 '4421733': 55,
 'hn001': 5

In [40]:
print('-' * 50)
print('Loading data...');
start_time = timeit.default_timer()
# load all the data
loc_dict = {
    'processed_csv': cc_path('data/processed/canary/articles_cleaned.csv'),
    'abstract_embeddings': cc_path('data/processed/canary/embeddings_fasttext.csv'),
    'keyword_network': cc_path('data/processed/canary/keyword_network.pickle'),
    'author_network': cc_path('data/processed/canary/author_network.pickle')
}
data_loader = DataLoader(loc_dict)
processed_df = data_loader.load_processed_csv()

embedding_df = data_loader.load_embeddings_csv()
embedding_df['pui'] = embedding_df['pui'].astype(str)
processed_df['pui'] = processed_df['pui'].astype(str)

embedding_df[embedding_df.columns.difference(['pui'])] = \
    (embedding_df[embedding_df.columns.difference(['pui'])] -
     embedding_df[embedding_df.columns.difference(['pui'])].mean()) / \
    embedding_df[embedding_df.columns.difference(['pui'])].std()

label_columns = processed_df.loc[:, ~processed_df.columns.isin(
    ['file_name', 'title', 'keywords', 'abstract', 'abstract_2', 'authors', 'organization', 'chemicals',
     'num_refs', 'date-delivered', 'labels_m', 'labels_a', 'Unnamed 0:'])]

label_columns = label_columns[label_columns.pui.isin(embedding_df.pui)].reset_index(drop=True)
abstracts_df = processed_df.loc[processed_df.pui.isin(label_columns.pui), ['pui', 'abstract']].reset_index(drop=True)

train_indices, test_indices = train_test_split(range(len(label_columns)), test_size=0.2, random_state=0)



--------------------------------------------------
Loading data...


In [41]:
abstracts_df['abstract'] = abstracts_df['abstract'].apply(lambda x: [int(word_idx_dict[word]) for word in x.split(' ')])


In [42]:
set_width = 250
abstracts_df['abstract'] = abstracts_df['abstract'].apply(lambda x: x + [0] * (set_width - len(x)) if len(x) <= set_width else x[:set_width])


In [17]:
np.array(abstracts_df.loc[train_indices, 'abstract'].to_list(), dtype=int)

KeyboardInterrupt: 

In [43]:
X_trn = np.array(abstracts_df.loc[train_indices, 'abstract'].to_list(), dtype=int)
X_tst = np.array(abstracts_df.loc[test_indices, 'abstract'].to_list(), dtype=int)


Y_trn = label_columns.loc[train_indices, label_columns.columns.difference(['pui'])].to_numpy(dtype=int)
Y_tst = label_columns.loc[test_indices, label_columns.columns.difference(['pui'])].to_numpy(dtype=int)



In [30]:
del test_loader, train_loader

In [44]:
batch_size=64
train_data = data_utils.TensorDataset(torch.from_numpy(X_trn).type(torch.LongTensor),
                                      torch.from_numpy(Y_trn).type(torch.LongTensor))
test_data = data_utils.TensorDataset(torch.from_numpy(X_tst).type(torch.LongTensor),
                                     torch.from_numpy(Y_tst).type(torch.LongTensor))
train_loader = data_utils.DataLoader(train_data, batch_size, drop_last=False, shuffle=True)
test_loader = data_utils.DataLoader(test_data, batch_size, drop_last=False)


# train_loader, test_loader, vocabulary, X_tst, Y_tst, X_trn, Y_trn = load_data(data_path, sequence_length, vocab_size,
#                                                                               batch_size)
print('Process time %.3f (secs)\n' % (timeit.default_timer() - start_time))




vocab_size = embedding_weights.size(0)

# create Network structure

Process time 44.934 (secs)



In [45]:

class BasicModule(nn.Module):
    def __init__(self):
        super(BasicModule, self).__init__()
        self.model_name = str(type(self))

    def load(self, path):
        self.load_state_dict(torch.load(path))

    def save(self, path=None):
        if path is None:
            raise ValueError('Please specify the saving road!!!')
        torch.save(self.state_dict(), path)
        return path


# In[9]:


def get_embedding_layer(embedding_weights):
    word_embeddings = nn.Embedding(num_embeddings=embedding_weights.size(0), embedding_dim=embedding_weights.size(1))
    word_embeddings.weight.data.copy_(embedding_weights)
    word_embeddings.weight.requires_grad = False  # not train
    return word_embeddings


class Hybrid_XML(BasicModule):
    def __init__(self, num_labels=3714, vocab_size=30001, embedding_size=300, embedding_weights=None,
                 max_seq=300, hidden_size=256, d_a=256, label_emb=None):
        super(Hybrid_XML, self).__init__()
        self.embedding_size = embedding_size
        self.num_labels = num_labels
        self.max_seq = max_seq
        self.hidden_size = hidden_size

        if embedding_weights is None:
            self.word_embeddings = nn.Embedding(vocab_size, embedding_size)
        else:
            self.word_embeddings = get_embedding_layer(embedding_weights)

        self.lstm = nn.LSTM(input_size=self.embedding_size, hidden_size=self.hidden_size, num_layers=1,
                            batch_first=True, bidirectional=True)

        # interaction-attention layer
        self.key_layer = torch.nn.Linear(2 * self.hidden_size, self.hidden_size)
        self.query_layer = torch.nn.Linear(self.hidden_size, self.hidden_size)

        # self-attn layer
        self.linear_first = torch.nn.Linear(2 * self.hidden_size, d_a)
        self.linear_second = torch.nn.Linear(d_a, self.num_labels)

        # weight adaptive layer
        self.linear_weight1 = torch.nn.Linear(2 * self.hidden_size, 1)
        self.linear_weight2 = torch.nn.Linear(2 * self.hidden_size, 1)

        # shared for all attention component
        self.linear_final = torch.nn.Linear(2 * self.hidden_size, self.hidden_size)
        self.output_layer = torch.nn.Linear(self.hidden_size, 1)

        label_embedding = torch.FloatTensor(self.num_labels, self.hidden_size)
        if label_emb is None:
            nn.init.xavier_normal_(label_embedding)
        else:
            label_embedding.copy_(label_emb)
        self.label_embedding = nn.Parameter(label_embedding, requires_grad=False)

    def init_hidden(self, batch_size):
        if torch.cuda.is_available():
            return (
            torch.zeros(2, batch_size, self.hidden_size).cuda(), torch.zeros(2, batch_size, self.hidden_size).cuda())
        else:
            return (torch.zeros(2, batch_size, self.hidden_size), torch.zeros(2, batch_size, self.hidden_size))

    def forward(self, x, embedding_generation=False):

        emb = self.word_embeddings(x)

        hidden_state = self.init_hidden(emb.size(0))
        output, hidden_state = self.lstm(emb, hidden_state)  # [batch,seq,2*hidden]

        # get attn_key
        attn_key = self.key_layer(output)  # [batch,seq,hidden]
        attn_key = attn_key.transpose(1, 2)  # [batch,hidden,seq]
        # get attn_query
        label_emb = self.label_embedding.expand(
            (attn_key.size(0), self.label_embedding.size(0), self.label_embedding.size(1)))  # [batch,L,label_emb]
        label_emb = self.query_layer(label_emb)  # [batch,L,label_emb]

        # attention
        similarity = torch.bmm(label_emb, attn_key)  # [batch,L,seq]
        similarity = F.softmax(similarity, dim=2)

        out1 = torch.bmm(similarity, output)  # [batch,L,label_emb]

        # self-attn output
        self_attn = torch.tanh(self.linear_first(output))  # [batch,seq,d_a]
        self_attn = self.linear_second(self_attn)  # [batch,seq,L]
        self_attn = F.softmax(self_attn, dim=1)
        self_attn = self_attn.transpose(1, 2)  # [batch,L,seq]
        out2 = torch.bmm(self_attn, output)  # [batch,L,hidden]

        factor1 = torch.sigmoid(self.linear_weight1(out1))
        factor2 = torch.sigmoid(self.linear_weight2(out2))
        factor1 = factor1 / (factor1 + factor2)
        factor2 = 1 - factor1

        out = factor1 * out1 + factor2 * out2
        
        if embedding_generation:
            return out
        
        out = F.relu(self.linear_final(out))
        out = torch.sigmoid(self.output_layer(out).squeeze(-1))  # [batch,L]

        return out


In [46]:

label_emb = np.zeros((52, 52))
label_index_mapping = {}
with open(cc_path(f'notebooks/label_embedding_test.txt')) as f:
    for index, i in enumerate(f.readlines()):
        if index == 0:
            continue
        i = i.rstrip('\n')
        n = i.split(',')[0]
        content = i.split(',')[1].split(' ')
        label_index_mapping[index-1] = n
        label_emb[index-1] = [float(value) for value in content]



label_emb = torch.from_numpy(label_emb).float()




In [47]:
model = Hybrid_XML(num_labels=52, vocab_size=len(word_idx_dict), embedding_size=128, embedding_weights=embedding_weights,
                   max_seq=250, hidden_size=52, d_a=256, label_emb=label_emb)


In [48]:
print(model)

Hybrid_XML(
  (word_embeddings): Embedding(456010, 128)
  (lstm): LSTM(128, 52, batch_first=True, bidirectional=True)
  (key_layer): Linear(in_features=104, out_features=52, bias=True)
  (query_layer): Linear(in_features=52, out_features=52, bias=True)
  (linear_first): Linear(in_features=104, out_features=256, bias=True)
  (linear_second): Linear(in_features=256, out_features=52, bias=True)
  (linear_weight1): Linear(in_features=104, out_features=1, bias=True)
  (linear_weight2): Linear(in_features=104, out_features=1, bias=True)
  (linear_final): Linear(in_features=104, out_features=52, bias=True)
  (output_layer): Linear(in_features=52, out_features=1, bias=True)
)


In [None]:
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001, weight_decay=4e-5)
criterion = torch.nn.BCELoss(reduction='sum')
epoch = 100
best_acc = 0.0
pre_acc = 0.0

# if not os.path.isdir('./rcv_log'):
#     os.makedirs('./rcv_log')
# trace_file='./rcv_log/trace_rcv.txt'

for ep in range(1, epoch + 1):
    train_loss = 0
    print("----epoch: %2d---- " % ep)
    model.train()
    for i, (data, labels) in enumerate(tqdm(train_loader, desc='Batch: ')):
        optimizer.zero_grad()

        # data = data.cuda()
        # labels = labels.cuda()

        pred = model(data)
        loss = criterion(pred, labels.float()) / pred.size(0)
        loss.backward()
        optimizer.step()

        train_loss += float(loss)
    batch_num = i + 1
    train_loss /= batch_num

    print("epoch %2d 训练结束 : avg_loss = %.4f" % (ep, train_loss))
    print("开始进行validation")
    test_loss = 0
    test_p1, test_p3, test_p5 = 0, 0, 0
    test_ndcg1, test_ndcg3, test_ndcg5 = 0, 0, 0
    model.eval()
    for i, (data, labels) in enumerate(tqdm(test_loader)):
        # data = data.cuda()
        # labels = labels.cuda()
        pred = model(data)
        loss = criterion(pred, labels.float()) / pred.size(0)

        # 计算metric
        labels_cpu = labels.data.cpu()
        pred_cpu = pred.data.cpu()

        _p1, _p3, _p5 = precision_k(pred_cpu.topk(k=5)[1].numpy(), labels_cpu.numpy(), k=[1, 3, 5])
        test_p1 += _p1
        test_p3 += _p3
        test_p5 += _p5

        _ndcg1, _ndcg3, _ndcg5 = ndcg_k(pred_cpu.topk(k=5)[1].numpy(), labels_cpu.numpy(), k=[1, 3, 5])
        test_ndcg1 += _ndcg1
        test_ndcg3 += _ndcg3
        test_ndcg5 += _ndcg5

        test_loss += float(loss)
    batch_num = i + 1
    test_loss /= batch_num

    test_p1 /= batch_num
    test_p3 /= batch_num
    test_p5 /= batch_num

    test_ndcg1 /= batch_num
    test_ndcg3 /= batch_num
    test_ndcg5 /= batch_num

    print("epoch %2d 测试结束 : avg_loss = %.4f" % (ep, test_loss))
    print("precision@1 : %.4f , precision@3 : %.4f , precision@5 : %.4f " % (test_p1, test_p3, test_p5))
    print("ndcg@1 : %.4f , ndcg@3 : %.4f , ndcg@5 : %.4f " % (test_ndcg1, test_ndcg3, test_ndcg5))

    if test_p3 < pre_acc:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.0001
    pre_acc = test_p3

----epoch:  1---- 


Batch: 100%|██████████| 1467/1467 [01:24<00:00, 17.33it/s]


epoch  1 训练结束 : avg_loss = 12.4208
开始进行validation


100%|██████████| 367/367 [00:10<00:00, 35.90it/s]


epoch  1 测试结束 : avg_loss = 11.7889
precision@1 : 72.9938 , precision@3 : 61.4246 , precision@5 : 56.1730 
ndcg@1 : 72.9938 , ndcg@3 : 68.0398 , ndcg@5 : 67.0103 
----epoch:  2---- 


Batch: 100%|██████████| 1467/1467 [01:28<00:00, 16.65it/s]


epoch  2 训练结束 : avg_loss = 11.6842
开始进行validation


100%|██████████| 367/367 [00:11<00:00, 32.41it/s]


epoch  2 测试结束 : avg_loss = 11.2605
precision@1 : 74.3892 , precision@3 : 63.7980 , precision@5 : 57.6450 
ndcg@1 : 74.3892 , ndcg@3 : 71.2822 , ndcg@5 : 70.2122 
----epoch:  3---- 


Batch: 100%|██████████| 1467/1467 [01:29<00:00, 16.47it/s]


epoch  3 训练结束 : avg_loss = 11.1208
开始进行validation


100%|██████████| 367/367 [00:11<00:00, 32.05it/s]


epoch  3 测试结束 : avg_loss = 10.9138
precision@1 : 77.2901 , precision@3 : 65.4897 , precision@5 : 58.3980 
ndcg@1 : 77.2901 , ndcg@3 : 73.2229 , ndcg@5 : 71.5373 
----epoch:  4---- 


Batch: 100%|██████████| 1467/1467 [01:28<00:00, 16.59it/s]


epoch  4 训练结束 : avg_loss = 10.8983
开始进行validation


100%|██████████| 367/367 [00:11<00:00, 33.21it/s]


epoch  4 测试结束 : avg_loss = 10.7643
precision@1 : 78.4920 , precision@3 : 66.2859 , precision@5 : 58.9018 
ndcg@1 : 78.4920 , ndcg@3 : 74.2263 , ndcg@5 : 72.3180 
----epoch:  5---- 


Batch: 100%|██████████| 1467/1467 [01:29<00:00, 16.31it/s]


epoch  5 训练结束 : avg_loss = 10.7665
开始进行validation


100%|██████████| 367/367 [00:11<00:00, 31.14it/s]


epoch  5 测试结束 : avg_loss = 10.6720
precision@1 : 78.9561 , precision@3 : 66.5224 , precision@5 : 59.1096 
ndcg@1 : 78.9561 , ndcg@3 : 74.3024 , ndcg@5 : 72.2541 
----epoch:  6---- 


Batch: 100%|██████████| 1467/1467 [01:30<00:00, 16.17it/s]


epoch  6 训练结束 : avg_loss = 10.6525
开始进行validation


100%|██████████| 367/367 [00:11<00:00, 31.45it/s]


epoch  6 测试结束 : avg_loss = 10.6800
precision@1 : 78.2975 , precision@3 : 66.7031 , precision@5 : 59.0815 
ndcg@1 : 78.2975 , ndcg@3 : 74.1249 , ndcg@5 : 71.8912 
----epoch:  7---- 


Batch: 100%|██████████| 1467/1467 [01:31<00:00, 16.06it/s]


epoch  7 训练结束 : avg_loss = 10.5681
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 30.02it/s]


epoch  7 测试结束 : avg_loss = 10.4752
precision@1 : 79.9694 , precision@3 : 67.1019 , precision@5 : 59.4096 
ndcg@1 : 79.9694 , ndcg@3 : 75.3379 , ndcg@5 : 73.2547 
----epoch:  8---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.77it/s]


epoch  8 训练结束 : avg_loss = 10.4781
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.71it/s]


epoch  8 测试结束 : avg_loss = 10.4988
precision@1 : 79.7536 , precision@3 : 67.3801 , precision@5 : 59.6583 
ndcg@1 : 79.7536 , ndcg@3 : 75.1389 , ndcg@5 : 72.9069 
----epoch:  9---- 


Batch: 100%|██████████| 1467/1467 [01:32<00:00, 15.78it/s]


epoch  9 训练结束 : avg_loss = 10.3904
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 29.39it/s]


epoch  9 测试结束 : avg_loss = 10.3306
precision@1 : 80.3384 , precision@3 : 67.4255 , precision@5 : 59.7942 
ndcg@1 : 80.3384 , ndcg@3 : 75.6507 , ndcg@5 : 73.5753 
----epoch: 10---- 


Batch: 100%|██████████| 1467/1467 [01:32<00:00, 15.89it/s]


epoch 10 训练结束 : avg_loss = 10.3090
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 30.13it/s]


epoch 10 测试结束 : avg_loss = 10.2224
precision@1 : 80.7416 , precision@3 : 68.1001 , precision@5 : 60.2770 
ndcg@1 : 80.7416 , ndcg@3 : 76.2651 , ndcg@5 : 74.0426 
----epoch: 11---- 


Batch: 100%|██████████| 1467/1467 [01:31<00:00, 15.99it/s]


epoch 11 训练结束 : avg_loss = 10.2308
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 30.20it/s]


epoch 11 测试结束 : avg_loss = 10.1618
precision@1 : 80.8890 , precision@3 : 68.2534 , precision@5 : 60.4448 
ndcg@1 : 80.8890 , ndcg@3 : 76.3382 , ndcg@5 : 74.0944 
----epoch: 12---- 


Batch: 100%|██████████| 1467/1467 [01:29<00:00, 16.38it/s]


epoch 12 训练结束 : avg_loss = 10.1577
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 30.34it/s]


epoch 12 测试结束 : avg_loss = 10.0875
precision@1 : 81.3546 , precision@3 : 68.5647 , precision@5 : 60.5845 
ndcg@1 : 81.3546 , ndcg@3 : 76.7027 , ndcg@5 : 74.3477 
----epoch: 13---- 


Batch: 100%|██████████| 1467/1467 [01:32<00:00, 15.83it/s]


epoch 13 训练结束 : avg_loss = 10.0927
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 27.77it/s]


epoch 13 测试结束 : avg_loss = 10.1147
precision@1 : 81.5675 , precision@3 : 68.5547 , precision@5 : 60.6591 
ndcg@1 : 81.5675 , ndcg@3 : 76.6526 , ndcg@5 : 74.3244 
----epoch: 14---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.63it/s]


epoch 14 训练结束 : avg_loss = 9.9396
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.88it/s]


epoch 14 测试结束 : avg_loss = 9.9294
precision@1 : 81.9550 , precision@3 : 69.0794 , precision@5 : 61.0275 
ndcg@1 : 81.9550 , ndcg@3 : 77.3050 , ndcg@5 : 74.9597 
----epoch: 15---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.56it/s]


epoch 15 训练结束 : avg_loss = 9.9150
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.73it/s]


epoch 15 测试结束 : avg_loss = 9.9143
precision@1 : 82.0329 , precision@3 : 69.1366 , precision@5 : 61.0752 
ndcg@1 : 82.0329 , ndcg@3 : 77.3929 , ndcg@5 : 75.0896 
----epoch: 16---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.54it/s]


epoch 16 训练结束 : avg_loss = 9.8998
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 27.98it/s]


epoch 16 测试结束 : avg_loss = 9.9029
precision@1 : 81.9635 , precision@3 : 69.2067 , precision@5 : 61.1317 
ndcg@1 : 81.9635 , ndcg@3 : 77.4131 , ndcg@5 : 75.0747 
----epoch: 17---- 


Batch: 100%|██████████| 1467/1467 [01:35<00:00, 15.44it/s]


epoch 17 训练结束 : avg_loss = 9.8871
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.50it/s]


epoch 17 测试结束 : avg_loss = 9.9025
precision@1 : 82.1152 , precision@3 : 69.1683 , precision@5 : 61.1731 
ndcg@1 : 82.1152 , ndcg@3 : 77.3728 , ndcg@5 : 75.0942 
----epoch: 18---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.61it/s]


epoch 18 训练结束 : avg_loss = 9.8752
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 29.04it/s]


epoch 18 测试结束 : avg_loss = 9.8872
precision@1 : 82.1295 , precision@3 : 69.2090 , precision@5 : 61.1862 
ndcg@1 : 82.1295 , ndcg@3 : 77.5071 , ndcg@5 : 75.2291 
----epoch: 19---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.51it/s]


epoch 19 训练结束 : avg_loss = 9.8633
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.93it/s]


epoch 19 测试结束 : avg_loss = 9.8741
precision@1 : 82.1508 , precision@3 : 69.2653 , precision@5 : 61.2143 
ndcg@1 : 82.1508 , ndcg@3 : 77.3944 , ndcg@5 : 75.0671 
----epoch: 20---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.72it/s]


epoch 20 训练结束 : avg_loss = 9.8527
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 29.15it/s]


epoch 20 测试结束 : avg_loss = 9.8612
precision@1 : 82.2530 , precision@3 : 69.4110 , precision@5 : 61.3034 
ndcg@1 : 82.2530 , ndcg@3 : 77.6158 , ndcg@5 : 75.2576 
----epoch: 21---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.62it/s]


epoch 21 训练结束 : avg_loss = 9.8425
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.38it/s]


epoch 21 测试结束 : avg_loss = 9.8495
precision@1 : 82.2743 , precision@3 : 69.4153 , precision@5 : 61.3247 
ndcg@1 : 82.2743 , ndcg@3 : 77.6679 , ndcg@5 : 75.3419 
----epoch: 22---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.64it/s]


epoch 22 训练结束 : avg_loss = 9.8299
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.34it/s]


epoch 22 测试结束 : avg_loss = 9.8341
precision@1 : 82.3126 , precision@3 : 69.5317 , precision@5 : 61.4087 
ndcg@1 : 82.3126 , ndcg@3 : 77.7796 , ndcg@5 : 75.4545 
----epoch: 23---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.67it/s]


epoch 23 训练结束 : avg_loss = 9.8210
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 29.12it/s]


epoch 23 测试结束 : avg_loss = 9.8539
precision@1 : 82.3182 , precision@3 : 69.5231 , precision@5 : 61.3979 
ndcg@1 : 82.3182 , ndcg@3 : 77.6753 , ndcg@5 : 75.3132 
----epoch: 24---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.49it/s]


epoch 24 训练结束 : avg_loss = 9.8081
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.77it/s]


epoch 24 测试结束 : avg_loss = 9.8194
precision@1 : 82.2841 , precision@3 : 69.6211 , precision@5 : 61.4666 
ndcg@1 : 82.2841 , ndcg@3 : 77.8285 , ndcg@5 : 75.4726 
----epoch: 25---- 


Batch: 100%|██████████| 1467/1467 [01:35<00:00, 15.43it/s]


epoch 25 训练结束 : avg_loss = 9.7988
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 27.93it/s]


epoch 25 测试结束 : avg_loss = 9.8066
precision@1 : 82.4161 , precision@3 : 69.6608 , precision@5 : 61.5183 
ndcg@1 : 82.4161 , ndcg@3 : 77.8831 , ndcg@5 : 75.5430 
----epoch: 26---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.63it/s]


epoch 26 训练结束 : avg_loss = 9.7882
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.49it/s]


epoch 26 测试结束 : avg_loss = 9.7955
precision@1 : 82.3962 , precision@3 : 69.7123 , precision@5 : 61.5535 
ndcg@1 : 82.3962 , ndcg@3 : 77.9407 , ndcg@5 : 75.5951 
----epoch: 27---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.66it/s]


epoch 27 训练结束 : avg_loss = 9.7789
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.94it/s]


epoch 27 测试结束 : avg_loss = 9.7890
precision@1 : 82.4132 , precision@3 : 69.8230 , precision@5 : 61.5438 
ndcg@1 : 82.4132 , ndcg@3 : 78.0716 , ndcg@5 : 75.6465 
----epoch: 28---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.63it/s]


epoch 28 训练结束 : avg_loss = 9.7683
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 29.12it/s]


epoch 28 测试结束 : avg_loss = 9.7765
precision@1 : 82.4672 , precision@3 : 69.8354 , precision@5 : 61.6006 
ndcg@1 : 82.4672 , ndcg@3 : 78.0395 , ndcg@5 : 75.6501 
----epoch: 29---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.66it/s]


epoch 29 训练结束 : avg_loss = 9.7600
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 28.08it/s]


epoch 29 测试结束 : avg_loss = 9.7692
precision@1 : 82.4970 , precision@3 : 69.8462 , precision@5 : 61.6151 
ndcg@1 : 82.4970 , ndcg@3 : 78.0171 , ndcg@5 : 75.6232 
----epoch: 30---- 


Batch: 100%|██████████| 1467/1467 [01:43<00:00, 14.19it/s]


epoch 30 训练结束 : avg_loss = 9.7464
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 26.37it/s]


epoch 30 测试结束 : avg_loss = 9.7881
precision@1 : 82.4558 , precision@3 : 69.7952 , precision@5 : 61.5475 
ndcg@1 : 82.4558 , ndcg@3 : 77.9681 , ndcg@5 : 75.5181 
----epoch: 31---- 


Batch: 100%|██████████| 1467/1467 [01:41<00:00, 14.44it/s]


epoch 31 训练结束 : avg_loss = 9.7382
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 28.16it/s]


epoch 31 测试结束 : avg_loss = 9.7496
precision@1 : 82.6005 , precision@3 : 69.9508 , precision@5 : 61.7269 
ndcg@1 : 82.6005 , ndcg@3 : 78.1934 , ndcg@5 : 75.8122 
----epoch: 32---- 


Batch: 100%|██████████| 1467/1467 [01:35<00:00, 15.35it/s]


epoch 32 训练结束 : avg_loss = 9.7305
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.80it/s]


epoch 32 测试结束 : avg_loss = 9.7385
precision@1 : 82.6758 , precision@3 : 69.9957 , precision@5 : 61.7326 
ndcg@1 : 82.6758 , ndcg@3 : 78.2700 , ndcg@5 : 75.8510 
----epoch: 33---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.52it/s]


epoch 33 训练结束 : avg_loss = 9.7222
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 27.94it/s]


epoch 33 测试结束 : avg_loss = 9.7340
precision@1 : 82.6716 , precision@3 : 70.0241 , precision@5 : 61.7857 
ndcg@1 : 82.6716 , ndcg@3 : 78.2629 , ndcg@5 : 75.8898 
----epoch: 34---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.64it/s]


epoch 34 训练结束 : avg_loss = 9.7111
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.25it/s]


epoch 34 测试结束 : avg_loss = 9.7215
precision@1 : 82.6388 , precision@3 : 70.0539 , precision@5 : 61.8058 
ndcg@1 : 82.6388 , ndcg@3 : 78.3083 , ndcg@5 : 75.9067 
----epoch: 35---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.63it/s]


epoch 35 训练结束 : avg_loss = 9.7026
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.80it/s]


epoch 35 测试结束 : avg_loss = 9.7523
precision@1 : 82.4204 , precision@3 : 69.8392 , precision@5 : 61.7087 
ndcg@1 : 82.4204 , ndcg@3 : 77.9481 , ndcg@5 : 75.5696 
----epoch: 36---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.67it/s]


epoch 36 训练结束 : avg_loss = 9.6967
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.52it/s]


epoch 36 测试结束 : avg_loss = 9.7155
precision@1 : 82.6588 , precision@3 : 70.1107 , precision@5 : 61.8390 
ndcg@1 : 82.6588 , ndcg@3 : 78.3156 , ndcg@5 : 75.8879 
----epoch: 37---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.65it/s]


epoch 37 训练结束 : avg_loss = 9.6887
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.62it/s]


epoch 37 测试结束 : avg_loss = 9.7132
precision@1 : 82.6984 , precision@3 : 70.1608 , precision@5 : 61.8643 
ndcg@1 : 82.6984 , ndcg@3 : 78.3987 , ndcg@5 : 75.9770 
----epoch: 38---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.53it/s]


epoch 38 训练结束 : avg_loss = 9.6780
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 27.72it/s]


epoch 38 测试结束 : avg_loss = 9.6891
precision@1 : 82.7993 , precision@3 : 70.1778 , precision@5 : 61.8697 
ndcg@1 : 82.7993 , ndcg@3 : 78.4590 , ndcg@5 : 76.0177 
----epoch: 39---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.64it/s]


epoch 39 训练结束 : avg_loss = 9.6711
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 28.22it/s]


epoch 39 测试结束 : avg_loss = 9.6919
precision@1 : 82.7993 , precision@3 : 70.1774 , precision@5 : 61.8700 
ndcg@1 : 82.7993 , ndcg@3 : 78.5049 , ndcg@5 : 76.1086 
----epoch: 40---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.45it/s]


epoch 40 训练结束 : avg_loss = 9.6605
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.43it/s]


epoch 40 测试结束 : avg_loss = 9.7172
precision@1 : 82.5452 , precision@3 : 70.1963 , precision@5 : 61.8483 
ndcg@1 : 82.5452 , ndcg@3 : 78.3585 , ndcg@5 : 75.8742 
----epoch: 41---- 


Batch: 100%|██████████| 1467/1467 [01:35<00:00, 15.32it/s]


epoch 41 训练结束 : avg_loss = 9.6535
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.60it/s]


epoch 41 测试结束 : avg_loss = 9.6627
precision@1 : 82.8389 , precision@3 : 70.3236 , precision@5 : 61.9676 
ndcg@1 : 82.8389 , ndcg@3 : 78.5688 , ndcg@5 : 76.1009 
----epoch: 42---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.64it/s]


epoch 42 训练结束 : avg_loss = 9.6440
开始进行validation


100%|██████████| 367/367 [00:13<00:00, 27.96it/s]


epoch 42 测试结束 : avg_loss = 9.6591
precision@1 : 82.8887 , precision@3 : 70.3491 , precision@5 : 61.9468 
ndcg@1 : 82.8887 , ndcg@3 : 78.6131 , ndcg@5 : 76.1224 
----epoch: 43---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.69it/s]


epoch 43 训练结束 : avg_loss = 9.6376
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 29.02it/s]


epoch 43 测试结束 : avg_loss = 9.6539
precision@1 : 83.0930 , precision@3 : 70.4272 , precision@5 : 62.0632 
ndcg@1 : 83.0930 , ndcg@3 : 78.7855 , ndcg@5 : 76.3322 
----epoch: 44---- 


Batch: 100%|██████████| 1467/1467 [01:34<00:00, 15.54it/s]


epoch 44 训练结束 : avg_loss = 9.6296
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.98it/s]


epoch 44 测试结束 : avg_loss = 9.6574
precision@1 : 82.8661 , precision@3 : 70.3586 , precision@5 : 62.0087 
ndcg@1 : 82.8661 , ndcg@3 : 78.6042 , ndcg@5 : 76.1524 
----epoch: 45---- 


Batch: 100%|██████████| 1467/1467 [01:33<00:00, 15.65it/s]


epoch 45 训练结束 : avg_loss = 9.6199
开始进行validation


100%|██████████| 367/367 [00:12<00:00, 28.84it/s]


epoch 45 测试结束 : avg_loss = 9.6712
precision@1 : 82.9853 , precision@3 : 70.3893 , precision@5 : 61.9852 
ndcg@1 : 82.9853 , ndcg@3 : 78.7177 , ndcg@5 : 76.2462 
----epoch: 46---- 


Batch:   9%|▊         | 128/1467 [00:08<01:26, 15.52it/s]

In [98]:
model.eval()
for i, (data, labels) in enumerate(tqdm(test_loader)):
    print(data)
    pred = model(data, embedding_generation=True)
    print(pred.detach().numpy().shape)
    assert False


  0%|                                                                                          | 0/157 [00:00<?, ?it/s]

tensor([[141632, 236289, 428078,  ...,      0,      0,      0],
        [385728,  70288,  84518,  ...,      0,      0,      0],
        [  4558, 345890, 205374,  ...,      0,      0,      0],
        ...,
        [303372, 199293, 386063,  ...,      0,      0,      0],
        [  1822, 431470, 299190,  ...,      0,      0,      0],
        [197196, 378696, 376918,  ...,      0,      0,      0]])


  0%|                                                                                          | 0/157 [00:00<?, ?it/s]

(64, 52, 104)





AssertionError: 

In [51]:
torch.save(model, cc_path(f'models/xml_embedding/word_embeddings_{pretrain}_20230408_all_data.txt'))


In [54]:
model = torch.load(cc_path(f'models/xml_embedding/word_embeddings_{pretrain}_20230408_all_data.txt'))


In [None]:
del embedding_df, processed_df, 

In [55]:
emb_batch_size = 1024
abstracts_to_embed = np.array(abstracts_df.loc[:, 'abstract'].to_list(), dtype=int)
puis_to_embed = np.array(abstracts_df.loc[:, 'pui'].to_list(), dtype=int)

embedding_data = data_utils.TensorDataset(torch.from_numpy(abstracts_to_embed).type(torch.LongTensor), 
                                          torch.from_numpy(puis_to_embed).type(torch.LongTensor))
final_data = data_utils.DataLoader(embedding_data, emb_batch_size, drop_last=False)

In [56]:
import pandas as pd

In [57]:
len(abstracts_df['pui'].to_numpy(dtype=int))

117310

In [101]:
num_of_embedding_dim = 104

embedding_columns =  [f'd_{i}' for i in range(52)]
xml_embedding_df = pd.DataFrame(columns=embedding_columns, index=abstracts_df['pui'].to_numpy(dtype=int))
# xml_embedding_df['embedding'] = xml_embedding_df['embedding'].astype(object)
np.set_printoptions(threshold = 100000000000000)

for i, (data, pui) in enumerate(tqdm(final_data)):
    pred = model(data, embedding_generation=True)
    
    right_puis =  list(pui.detach().numpy())
    
    numpy_preds = pred.detach().numpy()
    for idx_batch in range(numpy_preds.shape[0]):
        for idx_label in range(numpy_preds.shape[1]):
            xml_embedding_df.loc[right_puis[idx_batch], embedding_columns[idx_label]] = numpy_preds[idx_batch, 
                                                                                             idx_label, 
                                                                                             :]



100%|██████████| 115/115 [04:51<00:00,  2.54s/it]


In [102]:
xml_embedding_df.reset_index(names='pui', inplace=True)


In [103]:

xml_embedding_df.to_feather(cc_path('data/processed/canary/embeddings_xml.ftr'))

In [61]:
pd.options.display.width = 1000

In [100]:
xml_embedding_df

Unnamed: 0,d_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,...,d_42,d_43,d_44,d_45,d_46,d_47,d_48,d_49,d_50,d_51
624531411,"[-0.016383378, -0.11197758, -0.037253555, -0.2...","[0.010617012, -0.09705223, -0.01200425, -0.120...","[0.028110279, -0.08243386, -0.016131265, -0.18...","[0.0022158436, -0.11320748, -0.010183357, -0.1...","[0.005926796, -0.10500478, -0.010475147, -0.12...","[0.0113420375, -0.09601049, -0.014044623, -0.1...","[0.016601732, -0.09619771, -0.009100688, -0.14...","[0.11544543, -0.005443737, -0.005527966, -0.18...","[0.02586015, -0.086351044, -0.010884014, -0.16...","[0.009882777, -0.09707534, -0.021866538, -0.18...",...,"[0.18155344, 0.026362767, -0.01475883, -0.2147...","[0.0069437977, -0.09799223, -0.02816795, -0.23...","[0.18892401, 0.041114308, -0.00097465096, -0.1...","[0.11469094, -0.015334345, 0.006475803, -0.087...","[0.027374564, -0.08158906, -0.01762917, -0.206...","[0.04705087, -0.065758094, -0.018019354, -0.21...","[0.3474198, 0.15047382, 0.0039090053, -0.13961...","[0.11787519, -0.0064049475, -0.0025091413, -0....","[0.069682136, -0.054890573, -0.011223326, -0.1...","[-0.011351064, -0.100169934, -0.034848228, -0...."
625340088,"[0.06821822, -0.05620522, -0.065800205, -0.222...","[0.10693053, -0.017095909, -0.044121563, -0.09...","[0.10340122, -0.019759048, -0.055330824, -0.16...","[0.044841465, -0.09599258, -0.047074445, -0.14...","[0.042860292, -0.08393292, -0.043275695, -0.10...","[0.038487554, -0.07838504, -0.053617507, -0.16...","[0.05473292, -0.075368986, -0.042547263, -0.12...","[0.18368945, 0.09457679, -0.079515636, -0.1797...","[0.1327912, 0.019922812, -0.06676929, -0.15574...","[0.1424106, 0.018941574, -0.052223623, -0.1458...",...,"[0.24319527, 0.12634419, -0.05570019, -0.18991...","[0.12861286, 0.009207115, -0.063367575, -0.198...","[0.24438602, 0.14896263, -0.07208059, -0.14055...","[0.1749304, 0.08700106, -0.05070314, -0.066946...","[0.100104034, -0.02964319, -0.0711903, -0.2004...","[0.13491955, 0.01794957, -0.067108504, -0.1916...","[0.41909015, 0.33678198, -0.07781066, -0.12629...","[0.17230125, 0.06469397, -0.08423577, -0.16590...","[0.15537813, 0.036530755, -0.06381854, -0.1704...","[0.02809963, -0.08056283, -0.07836892, -0.2682..."
625805682,"[0.028528668, -0.060206894, 0.07548873, -0.348...","[0.042353593, -0.07522996, 0.045501076, -0.324...","[0.043712128, -0.07069669, 0.053283475, -0.341...","[0.0912251, -0.05125811, 0.106232695, -0.31711...","[0.071796775, -0.050604787, 0.09280021, -0.337...","[0.076959535, -0.04865962, 0.090105645, -0.341...","[0.077228464, -0.051366046, 0.099378556, -0.32...","[0.051017053, -0.06567388, 0.10099919, -0.2431...","[0.04003511, -0.047263972, 0.09858238, -0.3281...","[0.054769102, -0.05078943, 0.08115368, -0.2868...",...,"[0.18210159, 0.063631855, 0.05955499, -0.32273...","[0.057935737, -0.034007467, 0.08288933, -0.324...","[0.21283509, 0.080757335, 0.084314525, -0.2392...","[0.099942975, -0.049863294, 0.13453224, -0.256...","[0.046176944, -0.0769093, 0.06805101, -0.29428...","[0.037154716, -0.05500753, 0.088356555, -0.317...","[0.33445185, 0.20716941, 0.06621084, -0.234082...","[0.07703457, -0.032216277, 0.1353079, -0.22812...","[0.06902366, -0.051700268, 0.06834428, -0.3177...","[0.03009884, -0.06154553, 0.08346843, -0.34348..."
626662493,"[0.059955165, -0.02994335, -0.03311316, -0.143...","[0.10139962, 0.009669423, -0.022478202, -0.038...","[0.0790334, -0.01266022, -0.02287779, -0.10124...","[0.030228794, -0.07793985, -0.028091416, -0.13...","[0.036995795, -0.06270175, -0.0315825, -0.1039...","[0.027064249, -0.0709749, -0.031441428, -0.146...","[0.043158136, -0.059222713, -0.027855176, -0.1...","[0.13726412, 0.05951462, 0.010710327, -0.14864...","[0.083741456, 0.0010779575, 0.00037083589, -0....","[0.12572579, 0.037525304, -0.019792218, -0.073...",...,"[0.15025625, 0.0521947, -0.026349314, -0.17327...","[0.11670811, 0.030976295, -0.02641234, -0.1190...","[0.19017154, 0.094812796, -0.003392328, -0.117...","[0.13538006, 0.04384046, 0.006757291, -0.05630...","[0.07334437, -0.024985045, -0.021543493, -0.13...","[0.097782776, 0.006623201, -0.026629627, -0.12...","[0.3373683, 0.24063022, -0.0021311226, -0.1236...","[0.15725388, 0.042757872, -0.008270113, -0.101...","[0.12123768, 0.029323883, -0.025152106, -0.115...","[0.022587577, -0.067165665, -0.03826805, -0.19..."
626822402,"[0.27494726, -0.061647326, 0.18791562, -0.2993...","[0.3305084, -0.046140015, 0.18127444, -0.38189...","[0.30549046, -0.052407146, 0.18561853, -0.3563...","[0.33089703, -0.05900829, 0.19056454, -0.28315...","[0.3212989, -0.053632278, 0.18682936, -0.28965...","[0.33208808, -0.05664587, 0.18075578, -0.28696...","[0.32238388, -0.05355747, 0.18911934, -0.28512...","[0.2843451, -0.075362384, 0.17306942, -0.26655...","[0.26771063, -0.066218354, 0.17441167, -0.2618...","[0.28459507, -0.07167025, 0.17930306, -0.29999...",...,"[0.34554872, -0.114436746, 0.16829908, -0.2782...","[0.25267428, -0.080995634, 0.17711401, -0.2627...","[0.3340537, -0.093892336, 0.1561747, -0.314642...","[0.3147732, -0.065012425, 0.17388539, -0.25686...","[0.29047766, -0.05335179, 0.18333776, -0.34369...","[0.2530854, -0.07009541, 0.18177888, -0.267674...","[0.3673926, -0.15189306, 0.14653346, -0.250014...","[0.27901053, -0.060020022, 0.20105505, -0.2875...","[0.2838174, -0.074696735, 0.17902729, -0.29193...","[0.25677127, -0.058577746, 0.19382006, -0.2792..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011621972,,,,,,,,,,,...,,,,,,,,,,
2011622024,,,,,,,,,,,...,,,,,,,,,,
2011622065,,,,,,,,,,,...,,,,,,,,,,
2011626864,,,,,,,,,,,...,,,,,,,,,,
