In [1]:
import os 
import torch
import numpy as np
import pandas as pd
import torch.nn as nn 
import torch.nn.functional as F
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel
from utils.manager import Manager
from utils.util import BM25

In [3]:
class config:
    # Plain attribute container that is handed to Manager(config, notebook=True) below.
    # NOTE(review): the per-field notes are inferred from the field names and from the
    # Manager/Dataset log lines printed under this cell — confirm against utils.manager.

    # --- run setup ---
    epochs = 10
    scale = "demo"
    mode = "train"
    device = 0  # presumably a CUDA device index — TODO confirm
    batch_size = 2
    batch_size_eval = 300
    dropout_p = 0.1
    seed = 3407
    world_size = 1

    # --- paths (relative to the notebook's working directory) ---
    # The Dataset log lines show files being read from under data_root (…/Data/DBLP/<split>/base.tsv).
    data_root = "../../../Data"
    cache_root = "data/cache"

    # --- input sizes ---
    # presumably neighbors per node and tokens per sequence — TODO confirm
    neighbor_num = 5
    sequence_length = 32

    # --- model selection ---
    plm = "bert"
    enable_gate = "weight"

    # --- process / logging flags ---
    rank = 0
    verbose = None
    distributed = False
    debug = False

# Build the project Manager from the `config` class (in notebook mode) and have it
# prepare the data loaders. `loaders` is indexed by split name ("train", "dev") in the next cell.
manager = Manager(config, notebook=True)
loaders = manager.prepare()

[2022-02-22 11:48:11,410] INFO (Manager) Hyper Parameters are:
{'scale': 'demo', 'batch_size': 2, 'batch_size_eval': 300, 'dropout_p': 0.1, 'seed': 3407, 'world_size': 1, 'neighbor_num': 5, 'sequence_length': 32, 'plm': 'bert', 'enable_gate': 'weight', 'verbose': None}

[2022-02-22 11:48:11,412] INFO (Dataset) initializing DBLP dataset from ../../../Data/DBLP/train/base.tsv...
[2022-02-22 11:48:11,431] INFO (Dataset) initializing DBLP dataset from ../../../Data/DBLP/dev/base.tsv...


In [3]:
# Per-split loaders returned by manager.prepare(); the test split is disabled here.
loader_train, loader_dev = loaders["train"], loaders["dev"]
# loader_test = loaders["test"]

# Datasets wrapped by each loader.
dataset_train, dataset_dev = loader_train.dataset, loader_dev.dataset
# dataset_test = loader_test.dataset

# One iterator per loader (kept under its own name), then the first batch of
# each split for interactive inspection.
X1, X2 = iter(loader_train), iter(loader_dev)
# X3 = iter(loader_test)
x, x2 = next(X1), next(X2)
# x3 = next(X3)

In [4]:
# Load the tokenizer and the pretrained model from the directory resolved by the manager.
# The tokenizer `t` is required by the gate-mask check further down
# (t.convert_ids_to_tokens), so it must be created here for the notebook to
# survive a Restart & Run All on a fresh kernel.
t = AutoTokenizer.from_pretrained(manager.plm_dir)
# Place the model on the device selected in `config` instead of a literal index.
m = AutoModel.from_pretrained(manager.plm_dir).to(config.device)

In [6]:
# Display the loaded model's encoder layer stack to inspect its submodules.
m.encoder.layer

ModuleList(
  (0): BertLayer(
    (attention): BertAttention(
      (self): BertSelfAttention(
        (query): Linear(in_features=768, out_features=768, bias=True)
        (key): Linear(in_features=768, out_features=768, bias=True)
        (value): Linear(in_features=768, out_features=768, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (output): BertSelfOutput(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (intermediate): BertIntermediate(
      (dense): Linear(in_features=768, out_features=3072, bias=True)
    )
    (output): BertOutput(
      (dense): Linear(in_features=3072, out_features=768, bias=True)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
  (1): BertLayer(
    (attention): BertAttention(
      (self)

In [7]:
# Show the `query_neighbor_mask` entry of the sampled training batch `x`.
# NOTE(review): presumably shaped (batch_size, neighbor_num) — confirm against the dataset code.
x["query_neighbor_mask"]

tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1]])

In [5]:
# check gate mask
# Take the entry at position (0, 0) of the training batch `x` and list its query
# tokens next to their attention-mask (a) and gate-mask (g) values.
index = (0, 0)
query_token_id, query_attn_mask, query_gate_mask = (
    x[name][index]
    for name in ("query_token_id", "query_attn_mask", "query_gate_mask")
)
key_token_id, key_attn_mask, key_gate_mask = (
    x[name][index]
    for name in ("key_token_id", "key_attn_mask", "key_gate_mask")
)

# Map ids back to word pieces with the tokenizer `t`.
query_token = t.convert_ids_to_tokens(query_token_id)
key_token = t.convert_ids_to_tokens(key_token_id)

# Header row: an empty 15-character token column followed by the two mask columns.
line = f"{'':15} a g"
print(line)
for i in range(manager.sequence_length):
    token = query_token[i]
    line = f"{token:15} {query_attn_mask[i]} {query_gate_mask[i]}"
    print(line)
    # The first padding token is still printed, then the listing stops.
    if token == "[PAD]":
        break

                a g
[CLS]           1 0
a               1 1
hybrid          1 1
meta            1 1
-               1 1
he              1 1
##uri           1 1
##stic          1 1
for             1 1
the             1 1
batch           1 1
##ing           1 1
problem         1 1
in              1 1
just            1 1
-               1 0
in              1 0
-               1 0
time            1 1
flow            1 1
shops           1 1
[SEP]           1 0
[PAD]           0 0
