# Test: BiDAF model forward/backward pass on a single batch

In [1]:
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import loader
import torch
from torch import optim
from torch import nn
from config import config_base
from config import config_r_net
from config import config_match_lstm
from config import config_bi_daf
import preprocess_data
import utils
from modules.layers.loss import MyNLLLoss
from modules import match_lstm
from modules import r_net
from modules import bi_daf
import torch
from torch import nn
from torch.nn import functional as f
from modules.layers import embedding
from modules.layers import encoder
import utils


# Select the configuration object used by every later cell (paths, batch
# size, learning rate and model hyper-parameters are all read from it).
# Alternative configurations, kept for reference:
# config = config_match_lstm.config
# config = config_r_net.config
config = config_bi_daf.config

In [2]:
    time_start = time.time()

    # Preparation (collect, vocab, embedding), then read the vocabulary and the
    # word-vector matrix from the configured paths.
    preprocess_data.gen_pre_file()
    lang = loader.load_vocab(config.vocab_path)
    embedding_np = loader.load_w2v(config.embedding_path)

    # Regenerate the train/val/test data files unless all three already exist.
    if not (os.path.isfile(config.train_df)
            and os.path.isfile(config.val_df)
            and os.path.isfile(config.test_df)):
        print('gen train_df.csv, val_df.csv, test_df.csv')
        time0 = time.time()
        preprocess_data.gen_train_datafile()
        print('gen train_df.csv, val_df.csv, test_df.csv. time:%d' % (time.time()-time0))

    # Load the train and validation sets (merge, question, answer_start,
    # answer_end) and report how long loading took.
    print('load data...')
    time0 = time.time()
    train_data = loader.load_data(config.train_df, lang)
    val_data = loader.load_data(config.val_df, lang)
    print('load data finished, time:%d' % (time.time()-time0))

    # Data loaders. The train loader uses a tiny fixed batch of 3 (this is a
    # test notebook); the validation loader uses the configured batch size.
    train_loader = loader.build_loader(dataset=train_data, batch_size=3,
                                       shuffle=True, drop_last=True)
    val_loader = loader.build_loader(dataset=val_data, batch_size=config.batch_size,
                                     shuffle=False, drop_last=True)

    # Model hyper-parameters: the embedding matrix plus the settings copied
    # one-to-one from the config object under the same key names.
    param = {'embedding': embedding_np}
    param.update(
        (name, getattr(config, name))
        for name in (
            'embedding_type',
            'embedding_is_training',
            'mode',
            'hidden_size',
            'dropout_p',
            'encoder_dropout_p',
            'encoder_bidirectional',
            'encoder_layer_num',
            'is_bn',
        )
    )

load data...


Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 0.471 seconds.
Prefix dict has been built succesfully.


load data finished, time:92


In [5]:

# Take exactly one batch from the training loader for the single-step test
# below. `next(iter(...))` avoids leaking the loop variables `i` and `batch`
# into the notebook namespace and raises StopIteration right here if the
# loader is empty, instead of leaving `xx` undefined for a later cell.
xx = next(iter(train_loader))

In [7]:
# Run the raw batch through the project helper `utils.deal_batch`; the result
# is what the model and the loss consume below.
# NOTE(review): presumably this wraps/moves the tensors onto the GPU — confirm in utils.
batch = utils.deal_batch(xx)

In [8]:
# Instantiate the network from the hyper-parameter dict and move it to the GPU
# in a single expression, then create the loss. `Model` is the class defined
# in the notebook cell holding the class definition, which must be run first.
model = Model(param).cuda()
criterion = MyNLLLoss()

In [21]:
# Adam optimizer over all model parameters; learning rate comes from the config.
optimizer = optim.Adam(model.parameters(), lr=config.lr)

In [10]:
# Forward pass on the single prepared batch; per the Model docstring the
# result has shape (2, batch_size, content_len).
outputs = model(batch)

In [20]:
# Sanity check: print every output value that is NOT strictly inside (-1, 1).
# NaN fails the chained comparison and is therefore printed as well.
for value in outputs.data.cpu().numpy().flatten():
    if not (-1 < value < 1):
        print(value)

In [52]:
# Reset the gradients of all optimized parameters before the next backward pass.
optimizer.zero_grad()

In [53]:
# Forward pass for the training step (same batch as before).
outputs = model(batch)

In [54]:
# Display the raw model output tensor for inspection.
outputs

tensor([[[ 7.3195e-03,  6.7439e-03,  6.9943e-03,  ...,  1.0000e-06,
           1.0000e-06,  1.0000e-06],
         [ 4.5989e-03,  3.3401e-03,  4.8913e-03,  ...,  4.8429e-03,
           4.7622e-03,  4.7804e-03],
         [ 4.7418e-03,  5.5454e-03,  2.9850e-03,  ...,  1.0000e-06,
           1.0000e-06,  1.0000e-06]],

        [[ 7.3974e-03,  7.4598e-03,  7.5624e-03,  ...,  1.0000e-06,
           1.0000e-06,  1.0000e-06],
         [ 5.2656e-03,  6.1020e-03,  4.9826e-03,  ...,  5.1035e-03,
           5.0299e-03,  5.1687e-03],
         [ 5.9873e-03,  6.3974e-03,  4.9574e-03,  ...,  1.0000e-06,
           1.0000e-06,  1.0000e-06]]], device='cuda:0')

In [55]:
# Compute the loss (MyNLLLoss) from the model output and the batch.
loss_value = criterion(outputs, batch)

In [56]:
# Back-propagate: populate gradients for the model parameters.
loss_value.backward()

In [57]:
# Apply one Adam update using the gradients computed above.
optimizer.step()

In [58]:
# Display the loss of this single training step.
loss_value

tensor(10.4183, device='cuda:0')

In [6]:
class Model(nn.Module):
    """BiDAF (Bi-Directional Attention Flow) model for reading comprehension.

    Pipeline: embedding -> shared RNN encoder for content and question ->
    attention flow layer -> modeling RNN -> output layer that produces the
    answer start/end distributions over content positions.

    :param param: dict with the keys 'embedding_type', 'mode', 'hidden_size',
        'dropout_p', 'encoder_dropout_p', 'encoder_layer_num', 'is_bn' and,
        when 'embedding_type' is 'standard', 'embedding'.
    """

    # Score written into padded cells of the similarity matrix before the
    # softmax. It must be hugely negative so padding gets (numerically) zero
    # attention, but finite: a content row that is entirely padding would turn
    # into NaN under -inf, and that NaN would survive the later mask multiply.
    _MASKED_SCORE = -1e30

    def __init__(self, param):
        super(Model, self).__init__()

        self.embedding_type = param['embedding_type']
        self.mode = param['mode']
        self.hidden_size = param['hidden_size']
        self.dropout_p = param['dropout_p']
        self.encoder_dropout_p = param['encoder_dropout_p']
        self.encoder_layer_num = param['encoder_layer_num']
        self.is_bn = param['is_bn']

        # embedding
        if self.embedding_type == 'standard':
            self.embedding = embedding.Embedding(param['embedding'])
            is_bn = False
        else:
            # NOTE(review): no embedding module is created on this branch, so
            # the `self.embedding.embedding_dim` lookup below fails for any
            # non-'standard' embedding type.
            is_bn = True

        # encoder shared by content and question
        self.encoder = encoder.Rnn(
            mode=self.mode,
            input_size=self.embedding.embedding_dim,
            hidden_size=self.hidden_size,
            dropout_p=self.encoder_dropout_p,
            bidirectional=True,
            layer_num=self.encoder_layer_num,
            is_bn=is_bn
        )

        # attention flow layer: the similarity score is the sum of three
        # linear terms (content, question, element-wise product)
        self.att_c = nn.Linear(self.hidden_size*2, 1)
        self.att_q = nn.Linear(self.hidden_size*2, 1)
        self.att_cq = nn.Linear(self.hidden_size*2, 1)

        # modeling layer
        self.modeling_rnn = encoder.Rnn(
            mode=self.mode,
            input_size=self.hidden_size*8,
            hidden_size=self.hidden_size,
            dropout_p=self.dropout_p,
            bidirectional=True,
            layer_num=2,
            is_bn=self.is_bn
        )

        # outputs: each head reads the concatenation [g, m] (8h + 2h features)
        self.p1 = nn.Linear(self.hidden_size*10, 1)
        self.p2 = nn.Linear(self.hidden_size*10, 1)

        # extra RNN applied to m before predicting the answer end
        self.rnn = encoder.Rnn(
            mode=self.mode,
            input_size=self.hidden_size*2,
            hidden_size=self.hidden_size,
            bidirectional=True,
            dropout_p=self.dropout_p,
            layer_num=1,
            is_bn=self.is_bn
        )

        self.dropout = nn.Dropout(self.dropout_p)

    def _att_flow_layer(self, c, c_mask, q, q_mask):
        """
        attention flow layer
        :param c: (c_len, batch_size, hidden_size*2)
        :param c_mask: (batch_size, c_len)
        :param q: (q_len, batch_size, hidden_size*2)
        :param q_mask: (batch_size, q_len)
        :return: g (c_len, batch_size, hidden_size*8)
        """
        c_len = c.size(0)
        q_len = q.size(0)
        batch_size = c.size(1)

        c = self.dropout(c)
        q = self.dropout(q)

        c = c.transpose(0, 1)
        q = q.transpose(0, 1)
        cq = c.unsqueeze(2).expand(batch_size, c_len, q_len, -1) * \
            q.unsqueeze(1).expand(batch_size, c_len, q_len, -1)
        cq = self.att_cq(cq).squeeze(3)  # (batch_size, c_len, q_len)

        # similarity matrix
        s = self.att_c(c).expand(batch_size, c_len, q_len) + \
            self.att_q(q).expand(batch_size, q_len, c_len).transpose(1, 2) + \
            cq

        # mask out padding positions of both sequences so they receive no
        # attention weight in either softmax below
        c_pad = c_mask.eq(0).unsqueeze(2).expand(batch_size, c_len, q_len)
        q_pad = q_mask.eq(0).unsqueeze(1).expand(batch_size, c_len, q_len)
        s = s.masked_fill(c_pad, self._MASKED_SCORE)
        s = s.masked_fill(q_pad, self._MASKED_SCORE)

        # c2q: for every content position, attend over question positions
        a = f.softmax(s, dim=2)
        c2q = torch.bmm(a, q)  # (batch_size, c_len, hidden_size*2)

        # q2c: attend over content positions using each row's best score
        b = torch.max(s, dim=2)[0]
        b = f.softmax(b, dim=1)  # (batch_size, c_len)
        q2c = torch.bmm(b.unsqueeze(1), c).expand(batch_size, c_len, -1)  # (batch_size, c_len, hidden_size*2)

        x = torch.cat([c, c2q, c*c2q, c*q2c], dim=2)
        x = c_mask.unsqueeze(2) * x  # zero the features of padded content positions
        x = x.transpose(0, 1)

        return x

    def _output_layer(self, g, m, c_mask):
        """
        output layer
        :param g: (c_len, batch_size, hidden_size*8)
        :param m: (c_len, batch_size, hidden_size*2)
        :param c_mask: (batch_size, c_len)
        :return: ans_range(2, batch_size, content_len)
        """
        gm = self.dropout(torch.cat([g, m], dim=2))
        p1 = self.p1(gm).squeeze(2).transpose(0, 1)  # (batch_size, c_len)

        m = self.rnn(m, c_mask)
        gm = self.dropout(torch.cat([g, m], dim=2))
        p2 = self.p2(gm).squeeze(2).transpose(0, 1)  # (batch_size, c_len)

        # padded content positions get probability exactly 0
        pad = c_mask.eq(0)
        p1 = f.softmax(p1.masked_fill(pad, -float('inf')), dim=1)
        p2 = f.softmax(p2.masked_fill(pad, -float('inf')), dim=1)

        result = torch.stack([p1, p2])

        # add 1e-6 at padded positions only ((mask - 1) * -1e-6 is 1e-6 where
        # mask == 0 and 0 elsewhere); original note: "no gradient explosion"
        new_mask = (c_mask - 1) * (-1e-6)
        result = result + new_mask.unsqueeze(0)

        return result

    def forward(self, batch):
        """
        :param batch: [content, question, answer_start, answer_end]
        :return: ans_range (2, batch_size, content_len)
        """
        content = batch[0]
        question = batch[1]

        # mask
        content_mask = utils.get_mask(content)
        question_mask = utils.get_mask(question)

        # embedding
        content_vec = self.embedding(content)
        question_vec = self.embedding(question)

        # encode
        content_vec = self.encoder(content_vec, content_mask)
        question_vec = self.encoder(question_vec, question_mask)

        # attention flow layer
        g = self._att_flow_layer(content_vec, content_mask, question_vec, question_mask)

        # modeling layer
        m = self.modeling_rnn(g, content_mask)

        # outputs
        ans_range = self._output_layer(g, m, content_mask)

        return ans_range