In [3]:
import sys
sys.path.append('../')

import torch
from transformers import BertModel, XLNetModel
import torch.optim as optim
from utils import config
from utils.process_data import get_bert_dataloader, get_xlnet_dataloader


class BertCls(torch.nn.Module):
    """Sentence-pair binary classifier built on one shared BERT encoder.

    Both sentences are encoded independently by the same ``BertModel``. The
    hidden state at sequence position 0 of each sentence is taken, the two
    vectors are concatenated, and the result is fed through a small MLP head
    that ends in a sigmoid, giving one score in (0, 1) per pair.
    """

    def __init__(self):
        super(BertCls, self).__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        # Size the head from the loaded checkpoint instead of hard-coding 768,
        # so a checkpoint with a different hidden width still fits.
        pair_dim = self.bert.config.hidden_size * 2
        # NOTE: the attribute name `liner` (sic) is kept on purpose; renaming
        # it would change the state_dict keys of saved checkpoints.
        self.liner = torch.nn.Sequential(
            torch.nn.BatchNorm1d(pair_dim),
            torch.nn.Dropout(),
            torch.nn.Linear(pair_dim, 256),
            torch.nn.BatchNorm1d(256),
            torch.nn.Dropout(),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 1),
            torch.nn.Sigmoid()
        )

    def forward(self, ste1, ste2, mask1=None, mask2=None):
        """Score a batch of sentence pairs.

        Args:
            ste1: token-id tensor for the first sentences.
            ste2: token-id tensor for the second sentences.
            mask1: optional attention mask for ``ste1``; ``None`` keeps the
                encoder default (attend to every position).
            mask2: optional attention mask for ``ste2``.

        Returns:
            Tensor with one sigmoid score per pair (last dimension of size 1).
        """
        # Index the encoder result with [0] instead of tuple-unpacking it:
        # this works both when transformers returns a plain tuple and when it
        # returns a ModelOutput (unpacking a ModelOutput yields its keys), and
        # it does not break if the encoder returns more than two items.
        hidden1 = self.bert(ste1, attention_mask=mask1)[0]
        hidden2 = self.bert(ste2, attention_mask=mask2)[0]
        pair = torch.cat((hidden1[:, 0, :], hidden2[:, 0, :]), dim=1)
        return self.liner(pair)


class XLNetCls(torch.nn.Module):
    """XLNet-based counterpart of the sentence-pair classifier (work in progress).

    The constructor builds the encoder and an MLP head ending in a sigmoid,
    but ``forward`` currently only runs the encoder on the first sentence and
    returns the encoder's raw output.
    """

    def __init__(self):
        super(XLNetCls, self).__init__()
        self.xlnet = XLNetModel.from_pretrained(config.XLNet_model_path)
        nn = torch.nn
        head_layers = [
            nn.BatchNorm1d(768 * 2),
            nn.Dropout(),
            nn.Linear(768 * 2, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(),
            nn.ReLU(),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        self.liner = nn.Sequential(*head_layers)

    def forward(self, ste1, ste2):
        """Return the raw XLNet encoder output for ``ste1``.

        TODO(review): ``ste2`` and ``self.liner`` are not used yet, so this
        model cannot be passed to ``train``/``evaluate`` as-is. The pair
        classification path still has to be written.
        """
        return self.xlnet(ste1)


def freeze_parameter(cls_model, backbone_key='bert', trainable_key='bert.encoder.layer.11'):
    """Freeze a pretrained backbone while keeping one part of it trainable.

    Parameters are selected by substring match on their ``named_parameters``
    name. The defaults reproduce the original behaviour: every parameter whose
    name contains ``'bert'`` is frozen except those under
    ``'bert.encoder.layer.11'``.

    Args:
        cls_model: module whose parameters are updated in place.
        backbone_key: substring identifying backbone parameters to freeze.
        trainable_key: substring identifying parameters that stay trainable
            (e.g. ``'xlnet.layer.11'`` for the XLNet model); ``None`` freezes
            the whole backbone.

    Parameters matching neither key are left untouched.
    """
    for name, param in cls_model.named_parameters():
        if trainable_key is not None and trainable_key in name:
            param.requires_grad = True
        elif backbone_key in name:
            param.requires_grad = False


def train(model, train_data, test_data, epoch=30):
    """Train ``model`` with Adam on binary cross-entropy and log progress.

    Args:
        model: module called as ``model(s1, s2)`` whose output is compared
            with the label by ``BCELoss``.
        train_data: iterable yielding ``(s1, s2, l)`` batches.
        test_data: iterable of the same form, passed to ``cal_loss`` for the
            periodic held-out loss.
        epoch: number of passes over ``train_data``.

    Every 100th iteration (counted across epochs) prints the smoothed training
    loss together with the loss on ``test_data``. Returns ``None``.
    """
    loss_fn = torch.nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Make sure Dropout/BatchNorm are in training mode even if the model was
    # previously switched to eval (e.g. by ``evaluate``).
    model.train()

    loss_sum = 0.7  # starting value of the smoothed loss
    idx = 0
    for e in range(epoch):
        for s1, s2, l in train_data:
            optimizer.zero_grad()
            y = model(s1, s2)
            loss = loss_fn(y, l)
            loss.backward()
            optimizer.step()

            # Exponential moving average of the training loss. Use the Python
            # float from .item(): accumulating the tensor itself would chain
            # every iteration's autograd history onto loss_sum and grow memory
            # for the whole run.
            loss_sum = 0.9 * loss_sum + 0.1 * loss.item()
            if idx % 100 == 99:
                test_loss = cal_loss(model, test_data)
                # Re-assert training mode in case the evaluation helper left
                # the model in eval mode.
                model.train()
                print('epoch:{} iter:{} loss:{} test_loss:{}'.format(e, idx, loss_sum, test_loss))
            idx += 1


def cal_loss(model, data):
    """Compute a smoothed binary cross-entropy of ``model`` over ``data``.

    The result is an exponential moving average over the batches (starting
    value 0.7, decay 0.9), not an arithmetic mean, so later batches weigh more.

    The model is put in eval mode for the pass, so Dropout is disabled and
    BatchNorm neither uses batch statistics nor updates its running statistics
    from held-out data. The previous train/eval mode is restored afterwards.

    Args:
        model: module called as ``model(s1, s2)``.
        data: iterable yielding ``(s1, s2, l)`` batches.

    Returns:
        The smoothed loss: a 0-dim tensor, or the float 0.7 if ``data`` yields
        no batches.
    """
    loss_fn = torch.nn.BCELoss()
    loss_sum = 0.7
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for s1, s2, l in data:
                loss = loss_fn(model(s1, s2), l)
                loss_sum = 0.9 * loss_sum + 0.1 * loss
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)
    return loss_sum


def evaluate(model, test_data):
    """Print and return precision, recall and F1 of ``model`` on ``test_data``.

    A score above 0.5 counts as a positive prediction. All three counters
    start at 0.1 rather than 0, which avoids division by zero and slightly
    smooths the ratios. The model is switched to eval mode and left there.

    Args:
        model: module called as ``model(s1, s2)`` returning one score per pair.
        test_data: iterable yielding ``(s1, s2, l)`` batches; labels are
            expected to be 0/1.

    Returns:
        Tuple ``(P, R, F1)`` of Python floats (also printed).
    """
    model.eval()
    right = 0.1      # true positives
    preidt_p = 0.1   # predicted positives
    positive = 0.1   # actual positives
    with torch.no_grad():
        for s1, s2, l in test_data:
            predicted = model(s1, s2).reshape(-1) > 0.5
            # Work on derived tensors only. The previous in-place
            # `l[l == 0] = -1` on the NumPy view wrote through to the caller's
            # label tensor whenever the labels lived on the CPU.
            labels = l.reshape(-1).to(predicted.device)
            preidt_p += predicted.sum().item()
            positive += labels.sum().item()
            right += (predicted & (labels == 1)).sum().item()
    P = right / preidt_p
    R = right / positive
    F1 = 2 * P * R / (P + R)
    print('P:{} R:{} F1:{}'.format(P, R, F1))
    return P, R, F1

In [4]:
# Build the XLNet train/test loaders; later cells iterate `train_data` as
# (s1, s2, l) batches.
train_data, test_data = get_xlnet_dataloader()

In [5]:
# Instantiate the XLNet classifier and move it to the GPU.
cls_model = XLNetCls()
# .cuda() returns the module, so as the cell's last expression it also echoes
# the module structure shown in the output.
cls_model.cuda()

XLNetCls(
  (xlnet): XLNetModel(
    (word_embedding): Embedding(32000, 768)
    (layer): ModuleList(
      (0): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True

In [6]:
# Smoke test: push one training batch through the model and keep the result in `y`.
# NOTE(review): this needs the cell that defines `train_data` to have run in the
# current kernel; the recorded NameError shows it had not (out-of-order execution).
for s1, s2, l in train_data:
    y = cls_model(s1, s2)
    break

NameError: name 'train_data' is not defined

In [13]:
# Shape of the first element of the encoder output kept in `y` by the
# single-batch cell; presumably (batch, sequence length, hidden size) — TODO confirm.
y[0].shape

torch.Size([32, 50, 768])

In [4]:
# Load the tokenizers for the two pretrained checkpoints configured in utils.config.
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
from transformers import BertTokenizer, XLNetTokenizer
xlnet_tokenizer = XLNetTokenizer.from_pretrained(config.XLNet_model_path)
bert_tokenizer = BertTokenizer.from_pretrained(config.bert_model_path)

In [7]:
# Encode two sentences together with the BERT tokenizer; the output shows that
# encode(s1, s2) returns a single id list covering both sentences.
s1='我爱你'
s2='我爱中国'
bert_tokenizer.encode(s1,s2)

[101, 2769, 4263, 872, 102, 2769, 4263, 704, 1744, 102]

In [36]:
# Encode the same sentence with both tokenizers, padded to config.sentence_max_len,
# so the two padding layouts can be compared in the decode cells.
# NOTE(review): `pad_to_max_length` is deprecated in newer transformers releases in
# favour of `padding='max_length'` — confirm against the installed version.
s1='可以购买来回车票吗'
xltokens=xlnet_tokenizer.encode(s1,pad_to_max_length=True, max_length=config.sentence_max_len)
berttokens=bert_tokenizer.encode(s1,pad_to_max_length=True, max_length=config.sentence_max_len)

In [37]:
# Decode the padded XLNet ids: the output shows padding on the left and the
# sequence ending in <sep><cls>, i.e. the <cls> token is at the last position.
xlnet_tokenizer.decode(xltokens)

'<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad> 可以购买来回车票吗<sep><cls>'

In [38]:
# Decode the padded BERT ids: the output shows [CLS] at position 0 and padding
# on the right.
bert_tokenizer.decode(berttokens)

'[CLS] 可 以 购 买 来 回 车 票 吗 [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'