In [0]:
# coding: UTF-8
%matplotlib inline
import torch
import time 
import torch.nn as nn
import torch.nn.functional as F 
from pytorch_pretrained_bert import BertModel, BertTokenizer, BertConfig, BertAdam
import pandas as pd 
import numpy as np 
from tqdm import tqdm 
from torch.utils.data import *
import matplotlib.pyplot as plt

In [0]:
def show_plot(iteration, loss):
    """Draw a line chart of ``loss`` against ``iteration`` and display it.

    Args:
        iteration: x-axis values (one entry per recorded step).
        loss: y-axis values, same length as ``iteration``. Despite the
            name, any per-step metric can be passed.
    """
    x_values, y_values = iteration, loss
    plt.plot(x_values, y_values)
    plt.show()
    
# Directory passed to BertModel.from_pretrained() when the model is built.
path = 'data'
# WordPiece tokenizer built from the vocabulary file in the data directory.
tokenizer = BertTokenizer(vocab_file="data/vocab.txt")

In [0]:
# Build fixed-length BERT inputs for every sentence pair in data/data.txt.
# Each non-empty line is expected to be "<sentence A>\t<sentence B>\t<label>".
input_ids = []    # token ids, padded with 0 up to pad_size
input_types = []  # segment ids: 0 for sentence A, 1 for sentence B, 0 for padding
input_masks = []  # attention mask: 1 for real tokens, 0 for padding
label = []        # one single-element list [int(label)] per example
pad_size = 64     # every example is padded / truncated to this many tokens

with open("data/data.txt", encoding="utf-8") as f:
    for line in tqdm(f):
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline) instead of crashing
            # on the 3-way unpack below.
            continue
        x1, x2, y = line.split('\t')
        x1 = tokenizer.tokenize(x1)
        x2 = tokenizer.tokenize(x2)

        # Trim the pair so that [CLS] + A + [SEP] + B + [SEP] fits in pad_size.
        # Trimming before assembly keeps both [SEP] markers; slicing the
        # assembled sequence would cut off the closing [SEP].
        max_pair_len = pad_size - 3
        while len(x1) + len(x2) > max_pair_len:
            longer = x1 if len(x1) > len(x2) else x2
            longer.pop()

        tokens = ["[CLS]"] + x1 + ["[SEP]"] + x2 + ["[SEP]"]
        ids = tokenizer.convert_tokens_to_ids(tokens)
        # Segment 0 covers [CLS] + A + first [SEP]; segment 1 covers B + last [SEP].
        types = [0] * (len(x1) + 2) + [1] * (len(x2) + 1)
        masks = [1] * len(ids)

        # Right-pad all three sequences with zeros up to pad_size.
        pad_len = pad_size - len(ids)
        ids = ids + [0] * pad_len
        types = types + [0] * pad_len
        masks = masks + [0] * pad_len

        input_ids.append(ids)
        input_types.append(types)
        input_masks.append(masks)
        label.append([int(y)])

8398it [00:02, 2988.38it/s]


In [0]:
# Shuffle example indices with a fixed seed, then split 80% / 20%.
random_order = list(range(len(input_ids)))
np.random.seed(2020)
np.random.shuffle(random_order)
print(random_order[:10])

split_at = int(len(input_ids) * 0.8)
train_indices = random_order[:split_at]
test_indices = random_order[split_at:]


def _gather(rows, indices):
    """Return the selected ``rows`` (in ``indices`` order) as a numpy array."""
    return np.array([rows[idx] for idx in indices])


# Training portion: the first 80% of the shuffled order.
input_ids_train = _gather(input_ids, train_indices)
input_types_train = _gather(input_types, train_indices)
input_masks_train = _gather(input_masks, train_indices)
y_train = _gather(label, train_indices)
print(input_ids_train.shape, input_types_train.shape, input_masks_train.shape, y_train.shape)

# Held-out portion: the remaining 20%.
input_ids_test = _gather(input_ids, test_indices)
input_types_test = _gather(input_types, test_indices)
input_masks_test = _gather(input_masks, test_indices)
y_test = _gather(label, test_indices)
print(input_ids_test.shape, input_types_test.shape, input_masks_test.shape, y_test.shape)

[4602, 7440, 4762, 7897, 2306, 6203, 3410, 974, 1740, 4260]
(6718, 64) (6718, 64) (6718, 64) (6718, 1)
(1680, 64) (1680, 64) (1680, 64) (1680, 1)


In [0]:
BATCH_SIZE = 32


def _as_long_dataset(*arrays):
    """Wrap each array in a LongTensor and bundle them into a TensorDataset."""
    return TensorDataset(*[torch.LongTensor(arr) for arr in arrays])


# Training batches are drawn in random order.
train_data = _as_long_dataset(input_ids_train, input_types_train,
                              input_masks_train, y_train)
train_sampler = RandomSampler(train_data)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=train_sampler)

# Test batches are served in their stored order.
test_data = _as_long_dataset(input_ids_test, input_types_test,
                             input_masks_test, y_test)
test_sampler = SequentialSampler(test_data)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, sampler=test_sampler)

In [0]:
class Model(nn.Module):
    """Pretrained BERT encoder followed by a linear classification layer.

    The encoder weights are loaded from the module-level ``path`` and are
    left trainable, so the whole network is fine-tuned.
    """

    def __init__(self, num_classes=2):
        """Load the pretrained encoder and create the classification head.

        Args:
            num_classes: number of output logits (defaults to 2).
        """
        super(Model, self).__init__()
        self.bert = BertModel.from_pretrained(path)
        for param in self.bert.parameters():
            param.requires_grad = True
        # Size the head from the loaded config rather than hard-coding 768,
        # so checkpoints with a different hidden size also work.
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, x):
        """Compute class logits for a batch.

        Args:
            x: indexable whose first three items are the token ids, the
                segment (token type) ids and the attention mask, in that
                order.

        Returns:
            The output of the linear head applied to BERT's pooled output.
        """
        context, types, mask = x[0], x[1], x[2]
        # Only the pooled output is used; the per-token encoding is discarded.
        _, pooled = self.bert(context, token_type_ids=types,
                              attention_mask=mask,
                              output_all_encoded_layers=False)
        out = self.fc(pooled)
        return out

In [0]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the model to the selected device (the variable is `device`; the
# previous `DEVICE` was never defined and raised NameError).
model = Model().to(device)

In [0]:
param_optimizer = list(model.named_parameters())
# Parameters whose name contains any of these substrings get no weight decay.
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

# Partition the parameters into a decayed and a non-decayed group,
# preserving their original order within each group.
decay_params = []
no_decay_params = []
for param_name, param_tensor in param_optimizer:
    if any(marker in param_name for marker in no_decay):
        no_decay_params.append(param_tensor)
    else:
        decay_params.append(param_tensor)

optimizer_grouped_parameters = [
    {'params': decay_params, 'weight_decay': 0.01},
    {'params': no_decay_params, 'weight_decay': 0.0},
]

NUM_EPOCHS = 1
# t_total is the total number of optimizer steps: batches per epoch * epochs.
optimizer = BertAdam(
    optimizer_grouped_parameters,
    lr=2e-5,
    warmup=0.05,
    t_total=len(train_loader) * NUM_EPOCHS,
)

In [0]:
# Fine-tune the model, logging running accuracy every 100 batches, saving a
# checkpoint after each epoch and plotting loss / accuracy at the end.
print('TRAINNING START!')
model.train()  # enable training-mode behaviour (e.g. dropout) for fine-tuning
steps_per_epoch = len(train_loader)
# Histories live outside the epoch loop so the final plots always have data
# and cover every epoch.
count = []     # progress in percent of one epoch, cumulative across epochs
loss_his = []  # loss of each batch
acc_his = []   # running accuracy (percent) within the current epoch
for epoch in range(NUM_EPOCHS):
    correct = 0  # correctly classified samples so far in this epoch
    total = 0    # samples seen so far in this epoch
    for i, (x1, x2, x3, y) in enumerate(train_loader):
        x1, x2, x3, y = x1.to(device), x2.to(device), x3.to(device), y.to(device)
        # Flatten (batch, 1) labels to (batch,). view(-1) stays 1-D even for
        # a batch of one, where a bare squeeze() would produce a 0-d tensor.
        y = y.view(-1)

        model.zero_grad()
        y_pred = model([x1, x2, x3])
        loss = F.cross_entropy(y_pred, y)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit.
        pred = y_pred.argmax(dim=-1)
        correct += (pred == y).sum().item()
        total += y.size(0)
        # Accuracy is normalised by samples seen, not by the number of batches.
        running_acc = 100. * correct / total

        if (i + 1) % 100 == 0:
            print(' Train Epoch: {} [{}/{} ({:.2f}%)]\n Loss: {:.6f} \n Accuracy: {}'.format(epoch,
                                                                           total,
                                                                           len(train_loader.dataset),
                                                                           100. * i / steps_per_epoch,
                                                                           loss.item(),
                                                                           running_acc))
        count.append(100. * (epoch * steps_per_epoch + i) / steps_per_epoch)
        loss_his.append(loss.item())
        acc_his.append(running_acc)
    # Overwrite the checkpoint with the weights at the end of each epoch.
    PATH_checkpoint = './data/1.pth'
    torch.save(model.state_dict(), PATH_checkpoint)
print("TRAINNING END")
show_plot(count, loss_his)
show_plot(count, acc_his)