In [14]:
import re
import os
import tensorflow as tf
import numpy as np
import pandas as pd
from tqdm import tqdm
from transformers import XLNetTokenizer, TFXLNetModel,TFXLNetPreTrainedModel,TFXLNetMainLayer,TFSequenceSummary
from transformers.modeling_tf_utils import get_initializer

In [2]:
# Module-level XLNet tokenizer loaded from 'model/xlnet' (presumably a local
# directory holding the pretrained files — confirm). It is read as a global by
# InputExample.convert_to_features.
tokenizer = XLNetTokenizer.from_pretrained('model/xlnet')

In [3]:
class InputFeatures:
    """Tokenizer output for one query pair, bundled with its integer label."""

    def __init__(self, input_ids, token_type_ids, attention_mask, label):
        """Store the three encoded sequences as given and coerce ``label`` to ``int``.

        Args:
            input_ids: token ids produced by the tokenizer.
            token_type_ids: segment ids produced by the tokenizer.
            attention_mask: attention mask produced by the tokenizer.
            label: anything accepted by ``int()``.

        Raises:
            ValueError / TypeError: if ``label`` cannot be converted by ``int()``.
        """
        self.input_ids = input_ids
        self.token_type_ids = token_type_ids
        self.attention_mask = attention_mask
        # Normalised to a plain Python int regardless of the incoming type.
        self.label = int(label)
        
class InputExample(object):
    """One labelled pair of queries with punctuation stripped from both texts."""

    def __init__(self,category,query1,query2,label):
        """Clean both queries and store them with the category and label.

        Args:
            category: stored unchanged.
            query1: first query string; characters matched by ``re_punctuation`` are removed.
            query2: second query string; cleaned the same way.
            label: anything accepted by ``int()``.
        """
        # Character class of ASCII and full-width punctuation to delete.
        self.re_punctuation='[{}]+'.format(''';'",.!?；‘’“”，。！？''')
        self.category=category
        self.query1=re.sub(self.re_punctuation, '', query1)
        self.query2=re.sub(self.re_punctuation, '', query2 )
        self.label=int(label)

    def convert_to_features(self,trans=False,max_length=64):
        """Encode the query pair with the module-level ``tokenizer``.

        Args:
            trans: when true the pair is encoded as (query2, query1) instead of
                (query1, query2).
            max_length: value forwarded to the tokenizer as ``max_length``;
                defaults to 64, the length that was previously hard-coded.

        Returns:
            InputFeatures built from the tokenizer's ``input_ids``,
            ``token_type_ids`` and ``attention_mask`` plus this example's label.
        """
        if trans:
            first, second = self.query2, self.query1
        else:
            first, second = self.query1, self.query2
        # NOTE(review): ``pad_to_max_length`` is the transformers 2.x spelling;
        # newer releases expect padding='max_length' with truncation=True —
        # confirm against the installed version before upgrading.
        encode_data = tokenizer.encode_plus(first, second, max_length=max_length, pad_to_max_length=True)
        return InputFeatures(
            encode_data['input_ids'],
            encode_data['token_type_ids'],
            encode_data['attention_mask'],
            self.label,
        )

        
def read_file(data_path):
    """Load a CSV file into a DataFrame, discarding rows with any missing value.

    Args:
        data_path: path of the CSV file.

    Returns:
        pandas.DataFrame with every row containing a NaN removed.

    Raises:
        FileNotFoundError: if ``data_path`` does not exist.
    """
    # Fail early with an explicit message instead of letting pandas raise.
    if not os.path.exists(data_path):
        raise FileNotFoundError('{0} not found.'.format(data_path))
    frame = pd.read_csv(data_path)
    return frame.dropna()

def get_examples(data_path):
    """Read a CSV file and turn each remaining row into an InputExample.

    Args:
        data_path: path passed to ``read_file``.

    Returns:
        list of InputExample, one per row, built from the ``category``,
        ``query1``, ``query2`` and ``label`` columns in row order.
    """
    frame = read_file(data_path)
    return [
        InputExample(row['category'], row['query1'], row['query2'], row['label'])
        for _, row in frame.iterrows()
    ]

def get_features(examples):
    """Encode every example twice: first in its original order, then swapped.

    Args:
        examples: iterable of objects exposing ``convert_to_features(flag)``.

    Returns:
        list with two entries per example, the unswapped encoding followed by
        the swapped one, in input order.
    """
    return [
        example.convert_to_features(swapped)
        for example in examples
        for swapped in (False, True)
    ]

def get_dataset(features):
    """Wrap a collection of feature objects in a generator-backed tf.data.Dataset.

    Args:
        features: iterable of objects exposing ``input_ids``, ``attention_mask``,
            ``token_type_ids`` and ``label``.

    Returns:
        tf.data.Dataset yielding ``(inputs, label)`` where ``inputs`` is a dict of
        three int32 variable-length vectors and ``label`` is an int64 scalar.
    """
    feature_keys = ('input_ids', 'attention_mask', 'token_type_ids')

    def gen():
        # One (inputs dict, label) pair per feature object.
        for ex in features:
            yield ({key: getattr(ex, key) for key in feature_keys}, ex.label)

    output_types = ({key: tf.int32 for key in feature_keys}, tf.int64)
    output_shapes = (
        {key: tf.TensorShape([None]) for key in feature_keys},
        tf.TensorShape([]),
    )
    return tf.data.Dataset.from_generator(gen, output_types, output_shapes)

In [4]:
# Parse the training and development CSV files into lists of InputExample
# objects (rows with missing values are dropped by read_file).
train_data = get_examples('data/train.csv')
dev_data = get_examples('data/dev.csv')

In [5]:
# Tokenize every example in both query orders, so each list holds two
# InputFeatures per example.
train_features = get_features(train_data)
dev_features = get_features(dev_data)

In [6]:
# Wrap the feature lists in generator-backed tf.data datasets; they are still
# unbatched at this point.
train_dataset = get_dataset(train_features)
dev_dataset = get_dataset(dev_features)

In [7]:
# Build the input pipelines directly from the feature lists so this cell is
# idempotent: re-running it no longer stacks another shuffle/batch/repeat on a
# dataset that was already batched by a previous run of the same cell.
#
# Training: get_features emits both orderings of a pair back to back, so the
# shuffle buffer covers the whole list to break those pairs apart (max(..., 1)
# keeps the buffer size valid for an empty list).
train_dataset = (get_dataset(train_features)
                 .shuffle(max(len(train_features), 1))
                 .batch(64)
                 .repeat())
# Validation: no shuffling, so the examples are always read in the same order.
# The dataset still repeats because fit() is driven by validation_steps.
dev_dataset = get_dataset(dev_features).batch(64).repeat()

In [20]:
class TFXLNetForYiQing(TFXLNetPreTrainedModel):
    """XLNet encoder followed by two parallel heads that are concatenated.

    The encoder output goes through dropout and is then fed to (a) a
    TFSequenceSummary layer and (b) two stacked LSTMs. The two results are
    concatenated and projected by a sigmoid-activated dense layer with
    ``config.num_labels`` units.
    """

    def __init__(self, config, *inputs, **kwargs):
        """Create the encoder and the layers stacked on top of it.

        Args:
            config: model configuration; ``num_labels`` and ``initializer_range``
                are read here, the rest is consumed by the XLNet layers.
            *inputs, **kwargs: forwarded to the parent constructor.
        """
        super().__init__(config, *inputs, **kwargs)
        self.transformer = TFXLNetMainLayer(config, name="transformer")
        self.seq_summary = TFSequenceSummary(config,name="seq_summary")
        self.first_dropout = tf.keras.layers.Dropout(0.2)
        # First LSTM keeps the full sequence so the second one can consume it.
        self.lstm1 = tf.keras.layers.LSTM(128,return_sequences= True,dropout=0.2)
        self.lstm2 = tf.keras.layers.LSTM(256,dropout=0.2)
        # Despite the name, the sigmoid activation makes this layer emit
        # probabilities rather than raw logits.
        self.logits_proj = tf.keras.layers.Dense(
            config.num_labels, kernel_initializer=get_initializer(config.initializer_range),activation='sigmoid', name="logits_proj")

    def call(self, inputs, **kwargs):
        """Run the forward pass.

        Args:
            inputs: forwarded unchanged to the XLNet main layer.
            **kwargs: forwarded unchanged to the XLNet main layer.

        Returns:
            Output of the sigmoid dense layer applied to the concatenated LSTM
            and summary features.
        """
        transformer_outputs = self.transformer(inputs, **kwargs)
        # The main layer returns a tuple; only its first element is used
        # (presumably the per-token hidden states, (batch, seq, hidden) — confirm).
        # Dropout is applied to that tensor rather than to the whole tuple, so the
        # call no longer depends on the tuple being packable into one tensor.
        hidden_states = self.first_dropout(transformer_outputs[0])
        output_summary = self.seq_summary(hidden_states)

        output_lstm = self.lstm1(hidden_states)
        output_lstm = self.lstm2(output_lstm)
        # Join both heads along the feature axis before the final projection.
        output = tf.concat([output_lstm,output_summary],axis=1)
        output = self.logits_proj(output)
        return output


# Instantiate the custom model from the checkpoint at 'model/xlnet'.
model = TFXLNetForYiQing.from_pretrained('model/xlnet')
# Freeze the XLNet encoder so that only the layers added on top of it are trained.
model.transformer.trainable=False
# Binary cross-entropy pairs with the sigmoid activation of the output layer.
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [21]:
# Print the per-layer parameter table.
# NOTE(review): the saved output below lists LSTM parameter counts that do not
# appear to match the 128/256-unit LSTM layers defined in the model class —
# presumably it comes from an earlier run; re-run this cell to refresh it.
model.summary()

Model: "tfxl_net_for_yi_qing_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
transformer (TFXLNetMainLaye multiple                  116718336 
_________________________________________________________________
seq_summary (TFSequenceSumma multiple                  590592    
_________________________________________________________________
dropout_309 (Dropout)        multiple                  0         
_________________________________________________________________
lstm_11 (LSTM)               multiple                  213248    
_________________________________________________________________
lstm_12 (LSTM)               multiple                  98816     
_________________________________________________________________
logits_proj (Dense)          multiple                  897       
Total params: 117,621,889
Trainable params: 903,553
Non-trainable params: 116,718,336
________________________

In [23]:
# Both datasets repeat indefinitely, so explicit step counts define where an
# epoch (and a validation pass) ends.
# NOTE(review): 273 and 62 are hard-coded; presumably they were derived from the
# number of features divided by the batch size of 64 — confirm they still match
# the current data.
train_steps = 273
valid_steps = 62
model.fit(train_dataset,
          epochs=8,
          steps_per_epoch=train_steps,
          validation_data=dev_dataset,
          validation_steps=valid_steps,
          verbose=2)

Train for 273 steps, validate for 62 steps
Epoch 1/8
273/273 - 108s - loss: 0.4071 - accuracy: 0.8159 - val_loss: 0.3828 - val_accuracy: 0.8465
Epoch 2/8
273/273 - 108s - loss: 0.4004 - accuracy: 0.8185 - val_loss: 0.3695 - val_accuracy: 0.8478
Epoch 3/8
273/273 - 108s - loss: 0.3987 - accuracy: 0.8224 - val_loss: 0.3779 - val_accuracy: 0.8480
Epoch 4/8
273/273 - 108s - loss: 0.3907 - accuracy: 0.8258 - val_loss: 0.3975 - val_accuracy: 0.8412
Epoch 5/8
273/273 - 108s - loss: 0.3918 - accuracy: 0.8250 - val_loss: 0.3454 - val_accuracy: 0.8609
Epoch 6/8
273/273 - 108s - loss: 0.3928 - accuracy: 0.8236 - val_loss: 0.4001 - val_accuracy: 0.8410
Epoch 7/8
273/273 - 108s - loss: 0.3933 - accuracy: 0.8264 - val_loss: 0.3413 - val_accuracy: 0.8584
Epoch 8/8
273/273 - 108s - loss: 0.3943 - accuracy: 0.8234 - val_loss: 0.3539 - val_accuracy: 0.8506


<tensorflow.python.keras.callbacks.History at 0x18a62d22b08>