In [1]:
import os
import shutil
import json
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_hub as hub
import tensorflow_text as text
import numpy as np

In [2]:
# Device selection: when the host exposes two or more GPUs, make only the
# second one (index 1) visible to TensorFlow; otherwise keep the default.
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) >= 2:
    second_gpu = gpus[1]
    tf.config.set_visible_devices(second_gpu, 'GPU')
    print('use gpu1')

# Seed both NumPy's and TensorFlow's global generators with the same value.
RANDOM_SEED=68
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

use gpu1


In [3]:
from transformers import TFAutoModel,AutoTokenizer,AutoConfig

# Single source of truth for the checkpoint id, so the encoder and its
# tokenizer are always loaded from the same place.
PRETRAINED_CHECKPOINT = 'uer/roberta-base-finetuned-chinanews-chinese'

# Loading through TFAutoModel yields the bare encoder; the load log reports
# that the checkpoint's 'classifier' and dropout layers are not used.
model = TFAutoModel.from_pretrained(PRETRAINED_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_CHECKPOINT)

2022-03-21 17:54:44.842249: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-21 17:54:45.459970: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 6390 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 2080, pci bus id: 0000:b3:00.0, compute capability: 7.5
Some layers from the model checkpoint at uer/roberta-base-finetuned-chinanews-chinese were not used when initializing TFBertModel: ['dropout_37', 'classifier']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model)

In [4]:
# Category names in label-id order: the position in this tuple IS the id.
_LABEL_DESCS = (
    "news_story",
    "news_culture",
    "news_entertainment",
    "news_sports",
    "news_finance",
    "news_house",
    "news_car",
    "news_edu",
    "news_tech",
    "news_military",
    "news_travel",
    "news_world",
    "news_stock",
    "news_agriculture",
    "news_game",
)

# Forward map: label description -> integer class id.
desc_label_dict = {}
for _idx, _desc in enumerate(_LABEL_DESCS):
    desc_label_dict[_desc] = _idx

# Reverse map: integer class id -> label description.
label_desc_dict = dict(enumerate(_LABEL_DESCS))

# Number of distinct classes.
CLASS_SIZE = len(desc_label_dict)

In [5]:
# Read the training split: one JSON object per line, each providing a
# 'sentence' string and a 'label_desc' key that maps to a class id.
sentences=[]
labels=[]
# Decode explicitly as UTF-8 rather than the platform's locale default
# (the text is presumably Chinese, given the Chinese encoder checkpoint).
with open('train.json', encoding='utf-8') as f:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in f:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines
        data = json.loads(line)
        labels.append(desc_label_dict[data['label_desc']])
        sentences.append(data['sentence'])
# One-hot targets, one row per example and one column per class.
labels_tensor = tf.one_hot(labels,len(desc_label_dict))

In [6]:
# Read the validation split: one JSON object per line, each providing a
# 'sentence' string and a 'label_desc' key that maps to a class id.
dev_sentences=[]
dev_labels=[]
# Decode explicitly as UTF-8 rather than the platform's locale default
# (the text is presumably Chinese, given the Chinese encoder checkpoint).
with open('dev.json', encoding='utf-8') as f:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in f:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines
        data = json.loads(line)
        dev_labels.append(desc_label_dict[data['label_desc']])
        dev_sentences.append(data['sentence'])
# One-hot targets, one row per example and one column per class.
dev_labels_tensor = tf.one_hot(dev_labels,len(desc_label_dict))

In [7]:
def build_classifier_model(output_size=CLASS_SIZE,encoder=model,dropout_rate=0.6):
    """Stack a dropout + linear classification head on top of an encoder.

    Args:
        output_size: Number of logits produced by the head. Defaults to
            CLASS_SIZE so the head always matches the label dictionary.
        encoder: Model called as ``encoder(input_ids, attention_mask,
            token_type_ids)`` whose result exposes ``'pooler_output'``.
            The default is bound to the module-level ``model`` at the time
            this function is defined.
        dropout_rate: Dropout rate applied to the pooled output before the
            dense layer.

    Returns:
        A ``keras.Model`` whose inputs are, in order,
        ``[input_ids, attention_mask, token_type_ids]`` (variable-length
        int32 sequences) and whose output is the raw, un-normalised logits
        of the ``'classifier'`` dense layer.
    """
    # shape=(None,) leaves the sequence length free per batch.
    input_ids = keras.Input(shape=(None,),dtype=tf.int32)
    token_type_ids = keras.Input(shape=(None,),dtype=tf.int32)
    attention_mask = keras.Input(shape=(None,),dtype=tf.int32)
    # Positional order matters here: ids, then mask, then segment ids.
    outputs = encoder(input_ids,attention_mask,token_type_ids)
    net = outputs['pooler_output']
    net = keras.layers.Dropout(dropout_rate)(net)
    # activation=None: the head emits logits, so the loss must be configured
    # with from_logits=True.
    net = keras.layers.Dense(output_size, activation=None, name='classifier')(net)
    return keras.Model([input_ids,attention_mask,token_type_ids], net)


In [8]:
# Size the head from the label dictionary rather than relying on the
# function's default, so the two cannot silently disagree.
cls_model = build_classifier_model(output_size=CLASS_SIZE)

In [9]:
# Upper bound on tokens per example; longer inputs are truncated.
MAX_SEQ_LEN = 50


def encode_sentences(texts, max_length=MAX_SEQ_LEN):
    """Tokenize a list of strings with the notebook's shared settings.

    Pads every example to the longest one in ``texts``, truncates at
    ``max_length`` tokens and returns TensorFlow tensors.
    """
    return tokenizer(texts, padding=True, truncation=True,
                     max_length=max_length, return_tensors="tf")


train_sentences = encode_sentences(sentences)
train_input_ids = train_sentences['input_ids']
train_token_type_ids = train_sentences['token_type_ids']
train_attention_mask = train_sentences['attention_mask']

# `dev_sentences` is rebound below from the raw string list to its encodings
# (binding kept for compatibility with any later cell that reads it). Stash
# the raw list first so that re-running this cell does not feed the encodings
# back into the tokenizer.
if isinstance(dev_sentences, list):
    dev_sentences_raw = dev_sentences
dev_sentences = encode_sentences(dev_sentences_raw)
dev_input_ids = dev_sentences['input_ids']
dev_token_type_ids = dev_sentences['token_type_ids']
dev_attention_mask = dev_sentences['attention_mask']

In [10]:
# Optimiser for fine-tuning.
adam_opt = keras.optimizers.Adam(learning_rate=2e-5)

# NOTE: despite the name `bce`, this is *categorical* cross-entropy over
# one-hot targets. from_logits=True because the 'classifier' dense layer has
# no activation.
bce = keras.losses.CategoricalCrossentropy(from_logits=True)

cls_model.compile(
    optimizer=adam_opt,
    loss=bce,
    metrics=['accuracy'],
)

In [12]:
# First training run: 5 epochs, batch size 16, validating on the dev split.
# Feature order must match the model inputs: ids, attention mask, segment ids.
train_inputs = [train_input_ids,train_attention_mask,train_token_type_ids]
dev_inputs = [dev_input_ids,dev_attention_mask,dev_token_type_ids]
cls_model.fit(
    train_inputs,
    labels_tensor,
    validation_data=(dev_inputs, dev_labels_tensor),
    epochs=5,
    batch_size=16,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4b88044220>

In [13]:
# Second training run with the same settings. `cls_model` is not rebuilt in
# between, so this continues from the weights left by the previous fit call.
cls_model.fit(
    x=[train_input_ids,train_attention_mask,train_token_type_ids],
    y=labels_tensor,
    validation_data=(
        [dev_input_ids,dev_attention_mask,dev_token_type_ids],
        dev_labels_tensor,
    ),
    batch_size=16,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4ab4604a90>