# CLUE-TNEWS 今日头条中文新闻（短文本）分类

该数据集来自今日头条的新闻版块，共提取了15个类别的新闻，包括旅游，教育，金融，军事等。

训练集(53,360)，验证集(10,000)，测试集(10,000)
     
 例子：
 
 {"label": "102", "label_des": "news_entertainment", "sentence": "江疏影甜甜圈自拍，迷之角度竟这么好看，美吸引一切事物"}
 每一条数据有三个属性，从前往后分别是 分类ID，分类名称，新闻字符串（仅含标题）。

In [2]:
%reload_ext autoreload
%autoreload 2

## 1. 数据观察

In [3]:
import json
import pandas
from tqdm import tqdm
from loguru import logger
import numpy as np

In [4]:
train_file = './data/rawdata/train.json'
test_file = './data/rawdata/test.json'
dev_file = './data/rawdata/dev.json'
labels_file = './data/rawdata/labels.json'

In [5]:
def load_json_data(json_file):
    rd = open(json_file, 'r')
    lines = rd.readlines()
    rd.close()
    json_data = []
    for line in tqdm(lines):
        line = line.strip()
        line_data = json.loads(line)
        json_data.append(line_data)
    print(f"Total: {len(json_data)}")
    print(json_data[:5])
    return json_data

In [6]:
train_data = load_json_data(train_file)
test_data = load_json_data(test_file)
dev_data = load_json_data(dev_file)

100%|██████████| 53360/53360 [00:00<00:00, 196514.55it/s]
100%|██████████| 10000/10000 [00:00<00:00, 221313.11it/s]
100%|██████████| 10000/10000 [00:00<00:00, 204082.52it/s]

Total: 53360
[{'label': '108', 'label_desc': 'news_edu', 'sentence': '上课时学生手机响个不停，老师一怒之下把手机摔了，家长拿发票让老师赔，大家怎么看待这种事？', 'keywords': ''}, {'label': '104', 'label_desc': 'news_finance', 'sentence': '商赢环球股份有限公司关于延期回复上海证券交易所对公司2017年年度报告的事后审核问询函的公告', 'keywords': '商赢环球股份有限公司,年度报告,商赢环球,赢环球股份有限公司,事后审核问询函,上海证券交易所'}, {'label': '106', 'label_desc': 'news_house', 'sentence': '通过中介公司买了二手房，首付都付了，现在卖家不想卖了。怎么处理？', 'keywords': ''}, {'label': '112', 'label_desc': 'news_travel', 'sentence': '2018年去俄罗斯看世界杯得花多少钱？', 'keywords': '莫斯科,贝加尔湖,世界杯,俄罗斯,Hour'}, {'label': '109', 'label_desc': 'news_tech', 'sentence': '剃须刀的个性革新，雷明登天猫定制版新品首发', 'keywords': '剃须刀,绝地求生,定制版,战狼2,红海行动,天猫定制版三防,雷明登,维克托'}]
Total: 10000
[{'id': 0, 'sentence': '在设计史上，每当相对稳定的发展时期，这种设计思想就会成为主导', 'keywords': '民族性,设计思想,继承型设计,复古主义,服装史'}, {'id': 1, 'sentence': '利希施泰纳宣布赛季结束后离队：我需要新的挑战', 'keywords': '尤文,博洛尼亚,施泰纳,利希施泰纳,拉齐奥'}, {'id': 2, 'sentence': '庄家一般都是什么操盘思路？', 'keywords': ''}, {'id': 3, 'sentence': '王者荣耀里搅屎棍英雄都有谁？', 'keywords': ''}, {'id': 4, 'sentence':




### 1.1 样本数量分布

In [7]:
all_data = train_data + dev_data
descs = [ x['label_desc'] for x in all_data]
from collections import Counter
Counter(descs)

Counter({'news_edu': 4083,
         'news_finance': 6156,
         'news_house': 2485,
         'news_travel': 4061,
         'news_tech': 7044,
         'news_sports': 4758,
         'news_game': 4049,
         'news_culture': 4817,
         'news_car': 4909,
         'news_story': 1326,
         'news_entertainment': 5886,
         'news_military': 4348,
         'news_agriculture': 3380,
         'news_world': 5756,
         'news_stock': 302})

### 1.2 样本长度分布

In [8]:
lengths = [ len(x['sentence']) for x in all_data]
logger.info(f"***** Text Lengths *****")
logger.info(f"mean: {np.mean(lengths):.2f}")
logger.info(f"std: {np.mean(lengths):.2f}")
logger.info(f"max: {np.max(lengths)}")
logger.info(f"min: {np.min(lengths)}")

2020-06-13 00:40:12.028 | INFO     | __main__:<module>:2 - ***** Text Lengths *****
2020-06-13 00:40:12.034 | INFO     | __main__:<module>:3 - mean: 22.14
2020-06-13 00:40:12.038 | INFO     | __main__:<module>:4 - std: 22.14
2020-06-13 00:40:12.043 | INFO     | __main__:<module>:5 - max: 145
2020-06-13 00:40:12.047 | INFO     | __main__:<module>:6 - min: 2


### 1.3 样本标签

In [9]:
labels_data = load_json_data(labels_file)
label2desc = {x['label']:x['label_desc'] for x in labels_data}
desc2label = {x['label_desc']:x['label'] for x in labels_data}

100%|██████████| 15/15 [00:00<00:00, 49267.47it/s]

Total: 15
[{'label': '100', 'label_desc': 'news_story'}, {'label': '101', 'label_desc': 'news_culture'}, {'label': '102', 'label_desc': 'news_entertainment'}, {'label': '103', 'label_desc': 'news_sports'}, {'label': '104', 'label_desc': 'news_finance'}]





In [10]:
glue_labels = [ x['label_desc'] for x in labels_data]
print(f"glue_labels: {len(glue_labels)} {glue_labels}")

glue_labels: 15 ['news_story', 'news_culture', 'news_entertainment', 'news_sports', 'news_finance', 'news_house', 'news_car', 'news_edu', 'news_tech', 'news_military', 'news_travel', 'news_world', 'news_stock', 'news_agriculture', 'news_game']


## 2. 模型构建

In [11]:
import json, random
from tqdm import tqdm
from loguru import logger
from pathlib import Path
import pandas as pd
import numpy as np

import sys
sys.path.append('../../../idleuncle/theta')

from theta.modeling.glue import load_model, InputExample, GlueTrainer, get_args
from theta.modeling import load_glue_examples, show_glue_datainfo

In [12]:
seg_len = 0
seg_backoff=0

### 2.1 模型输入数据

In [13]:
# -------------------- Input Data --------------------


def clean_text(text):
    text = text.strip().replace('\n', '')
    text = text.replace('\t', ' ')
    return text


def train_data_generator(train_file):
    for i, json_data in enumerate(tqdm(train_data, desc="train")):
        guid = str(i)
        text = json_data['sentence']
        text = clean_text(text)
        label = json_data['label_desc']

        yield guid, text, None, label
        
def eval_data_generator(eval_file):
    for i, json_data in enumerate(tqdm(dev_data, desc="eval")):
        guid = str(i)
        text = json_data['sentence']
        text = clean_text(text)
        label = json_data['label_desc']

        yield guid, text, None, label
        

def test_data_generator(test_file):

    total_examples = len(test_data)
    for i, json_data in enumerate(tqdm(test_data, desc="test")):
        guid = str(json_data['id'])
        text = json_data['sentence']
        text = clean_text(text)

        yield guid, text, None, None

### 2.2 模型输出结果

In [14]:
# -------------------- Output results --------------------


def save_predict_results(args, pred_results, pred_results_file, test_examples):
    descs = []
    with open(pred_results_file, 'w') as wr:
        for label, example in zip(pred_results, test_examples):
            label_desc = args.id2label[label]
            descs.append(label_desc)
            ID = example.guid
            text = example.text_a
            json_data = {'id':ID, 'label':desc2label[label_desc], 'label_desc':label_desc, 'sentence': text}
            wr.write(f"{json.dumps(json_data, ensure_ascii=False)}\n")
    logger.info(f"Predict results file saved to {pred_results_file}")
    from collections import Counter
    print(f"{Counter(descs)}")


### 2.3 数据样本集合

以下代码通常不需要改动。

In [15]:
# -------------------- Examples --------------------

def load_train_examples(train_file):
    train_examples = load_glue_examples(train_data_generator, train_file)
    logger.info(f"Loaded {len(train_examples)} train examples.")

    return train_examples

def load_eval_examples(eval_file):
    eval_examples = load_glue_examples(eval_data_generator, eval_file)
    logger.info(f"Loaded {len(eval_examples)} eval examples.")

    return eval_examples

def load_test_examples(test_file):
    test_examples = load_glue_examples(test_data_generator, test_file)
    logger.info(f"Loaded {len(test_examples)} test examples.")

    return test_examples



### 2.4 自定义模型
Theta对每类任务都有缺省模型，通常情况下不需要自定义模型。训练器Trainer中传入参数build_model=None即可。

In [16]:
# -------------------- Model --------------------


def build_model(args):
    """
    自定义模型
    规格要求返回模型(model)、优化器(optimizer)、调度器(scheduler)三元组。
    """
    
    # -------- model --------
    from theta.modeling.ner_span import load_pretrained_model
    model = load_pretrained_model(args)
    model.to(args.device)

    # -------- optimizer --------
    from transformers.optimization import AdamW
    from theta.modeling.trainer import get_default_optimizer_parameters
    optimizer_parameters = get_default_optimizer_parameters(
        model, args.weight_decay)
    optimizer = AdamW(optimizer_parameters,
                      lr=args.learning_rate,
                      correct_bias=False)

    # -------- scheduler --------
    from transformers.optimization import get_linear_schedule_with_warmup
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.total_steps * args.warmup_rate,
        num_training_steps=args.total_steps)

    return model, optimizer, scheduler

### 2.5 自定训练器

训练器也不是必须定义的，可以直接用GlueTrainer实例化训练器。

自定义训练器通常是为了使用自定义模型或重载训练、评估、推理过程的关键节点，便于输出、调试等。

In [17]:
# -------------------- Trainer --------------------


class AppTrainer(GlueTrainer):
    def __init__(self, args, glue_labels):
        # 使用自定义模型时，传入build_model参数。
        super(AppTrainer, self).__init__(args, glue_labels, build_model=None)


### 2.6 主控流程

In [18]:
def main(args):
    if args.do_eda:
        show_glue_datainfo(glue_labels, train_data_generator, args.train_file,
                           test_data_generator, args.test_file)
    else:
        trainer = AppTrainer(args, glue_labels)

        if args.do_train:
            train_examples = load_train_examples(args.train_file)
            eval_examples = load_eval_examples(args.eval_file)
            trainer.train(args, train_examples, eval_examples)

        elif args.do_eval:
            eval_examples = load_eval_examples(args.eval_file)
            model = load_model(args)
            trainer.evaluate(args, model, eval_examples)

        elif args.do_predict:
            test_examples = load_test_examples(args)
            model = load_model(args)
            trainer.predict(args, model, test_examples)

            save_predict_results(
                args, trainer.pred_results,
                f"./{args.dataset_name}_predict.json",
                test_examples)



## 3. 运行

### 3.1 全局参数

In [19]:
#def add_special_args(parser):
#    return parser

#from theta.modeling.glue.args import get_args
#args = get_args([add_special_args])

import sys, argparse

def get_init_args():
    parser = argparse.ArgumentParser()
    for arg in sys.argv:
        if arg.startswith('-'):
            parser.add_argument(arg, type=str)
    args = parser.parse_args()
    return args

#import argparse
#parser = argparse.ArgumentParser()
#parser.add_argument("-f",type=str)
#args = parser.parse_args()

args = get_init_args()

DATASET_NAME="tnews"
DATA_DIR="./data"
OUTPUT_DIR=f"output_{DATASET_NAME}"
CHECKPOINT_MODEL=f"{OUTPUT_DIR}/best"
SUBMISSIONS_DIR = "./submissions"

TRAIN_FILE = "./data/rawdata/train.json"
TEST_FILE = "./data/rawdata/test.json"
EVAL_FILE = "./data/rawdata/dev.json"

EPOCHS=10
TRAIN_SAMPLE_RATE=1.0

MODEL_TYPE="bert"
PRETRAINED_MODEL="/opt/share/pretrained/pytorch/bert-base-chinese"
LEARNING_RATE=2e-5
TRAIN_MAX_SEQ_LENGTH=160
EVAL_MAX_SEQ_LENGTH=160
TRAIN_BATCH_SIZE=64
EVAL_BATCH_SIZE=64
PREDICT_BATCH_SIZE=64

args.do_train=False
args.do_predict=False
args.do_eval=False
args.train_max_seq_length = TRAIN_MAX_SEQ_LENGTH
args.eval_max_seq_length = EVAL_MAX_SEQ_LENGTH
args.num_train_epochs = EPOCHS
args.learning_rate = LEARNING_RATE
args.per_gpu_train_batch_size = TRAIN_BATCH_SIZE
args.per_gpu_eval_batch_size = EVAL_BATCH_SIZE
args.per_gpu_predict_batch_size = EVAL_BATCH_SIZE

args.data_dir = DATA_DIR
args.dataset_name = DATASET_NAME
args.train_file = TRAIN_FILE
args.eval_file = EVAL_FILE
args.test_file = TEST_FILE

args.output_dir = OUTPUT_DIR
args.pred_output_dir = OUTPUT_DIR
args.submissions_dir = SUBMISSIONS_DIR

args.enable_kd = False
args.kd_coeff = 1.0
args.kd_decay = 0.995

args.model_type = MODEL_TYPE
args.model_path = PRETRAINED_MODEL
args.overwrite_cache = True
args.train_sample_rate = TRAIN_SAMPLE_RATE
args.seed = 8864
args.local_rank=-1
args.no_cuda = None
args.do_lower_case=True
args.cache_dir = None
args.train_rate=0.8
args.fold = 0
args.gradient_accumulation_steps = 1
args.max_steps = 0
#args.loss_type = 'FocalLoss'
args.loss_type = 'CrossEntropyLoss'
args.focalloss_gamma = 1.5
args.focalloss_alpha = None
args.weight_decay = 0.0
args.warmup_rate = 0.1
args.fp16 = True
args.fp16_opt_level = 'O1'
args.max_grad_norm = 1.0
args.save_checkpoints = False
args.no_eval_on_each_epoch=False


### 3.2 启动训练

In [20]:
# args.fold = 9
# args.loss_type = 'FocalLoss'
# args.focalloss_gamma = 1.5
args.enable_kd = True
args.kd_coeff = 1.0

args.learning_rate = 4e-5

args.do_train=True
args.do_predict=False
args.do_eval=False
args.do_eda = False
args.generate_submission = False

main(args)

2020-06-13 00:40:23.325 | INFO     | theta.modeling.glue.trainer:init_labels:381 - args.label2id: {'news_story': 0, 'news_culture': 1, 'news_entertainment': 2, 'news_sports': 3, 'news_finance': 4, 'news_house': 5, 'news_car': 6, 'news_edu': 7, 'news_tech': 8, 'news_military': 9, 'news_travel': 10, 'news_world': 11, 'news_stock': 12, 'news_agriculture': 13, 'news_game': 14}
2020-06-13 00:40:23.326 | INFO     | theta.modeling.glue.trainer:init_labels:382 - args.id2label: {0: 'news_story', 1: 'news_culture', 2: 'news_entertainment', 3: 'news_sports', 4: 'news_finance', 5: 'news_house', 6: 'news_car', 7: 'news_edu', 8: 'news_tech', 9: 'news_military', 10: 'news_travel', 11: 'news_world', 12: 'news_stock', 13: 'news_agriculture', 14: 'news_game'}
2020-06-13 00:40:23.327 | INFO     | theta.modeling.glue.trainer:init_labels:383 - args.num_labels: 15
2020-06-13 00:40:23.341 | DEBUG    | theta.modeling.glue.trainer:__init__:345 - target_names: ['news_story', 'news_culture', 'news_entertainment'

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Epoch(1/10)   1/834 [..............................] - ETA: 9:54 - lr: 0.00e+00 - loss: 2.8705Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 32768.0




Epoch(1/10)  15/834 [..............................] - ETA: 10:11 - lr: 3.36e-07 - loss: 2.7362Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


2020-06-13 00:51:58.032 | INFO     | theta.modeling.trainer:train:351 - Epoch(1/10) evaluating.
2020-06-13 00:51:58.865 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 00:51:58.866 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 00:51:58.867 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 00:51:58.868 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




  _warn_prf(average, modifier, msg_start, len(result))
2020-06-13 00:52:33.034 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.55      0.42      0.47       215
      news_culture       0.49      0.61      0.55       736
news_entertainment       0.57      0.60      0.59       910
       news_sports       0.71      0.70      0.71       767
      news_finance       0.46      0.60      0.52       956
        news_house       0.59      0.64      0.61       378
          news_car       0.72      0.62      0.67       791
          news_edu       0.58      0.60      0.59       646
         news_tech       0.57      0.42      0.48      1089
     news_military       0.61      0.39      0.48       716
       news_travel       0.46      0.53      0.49       693
        news_world       0.49      0.61      0.54       905
        news_stock       0.00      0.00      0.00        45
  news_

{"eval_acc": "0.563800", "eval_f1": "0.526102", "eval_acc_and_f1": "0.544951", "learning_rate": "0.000040", "loss": "1.574061", "step": 834}
 


2020-06-13 01:04:17.223 | INFO     | theta.modeling.trainer:train:351 - Epoch(2/10) evaluating.
2020-06-13 01:04:18.066 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 01:04:18.067 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 01:04:18.067 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 01:04:18.068 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 01:04:52.308 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.52      0.48      0.50       215
      news_culture       0.50      0.61      0.55       736
news_entertainment       0.60      0.55      0.57       910
       news_sports       0.73      0.72      0.73       767
      news_finance       0.46      0.43      0.44       956
        news_house       0.63      0.62      0.62       378
          news_car       0.75      0.56      0.64       791
          news_edu       0.54      0.63      0.58       646
         news_tech       0.53      0.59      0.56      1089
     news_military       0.51      0.68      0.58       716
       news_travel       0.48      0.51      0.50       693
        news_world       0.49      0.41      0.44       905
        news_stock       0.50      0.18      0.26        45
  news_agriculture       0.53      0.52      0.52       494
  

{"eval_acc": "0.561500", "eval_f1": "0.544701", "eval_acc_and_f1": "0.553101", "learning_rate": "0.000036", "loss": "1.220316", "step": 1668}
 


2020-06-13 01:16:37.693 | INFO     | theta.modeling.trainer:train:351 - Epoch(3/10) evaluating.
2020-06-13 01:16:38.094 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 01:16:38.095 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 01:16:38.096 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 01:16:38.097 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 01:17:12.809 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.52      0.39      0.45       215
      news_culture       0.52      0.57      0.54       736
news_entertainment       0.57      0.62      0.59       910
       news_sports       0.76      0.69      0.72       767
      news_finance       0.48      0.41      0.44       956
        news_house       0.60      0.61      0.61       378
          news_car       0.72      0.59      0.65       791
          news_edu       0.60      0.59      0.59       646
         news_tech       0.52      0.63      0.57      1089
     news_military       0.52      0.64      0.58       716
       news_travel       0.53      0.44      0.48       693
        news_world       0.48      0.44      0.46       905
        news_stock       0.44      0.36      0.40        45
  news_agriculture       0.53      0.58      0.55       494
  

{"eval_acc": "0.566300", "eval_f1": "0.553460", "eval_acc_and_f1": "0.559880", "learning_rate": "0.000031", "loss": "0.979170", "step": 2502}
 


2020-06-13 01:28:57.715 | INFO     | theta.modeling.trainer:train:351 - Epoch(4/10) evaluating.
2020-06-13 01:28:58.142 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 01:28:58.143 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 01:28:58.144 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 01:28:58.145 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 01:29:32.561 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.49      0.40      0.44       215
      news_culture       0.54      0.57      0.55       736
news_entertainment       0.56      0.64      0.60       910
       news_sports       0.75      0.73      0.74       767
      news_finance       0.48      0.49      0.49       956
        news_house       0.61      0.60      0.61       378
          news_car       0.68      0.62      0.65       791
          news_edu       0.63      0.58      0.60       646
         news_tech       0.54      0.54      0.54      1089
     news_military       0.54      0.56      0.55       716
       news_travel       0.52      0.44      0.48       693
        news_world       0.50      0.53      0.52       905
        news_stock       0.41      0.27      0.32        45
  news_agriculture       0.48      0.59      0.53       494
  

{"eval_acc": "0.570200", "eval_f1": "0.551686", "eval_acc_and_f1": "0.560943", "learning_rate": "0.000027", "loss": "0.785866", "step": 3336}
 


2020-06-13 01:41:12.513 | INFO     | theta.modeling.trainer:train:351 - Epoch(5/10) evaluating.
2020-06-13 01:41:12.910 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 01:41:12.911 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 01:41:12.912 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 01:41:12.913 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 01:41:47.651 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.51      0.41      0.46       215
      news_culture       0.56      0.58      0.57       736
news_entertainment       0.56      0.62      0.59       910
       news_sports       0.76      0.72      0.74       767
      news_finance       0.49      0.47      0.48       956
        news_house       0.59      0.62      0.61       378
          news_car       0.67      0.63      0.65       791
          news_edu       0.61      0.57      0.59       646
         news_tech       0.53      0.57      0.55      1089
     news_military       0.53      0.53      0.53       716
       news_travel       0.52      0.48      0.50       693
        news_world       0.50      0.51      0.51       905
        news_stock       0.50      0.44      0.47        45
  news_agriculture       0.49      0.53      0.51       494
  

{"eval_acc": "0.568700", "eval_f1": "0.560023", "eval_acc_and_f1": "0.564361", "learning_rate": "0.000022", "loss": "0.639321", "step": 4170}
 


2020-06-13 01:53:30.770 | INFO     | theta.modeling.trainer:train:351 - Epoch(6/10) evaluating.
2020-06-13 01:53:31.135 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 01:53:31.136 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 01:53:31.136 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 01:53:31.137 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 01:54:05.438 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.58      0.36      0.45       215
      news_culture       0.53      0.55      0.54       736
news_entertainment       0.55      0.62      0.58       910
       news_sports       0.74      0.74      0.74       767
      news_finance       0.49      0.46      0.48       956
        news_house       0.58      0.61      0.59       378
          news_car       0.69      0.60      0.64       791
          news_edu       0.58      0.62      0.60       646
         news_tech       0.53      0.58      0.55      1089
     news_military       0.53      0.58      0.55       716
       news_travel       0.52      0.46      0.49       693
        news_world       0.47      0.46      0.47       905
        news_stock       0.42      0.36      0.39        45
  news_agriculture       0.49      0.54      0.51       494
  

{"eval_acc": "0.563800", "eval_f1": "0.549559", "eval_acc_and_f1": "0.556680", "learning_rate": "0.000018", "loss": "0.545089", "step": 5004}
 


2020-06-13 02:05:44.053 | INFO     | theta.modeling.trainer:train:351 - Epoch(7/10) evaluating.
2020-06-13 02:05:44.768 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 02:05:44.769 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 02:05:44.770 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 02:05:44.770 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 02:06:19.126 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.54      0.40      0.46       215
      news_culture       0.55      0.54      0.55       736
news_entertainment       0.57      0.60      0.58       910
       news_sports       0.71      0.75      0.73       767
      news_finance       0.51      0.48      0.49       956
        news_house       0.59      0.64      0.61       378
          news_car       0.70      0.60      0.64       791
          news_edu       0.58      0.61      0.60       646
         news_tech       0.53      0.56      0.54      1089
     news_military       0.56      0.54      0.55       716
       news_travel       0.49      0.53      0.51       693
        news_world       0.51      0.49      0.50       905
        news_stock       0.38      0.33      0.35        45
  news_agriculture       0.48      0.53      0.50       494
  

{"eval_acc": "0.566600", "eval_f1": "0.550860", "eval_acc_and_f1": "0.558730", "learning_rate": "0.000013", "loss": "0.476954", "step": 5838}
 
Epoch(8/10)  56/834 [=>............................] - ETA: 10:50 - lr: 1.32e-05 - loss: 0.2985Gradient overflow.  Skipping step, loss scaler 0 reducing loss scale to 16384.0


2020-06-13 02:17:58.631 | INFO     | theta.modeling.trainer:train:351 - Epoch(8/10) evaluating.
2020-06-13 02:17:59.015 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 02:17:59.016 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 02:17:59.017 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 02:17:59.017 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 02:18:33.918 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.49      0.46      0.47       215
      news_culture       0.55      0.53      0.54       736
news_entertainment       0.60      0.56      0.58       910
       news_sports       0.73      0.75      0.74       767
      news_finance       0.49      0.52      0.50       956
        news_house       0.58      0.61      0.59       378
          news_car       0.68      0.62      0.65       791
          news_edu       0.56      0.61      0.58       646
         news_tech       0.53      0.54      0.53      1089
     news_military       0.55      0.57      0.56       716
       news_travel       0.51      0.46      0.49       693
        news_world       0.50      0.50      0.50       905
        news_stock       0.44      0.42      0.43        45
  news_agriculture       0.47      0.57      0.51       494
  

{"eval_acc": "0.566100", "eval_f1": "0.555913", "eval_acc_and_f1": "0.561006", "learning_rate": "0.000009", "loss": "0.425645", "step": 6672}
 


2020-06-13 02:30:15.237 | INFO     | theta.modeling.trainer:train:351 - Epoch(9/10) evaluating.
2020-06-13 02:30:15.967 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 02:30:15.968 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 02:30:15.968 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 02:30:15.969 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 02:30:50.444 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.53      0.42      0.47       215
      news_culture       0.55      0.56      0.55       736
news_entertainment       0.56      0.62      0.59       910
       news_sports       0.73      0.74      0.74       767
      news_finance       0.50      0.46      0.48       956
        news_house       0.58      0.60      0.59       378
          news_car       0.68      0.62      0.65       791
          news_edu       0.58      0.58      0.58       646
         news_tech       0.53      0.56      0.54      1089
     news_military       0.54      0.54      0.54       716
       news_travel       0.51      0.48      0.49       693
        news_world       0.50      0.51      0.51       905
        news_stock       0.40      0.42      0.41        45
  news_agriculture       0.48      0.53      0.50       494
  

{"eval_acc": "0.565800", "eval_f1": "0.553684", "eval_acc_and_f1": "0.559742", "learning_rate": "0.000004", "loss": "0.390586", "step": 7506}
 


2020-06-13 02:42:32.839 | INFO     | theta.modeling.trainer:train:351 - Epoch(10/10) evaluating.
2020-06-13 02:42:33.651 | INFO     | theta.modeling.trainer:evaluate:434 - Start evaluating ...
2020-06-13 02:42:33.652 | INFO     | theta.modeling.trainer:evaluate:435 -   Num examples    = 10000
2020-06-13 02:42:33.653 | INFO     | theta.modeling.trainer:evaluate:436 -   Num epoch steps = 157
2020-06-13 02:42:33.653 | INFO     | theta.modeling.trainer:evaluate:437 -   Batch size = 64




2020-06-13 02:43:08.038 | INFO     | theta.modeling.glue.trainer:__call__:367 - classify_report:
                    precision    recall  f1-score   support

        news_story       0.53      0.40      0.46       215
      news_culture       0.55      0.55      0.55       736
news_entertainment       0.58      0.59      0.58       910
       news_sports       0.73      0.74      0.74       767
      news_finance       0.49      0.48      0.49       956
        news_house       0.59      0.61      0.60       378
          news_car       0.68      0.63      0.65       791
          news_edu       0.58      0.59      0.58       646
         news_tech       0.52      0.57      0.54      1089
     news_military       0.55      0.53      0.54       716
       news_travel       0.50      0.47      0.49       693
        news_world       0.50      0.53      0.51       905
        news_stock       0.40      0.44      0.42        45
  news_agriculture       0.48      0.53      0.50       494
  

{"eval_acc": "0.566100", "eval_f1": "0.553972", "eval_acc_and_f1": "0.560036", "learning_rate": "0.000000", "loss": "0.355363", "step": 8340}
 


### 3.3 启动推理

In [None]:
args.do_train=False
args.do_predict=True
args.do_eval=False
args.do_eda = False
args.generate_submission = False
args.model_path=CHECKPOINT_MODEL
main(args)

In [None]:
args.fold

## Load model

In [None]:


args = get_init_args()

import os
model_path="./output_tnews/best"
model_args = json.load(open(os.path.join(model_path, "training_args.json"), 'r'))
for k,v in model_args.items():
    setattr(args, k, v)
    
args

In [None]:
model = load_model(model_path)