The base code is borrowed from [this Google Colab Notebook](https://colab.research.google.com/github/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb).

Refactored by [Shuyi Wang](https://www.linkedin.com/in/shuyi-wang-b3955026/)

Please refer to [this Medium Article](https://medium.com/@wshuyi/how-to-do-text-binary-classification-with-bert-f1348a25d905) for the tutorial on how to classify English text data.



In [1]:
!pip install bert-tensorflow

Collecting bert-tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/7eb4e8b6ea35b7cc54c322c816f976167a43019750279a8473d355800a93/bert_tensorflow-1.0.1-py2.py3-none-any.whl (67kB)
[K     |████▉                           | 10kB 24.1MB/s eta 0:00:01[K     |█████████▊                      | 20kB 30.9MB/s eta 0:00:01[K     |██████████████▋                 | 30kB 36.6MB/s eta 0:00:01[K     |███████████████████▍            | 40kB 40.5MB/s eta 0:00:01[K     |████████████████████████▎       | 51kB 39.4MB/s eta 0:00:01[K     |█████████████████████████████▏  | 61kB 41.2MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 7.9MB/s 
Installing collected packages: bert-tensorflow
Successfully installed bert-tensorflow-1.0.1


In [2]:
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import pickle
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




In [0]:
def pretty_print(result):
    """Lay out a flat mapping of results as a single-column table.

    Args:
        result: Mapping of names to values, e.g. the metrics dict
            returned by ``run_on_dfs``.

    Returns:
        A pandas DataFrame indexed by the mapping's keys, with one
        column named ``"values"``.
    """
    # A one-element list gives a single-row frame; transposing it turns
    # every key into a row label.
    table = pd.DataFrame([result]).transpose()
    table.columns = ["values"]
    return table

In [0]:
def create_tokenizer_from_hub_module(bert_model_hub):
  """Build a FullTokenizer configured from a TF-Hub BERT module.

  The module's "tokenization_info" signature is evaluated to obtain the
  vocabulary file and the lower-casing flag, e.g. for
  https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1

  Args:
    bert_model_hub: TF-Hub handle of the BERT module to load.

  Returns:
    A bert.tokenization.FullTokenizer created with the module's vocab
    file and do_lower_case setting.

  TODO (original author's note): the upper-to-lower-case conversion may
  not actually be needed for this model.
  """
  # Do the lookup in its own graph rather than in the default one.
  with tf.Graph().as_default():
    module = hub.Module(bert_model_hub)
    info = module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]
    with tf.Session() as session:
      vocab_path, lower_case = session.run(fetches)

  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_path, do_lower_case=lower_case)

def make_features(dataset, label_list, MAX_SEQ_LENGTH, tokenizer, DATA_COLUMN, LABEL_COLUMN):
    """Convert the rows of a data frame into BERT input features.

    Args:
        dataset: Frame whose rows are read via ``apply(..., axis=1)``.
        label_list: Label values passed on to the feature converter.
        MAX_SEQ_LENGTH: Sequence length passed on to the feature converter.
        tokenizer: Tokenizer passed on to the feature converter.
        DATA_COLUMN: Name of the column holding the text.
        LABEL_COLUMN: Name of the column holding the label.

    Returns:
        Whatever ``run_classifier.convert_examples_to_features`` returns
        for the generated examples.
    """
    def row_to_example(row):
        # Single-sentence input: only text_a is filled in and no guid is set.
        return run_classifier.InputExample(
            guid=None,
            text_a=row[DATA_COLUMN],
            text_b=None,
            label=row[LABEL_COLUMN])

    # Wrap every row in the InputExample form that BERT's helpers read.
    examples = dataset.apply(row_to_example, axis=1)
    # Turn the examples into features.
    # TODO (original author's note): the tokenizer argument might be unnecessary.
    return run_classifier.convert_examples_to_features(
        examples, label_list, MAX_SEQ_LENGTH, tokenizer)

def create_model(bert_model_hub, is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  """Creates a classification model: a TF-Hub BERT module plus one dense layer.

  Args:
    bert_model_hub: TF-Hub handle of the BERT module.
    is_predicting: True when building the inference graph. In that case no
      loss is computed and dropout is left out.
    input_ids: Tensor fed to the module's "tokens" signature as `input_ids`.
    input_mask: Tensor fed to the module's "tokens" signature as `input_mask`.
    segment_ids: Tensor fed to the module's "tokens" signature as `segment_ids`.
    labels: Integer class ids; one-hot encoded with depth `num_labels`.
    num_labels: Number of output classes.

  Returns:
    `(predicted_labels, log_probs)` when `is_predicting` is true, otherwise
    `(loss, predicted_labels, log_probs)`. `log_probs` is the log-softmax of
    the logits, i.e. log-probabilities rather than probabilities.
  """

  # trainable=True exposes the module's weights to the optimizer.
  bert_module = hub.Module(
      bert_model_hub,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Task-specific classification layer stacked on top of BERT.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout is a training-time regulariser; keep it out of the inference
    # graph so that predictions are deterministic.
    # NOTE: callers that pass is_predicting=False for evaluation (as model_fn
    # does) still get dropout there, exactly as before.
    if not is_predicting:
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    # argmax over the class axis already yields one id per example. It is
    # deliberately not wrapped in tf.squeeze: squeezing would collapse a batch
    # holding a single example to a scalar and drop the batch dimension.
    predicted_labels = tf.argmax(log_probs, axis=-1, output_type=tf.int32)
    # If we're predicting, we want predicted labels and the log-probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(bert_model_hub, num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns a `model_fn` closure for `tf.estimator.Estimator`.

  Args:
    bert_model_hub: TF-Hub handle passed through to `create_model`.
    num_labels: Number of output classes passed through to `create_model`.
    learning_rate: Learning rate handed to `bert.optimization.create_optimizer`.
    num_train_steps: Total training steps handed to the optimizer factory.
    num_warmup_steps: Warm-up steps handed to the optimizer factory.

  Returns:
    A function `model_fn(features, labels, mode, params)` that returns a
    `tf.estimator.EstimatorSpec` for TRAIN, EVAL or PREDICT.
  """

  def metric_fn(label_ids, predicted_labels):
    """Builds the evaluation metric ops keyed by their reported names."""
    # NOTE(review): precision/recall and the confusion counts are binary
    # metrics - presumably only meaningful for two-class problems; confirm
    # before training with more labels.
    accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
    f1_score = tf.contrib.metrics.f1_score(label_ids, predicted_labels)
    auc = tf.metrics.auc(label_ids, predicted_labels)
    recall = tf.metrics.recall(label_ids, predicted_labels)
    precision = tf.metrics.precision(label_ids, predicted_labels)
    true_pos = tf.metrics.true_positives(label_ids, predicted_labels)
    true_neg = tf.metrics.true_negatives(label_ids, predicted_labels)
    false_pos = tf.metrics.false_positives(label_ids, predicted_labels)
    false_neg = tf.metrics.false_negatives(label_ids, predicted_labels)
    return {
        "eval_accuracy": accuracy,
        "f1_score": f1_score,
        "auc": auc,
        "precision": precision,
        "recall": recall,
        "true_positives": true_pos,
        "true_negatives": true_neg,
        "false_positives": false_pos,
        "false_negatives": false_neg
    }

  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for the Estimator; labels are read from `features`."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    if is_predicting:
      (predicted_labels, log_probs) = create_model(
        bert_model_hub, is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

      # The 'probabilities' entry holds log-probabilities (log-softmax
      # output); the key is kept unchanged for existing consumers.
      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL
    (loss, predicted_labels, log_probs) = create_model(
      bert_model_hub, is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
      # Only the training graph needs the optimizer and its train op.
      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
      return tf.estimator.EstimatorSpec(mode=mode,
        loss=loss,
        train_op=train_op)

    # Only the evaluation graph needs the metric ops.
    eval_metrics = metric_fn(label_ids, predicted_labels)
    return tf.estimator.EstimatorSpec(mode=mode,
      loss=loss,
      eval_metric_ops=eval_metrics)

  # Return the actual model function in the closure
  return model_fn

def estimator_builder(bert_model_hub, OUTPUT_DIR, SAVE_SUMMARY_STEPS, SAVE_CHECKPOINTS_STEPS, label_list, LEARNING_RATE, num_train_steps, num_warmup_steps, BATCH_SIZE):
    """Assemble the Estimator together with its model_fn and RunConfig.

    Args:
        bert_model_hub: TF-Hub handle forwarded to ``model_fn_builder``.
        OUTPUT_DIR: Used as the RunConfig's ``model_dir``.
        SAVE_SUMMARY_STEPS: Used as the RunConfig's ``save_summary_steps``.
        SAVE_CHECKPOINTS_STEPS: Used as the RunConfig's ``save_checkpoints_steps``.
        label_list: Label values; only its length (the class count) is used.
        LEARNING_RATE: Forwarded to ``model_fn_builder``.
        num_train_steps: Forwarded to ``model_fn_builder``.
        num_warmup_steps: Forwarded to ``model_fn_builder``.
        BATCH_SIZE: Stored in the Estimator's ``params`` as ``"batch_size"``.

    Returns:
        Tuple ``(estimator, model_fn, run_config)``.
    """
    model_fn = model_fn_builder(
        bert_model_hub=bert_model_hub,
        num_labels=len(label_list),
        learning_rate=LEARNING_RATE,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)

    # Output directory plus summary / checkpoint cadence.
    run_config = tf.estimator.RunConfig(
        model_dir=OUTPUT_DIR,
        save_summary_steps=SAVE_SUMMARY_STEPS,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

    return (
        tf.estimator.Estimator(
            model_fn=model_fn,
            config=run_config,
            params={"batch_size": BATCH_SIZE}),
        model_fn,
        run_config,
    )


In [0]:
def run_on_dfs(train, test, DATA_COLUMN, LABEL_COLUMN,
               MAX_SEQ_LENGTH=128,
               BATCH_SIZE=32,
               LEARNING_RATE=2e-5,
               NUM_TRAIN_EPOCHS=3.0,
               WARMUP_PROPORTION=0.1,
               SAVE_SUMMARY_STEPS=100,
               SAVE_CHECKPOINTS_STEPS=10000,
               bert_model_hub="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"):
    """Fine-tune BERT on `train` and evaluate it on `test`.

    Checkpoints and summaries go to the module-level `OUTPUT_DIR`, which is
    read as a global when this function is called.

    Args:
        train: Data frame used for training.
        test: Data frame used for evaluation.
        DATA_COLUMN: Name of the text column in both frames.
        LABEL_COLUMN: Name of the label column in both frames.
        MAX_SEQ_LENGTH: Sequence length used when building features.
        BATCH_SIZE: Batch size stored in the Estimator params.
        LEARNING_RATE: Learning rate for the optimizer.
        NUM_TRAIN_EPOCHS: Number of passes over the training features.
        WARMUP_PROPORTION: Fraction of the training steps used for warm-up.
        SAVE_SUMMARY_STEPS: Summary cadence for the RunConfig.
        SAVE_CHECKPOINTS_STEPS: Checkpoint cadence for the RunConfig.
        bert_model_hub: TF-Hub handle of the BERT module.

    Returns:
        Tuple `(result_dict, estimator)`: the evaluation metrics and the
        trained Estimator.
    """
    label_list = train[LABEL_COLUMN].unique().tolist()
    try:
        # unique() reports labels in first-seen order, so on a shuffled frame
        # the label -> id mapping (and with it which class the binary metrics
        # count as positive) would change with row order. Sorting pins it.
        label_list = sorted(label_list)
    except TypeError:
        # Labels of mixed, non-comparable types: keep the first-seen order.
        pass

    tokenizer = create_tokenizer_from_hub_module(bert_model_hub)

    train_features = make_features(train, label_list, MAX_SEQ_LENGTH, tokenizer, DATA_COLUMN, LABEL_COLUMN)
    test_features = make_features(test, label_list, MAX_SEQ_LENGTH, tokenizer, DATA_COLUMN, LABEL_COLUMN)

    # Steps = batches per epoch times epochs; warm-up is a fixed share of them.
    num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
    num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

    estimator, model_fn, run_config = estimator_builder(
                                  bert_model_hub,
                                  OUTPUT_DIR,
                                  SAVE_SUMMARY_STEPS,
                                  SAVE_CHECKPOINTS_STEPS,
                                  label_list,
                                  LEARNING_RATE,
                                  num_train_steps,
                                  num_warmup_steps,
                                  BATCH_SIZE)

    train_input_fn = bert.run_classifier.input_fn_builder(
        features=train_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=True,
        drop_remainder=False)

    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=MAX_SEQ_LENGTH,
        is_training=False,
        drop_remainder=False)

    # steps=None: evaluate until the input function is exhausted.
    result_dict = estimator.evaluate(input_fn=test_input_fn, steps=None)
    return result_dict, estimator
    

In [0]:
import random
# Seed Python's built-in `random` module.
# NOTE(review): this call only affects the `random` module; nothing in this
# cell seeds pandas/NumPy or TensorFlow - confirm whether that is intended.
random.seed(10)

In [0]:
# Directory handed to the Estimator's RunConfig as model_dir. run_on_dfs()
# reads this module-level name directly instead of taking it as an argument.
OUTPUT_DIR = 'output'

----- You only need to work with the cells from here on -----

## Get your data

In [8]:
!wget https://github.com/wshuyi/demo-chinese-text-binary-classification-with-bert/raw/master/dianping_train_test.pickle

--2020-02-23 02:16:04--  https://github.com/wshuyi/demo-chinese-text-binary-classification-with-bert/raw/master/dianping_train_test.pickle
Resolving github.com (github.com)... 52.74.223.119
Connecting to github.com (github.com)|52.74.223.119|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/wshuyi/demo-chinese-text-binary-classification-with-bert/master/dianping_train_test.pickle [following]
--2020-02-23 02:16:05--  https://raw.githubusercontent.com/wshuyi/demo-chinese-text-binary-classification-with-bert/master/dianping_train_test.pickle
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 561978 (549K) [application/octet-stream]
Saving to: ‘dianping_train_test.pickle’


2020-02-23 02:16:06 (39.5 MB/s) - 

In [0]:
# Load the (train, test) pair stored in the downloaded pickle.
# SECURITY: unpickling can execute arbitrary code - only load files that
# come from a source you trust.
with open("dianping_train_test.pickle", mode="rb") as pickle_file:
    train, test = pickle.load(pickle_file)

In [0]:
# Shuffle every row of the training frame. pandas sampling is not controlled
# by random.seed(), so pass a fixed random_state (same value as the seed used
# earlier in the notebook) to make the shuffle repeatable across runs.
train = train.sample(len(train), random_state=10)

In [11]:
# Show the first rows of the shuffled training frame.
train.head()

Unnamed: 0,comment,sentiment
286,味道一般 不怎么好吃 该给的玻璃杯也不给了 一说有没有 一张臭脸摆出来,0
770,点了一个手撕饼，一个烟熏骨，一个口味菜花，还有酸汤肥牛，前面三个都不错，分量也很足，两个人都...,1
753,两个人5点半拿号，等到将近7:30吃上，点了熏排骨，月牙骨酸菜，锅包肉，酱油炒饭，味道差强人...,0
1188,传说很火的店 5点到了 足足等了一个小时 没敢动地方 里面有点像南京大排档 有唱曲的 服务员...,1
151,北李家的菜不可靠，不放心！好几次吃完都会胃疼，这次朋友们都反映饭后恶心，还有人夜里去了医院，...,0


In [0]:
# Keyword arguments forwarded to run_on_dfs(): column names, optimizer
# settings and the Chinese BERT module to fine-tune.
myparam = dict(
    DATA_COLUMN="comment",
    LABEL_COLUMN="sentiment",
    LEARNING_RATE=2e-5,
    NUM_TRAIN_EPOCHS=3,
    bert_model_hub="https://tfhub.dev/google/bert_chinese_L-12_H-768_A-12/1",
)

In [13]:
# Fine-tune and evaluate. `result` is the metrics dict; `estimator` is kept
# at module level because predict() below reads it as a global.
result, estimator = run_on_dfs(train, test, **myparam)

INFO:tensorflow:global_step/sec: 1.04357


INFO:tensorflow:global_step/sec: 1.04357


INFO:tensorflow:loss = 0.16441922, step = 100 (95.827 sec)


INFO:tensorflow:loss = 0.16441922, step = 100 (95.827 sec)


INFO:tensorflow:Saving checkpoints for 150 into output/model.ckpt.


INFO:tensorflow:Saving checkpoints for 150 into output/model.ckpt.


INFO:tensorflow:Loss for final step: 0.011572415.


INFO:tensorflow:Loss for final step: 0.011572415.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2020-02-23T02:21:46Z


INFO:tensorflow:Starting evaluation at 2020-02-23T02:21:46Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from output/model.ckpt-150


INFO:tensorflow:Restoring parameters from output/model.ckpt-150


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2020-02-23-02:21:56


INFO:tensorflow:Finished evaluation at 2020-02-23-02:21:56


INFO:tensorflow:Saving dict for global step 150: auc = 0.8980811, eval_accuracy = 0.8975, f1_score = 0.9002432, false_negatives = 24.0, false_positives = 17.0, global_step = 150, loss = 0.37562764, precision = 0.9158416, recall = 0.8851675, true_negatives = 174.0, true_positives = 185.0


INFO:tensorflow:Saving dict for global step 150: auc = 0.8980811, eval_accuracy = 0.8975, f1_score = 0.9002432, false_negatives = 24.0, false_positives = 17.0, global_step = 150, loss = 0.37562764, precision = 0.9158416, recall = 0.8851675, true_negatives = 174.0, true_positives = 185.0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 150: output/model.ckpt-150


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 150: output/model.ckpt-150


In [14]:
# Display the evaluation metrics as a one-column table.
pretty_print(result)

Unnamed: 0,values
auc,0.898081
eval_accuracy,0.8975
f1_score,0.900243
false_negatives,24.0
false_positives,17.0
loss,0.375628
precision,0.915842
recall,0.885167
true_negatives,174.0
true_positives,185.0


In [25]:
def predict(train, test, DATA_COLUMN, LABEL_COLUMN,
            MAX_SEQ_LENGTH=128,
            bert_model_hub="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"):
  """Print the trained model's prediction for every row of `test`.

  Uses the module-level `estimator`, which must already exist.

  Args:
    train: Data frame used only to enumerate the label values.
    test: Data frame whose rows are converted to features and predicted.
    DATA_COLUMN: Name of the text column.
    LABEL_COLUMN: Name of the label column.
    MAX_SEQ_LENGTH: Sequence length used when building features.
    bert_model_hub: TF-Hub handle used to build the tokenizer.

  Returns:
    None. Each prediction dict is printed; its 'probabilities' entry holds
    the model's log-softmax output.
  """
  # Two classes in the demo data.
  known_labels = train[LABEL_COLUMN].unique().tolist()
  features = make_features(
      test,
      known_labels,
      MAX_SEQ_LENGTH,
      create_tokenizer_from_hub_module(bert_model_hub),
      DATA_COLUMN,
      LABEL_COLUMN)

  input_fn = run_classifier.input_fn_builder(
      features=features,
      seq_length=MAX_SEQ_LENGTH,
      is_training=False,
      drop_remainder=False)

  outputs = estimator.predict(input_fn=input_fn)

  print("Predictions on test file")
  # One prediction dict per input row.
  for item in outputs:
    print(item)


# Keyword arguments for predict(): column names and the Chinese BERT module
# whose tokenizer matches the fine-tuned model.
myparam2 = {
        "DATA_COLUMN": "comment",
        "LABEL_COLUMN": "sentiment",
        "bert_model_hub":"https://tfhub.dev/google/bert_chinese_L-12_H-768_A-12/1"
    }
print(test[:5])
# predict() uses its first argument only to enumerate the label values, so
# pass the full training frame: a five-row slice of the shuffled data may
# contain just one class, leaving the other label unknown during feature
# conversion.
predict(train, test[:5], **myparam2)

                                             comment  sentiment
0           里面座位并没有坐满 外面等候的人比较多 不知道为什么不让进去 要了一个面上的很慢          1
1                    非常满意的一次团购，菜品不错，自助餐里算不错的了，还是团购合适          1
2  服务越来越差 东西不如以前的好了 服务员一个个的真是素质太差了 这回进餐感觉非常不好 以后绝...          0
3  没办法了不是高峰期 点的菜得一样一样催 网络不好非要微信点餐 还得让顾客自己去前台拿电子菜单...          0
4  真的不知道为什么会这么火，排了那么半天，居然给我吃这个，点的几乎都是特色，感觉除了干酪鱼其他...          0
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Writing example 0 of 5


INFO:tensorflow:Writing example 0 of 5


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] 里 面 座 位 并 没 有 坐 满 外 面 等 候 的 人 比 较 多 不 知 道 为 什 么 不 让 进 去 要 了 一 个 面 上 的 很 慢 [SEP]


INFO:tensorflow:tokens: [CLS] 里 面 座 位 并 没 有 坐 满 外 面 等 候 的 人 比 较 多 不 知 道 为 什 么 不 让 进 去 要 了 一 个 面 上 的 很 慢 [SEP]


INFO:tensorflow:input_ids: 101 7027 7481 2429 855 2400 3766 3300 1777 4007 1912 7481 5023 952 4638 782 3683 6772 1914 679 4761 6887 711 784 720 679 6375 6822 1343 6206 749 671 702 7481 677 4638 2523 2714 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 7027 7481 2429 855 2400 3766 3300 1777 4007 1912 7481 5023 952 4638 782 3683 6772 1914 679 4761 6887 711 784 720 679 6375 6822 1343 6206 749 671 702 7481 677 4638 2523 2714 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] 非 常 满 意 的 一 次 团 购 ， 菜 品 不 错 ， 自 助 餐 里 算 不 错 的 了 ， 还 是 团 购 合 适 [SEP]


INFO:tensorflow:tokens: [CLS] 非 常 满 意 的 一 次 团 购 ， 菜 品 不 错 ， 自 助 餐 里 算 不 错 的 了 ， 还 是 团 购 合 适 [SEP]


INFO:tensorflow:input_ids: 101 7478 2382 4007 2692 4638 671 3613 1730 6579 8024 5831 1501 679 7231 8024 5632 1221 7623 7027 5050 679 7231 4638 749 8024 6820 3221 1730 6579 1394 6844 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 7478 2382 4007 2692 4638 671 3613 1730 6579 8024 5831 1501 679 7231 8024 5632 1221 7623 7027 5050 679 7231 4638 749 8024 6820 3221 1730 6579 1394 6844 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] 服 务 越 来 越 差 东 西 不 如 以 前 的 好 了 服 务 员 一 个 个 的 真 是 素 质 太 差 了 这 回 进 餐 感 觉 非 常 不 好 以 后 绝 不 会 再 来 了 [SEP]


INFO:tensorflow:tokens: [CLS] 服 务 越 来 越 差 东 西 不 如 以 前 的 好 了 服 务 员 一 个 个 的 真 是 素 质 太 差 了 这 回 进 餐 感 觉 非 常 不 好 以 后 绝 不 会 再 来 了 [SEP]


INFO:tensorflow:input_ids: 101 3302 1218 6632 3341 6632 2345 691 6205 679 1963 809 1184 4638 1962 749 3302 1218 1447 671 702 702 4638 4696 3221 5162 6574 1922 2345 749 6821 1726 6822 7623 2697 6230 7478 2382 679 1962 809 1400 5318 679 833 1086 3341 749 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 3302 1218 6632 3341 6632 2345 691 6205 679 1963 809 1184 4638 1962 749 3302 1218 1447 671 702 702 4638 4696 3221 5162 6574 1922 2345 749 6821 1726 6822 7623 2697 6230 7478 2382 679 1962 809 1400 5318 679 833 1086 3341 749 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] 没 办 法 了 不 是 高 峰 期 点 的 菜 得 一 样 一 样 催 网 络 不 好 非 要 微 信 点 餐 还 得 让 顾 客 自 己 去 前 台 拿 电 子 菜 单 问 一 下 菜 品 是 什 么 做 的 一 个 服 务 员 不 理 人 另 一 个 人 说 刚 来 的 不 知 道 我 也 是 醉 了 白 堤 路 的 店 总 去 这 家 店 不 要 砸 招 牌 好 吗 [SEP]


INFO:tensorflow:tokens: [CLS] 没 办 法 了 不 是 高 峰 期 点 的 菜 得 一 样 一 样 催 网 络 不 好 非 要 微 信 点 餐 还 得 让 顾 客 自 己 去 前 台 拿 电 子 菜 单 问 一 下 菜 品 是 什 么 做 的 一 个 服 务 员 不 理 人 另 一 个 人 说 刚 来 的 不 知 道 我 也 是 醉 了 白 堤 路 的 店 总 去 这 家 店 不 要 砸 招 牌 好 吗 [SEP]


INFO:tensorflow:input_ids: 101 3766 1215 3791 749 679 3221 7770 2292 3309 4157 4638 5831 2533 671 3416 671 3416 998 5381 5317 679 1962 7478 6206 2544 928 4157 7623 6820 2533 6375 7560 2145 5632 2346 1343 1184 1378 2897 4510 2094 5831 1296 7309 671 678 5831 1501 3221 784 720 976 4638 671 702 3302 1218 1447 679 4415 782 1369 671 702 782 6432 1157 3341 4638 679 4761 6887 2769 738 3221 7004 749 4635 1837 6662 4638 2421 2600 1343 6821 2157 2421 679 6206 4790 2875 4277 1962 1408 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 3766 1215 3791 749 679 3221 7770 2292 3309 4157 4638 5831 2533 671 3416 671 3416 998 5381 5317 679 1962 7478 6206 2544 928 4157 7623 6820 2533 6375 7560 2145 5632 2346 1343 1184 1378 2897 4510 2094 5831 1296 7309 671 678 5831 1501 3221 784 720 976 4638 671 702 3302 1218 1447 679 4415 782 1369 671 702 782 6432 1157 3341 4638 679 4761 6887 2769 738 3221 7004 749 4635 1837 6662 4638 2421 2600 1343 6821 2157 2421 679 6206 4790 2875 4277 1962 1408 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] 真 的 不 知 道 为 什 么 会 这 么 火 ， 排 了 那 么 半 天 ， 居 然 给 我 吃 这 个 ， 点 的 几 乎 都 是 特 色 ， 感 觉 除 了 干 酪 鱼 其 他 全 是 雷 ， 炒 肝 的 味 道 还 算 过 得 去 ， 那 个 牛 肉 烩 饭 ？ ！ 什 么 鬼 没 有 牛 肉 只 有 不 咸 不 淡 的 牛 肉 汤 泡 饭 ， 还 有 那 个 海 鲜 疙 瘩 汤 ， 海 鲜 在 哪 ？ 别 人 评 价 里 说 有 鱿 鱼 丁 ， 我 发 誓 ！ 一 个 丁 都 没 得 ！ [SEP]


INFO:tensorflow:tokens: [CLS] 真 的 不 知 道 为 什 么 会 这 么 火 ， 排 了 那 么 半 天 ， 居 然 给 我 吃 这 个 ， 点 的 几 乎 都 是 特 色 ， 感 觉 除 了 干 酪 鱼 其 他 全 是 雷 ， 炒 肝 的 味 道 还 算 过 得 去 ， 那 个 牛 肉 烩 饭 ？ ！ 什 么 鬼 没 有 牛 肉 只 有 不 咸 不 淡 的 牛 肉 汤 泡 饭 ， 还 有 那 个 海 鲜 疙 瘩 汤 ， 海 鲜 在 哪 ？ 别 人 评 价 里 说 有 鱿 鱼 丁 ， 我 发 誓 ！ 一 个 丁 都 没 得 ！ [SEP]


INFO:tensorflow:input_ids: 101 4696 4638 679 4761 6887 711 784 720 833 6821 720 4125 8024 2961 749 6929 720 1288 1921 8024 2233 4197 5314 2769 1391 6821 702 8024 4157 4638 1126 725 6963 3221 4294 5682 8024 2697 6230 7370 749 2397 6991 7824 1071 800 1059 3221 7440 8024 4143 5498 4638 1456 6887 6820 5050 6814 2533 1343 8024 6929 702 4281 5489 4175 7649 8043 8013 784 720 7787 3766 3300 4281 5489 1372 3300 679 1496 679 3909 4638 4281 5489 3739 3796 7649 8024 6820 3300 6929 702 3862 7831 4546 4609 3739 8024 3862 7831 1762 1525 8043 1166 782 6397 817 7027 6432 3300 7825 7824 672 8024 2769 1355 6292 8013 671 702 672 6963 3766 2533 8013 102


INFO:tensorflow:input_ids: 101 4696 4638 679 4761 6887 711 784 720 833 6821 720 4125 8024 2961 749 6929 720 1288 1921 8024 2233 4197 5314 2769 1391 6821 702 8024 4157 4638 1126 725 6963 3221 4294 5682 8024 2697 6230 7370 749 2397 6991 7824 1071 800 1059 3221 7440 8024 4143 5498 4638 1456 6887 6820 5050 6814 2533 1343 8024 6929 702 4281 5489 4175 7649 8043 8013 784 720 7787 3766 3300 4281 5489 1372 3300 679 1496 679 3909 4638 4281 5489 3739 3796 7649 8024 6820 3300 6929 702 3862 7831 4546 4609 3739 8024 3862 7831 1762 1525 8043 1166 782 6397 817 7027 6432 3300 7825 7824 672 8024 2769 1355 6292 8013 671 702 672 6963 3766 2533 8013 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


Predictions on test file
INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from output/model.ckpt-150


INFO:tensorflow:Restoring parameters from output/model.ckpt-150


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


{'probabilities': array([-0.00606802, -5.1077547 ], dtype=float32), 'labels': 0}
{'probabilities': array([-4.0730515 , -0.01717199], dtype=float32), 'labels': 1}
{'probabilities': array([-3.5364013e-03, -5.6464138e+00], dtype=float32), 'labels': 0}
{'probabilities': array([-1.8872085e-03, -6.2736158e+00], dtype=float32), 'labels': 0}
{'probabilities': array([-2.4587659e-03, -6.0093441e+00], dtype=float32), 'labels': 0}
