In [0]:
! pip install bert-tensorflow

Collecting bert-tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/7eb4e8b6ea35b7cc54c322c816f976167a43019750279a8473d355800a93/bert_tensorflow-1.0.1-py2.py3-none-any.whl (67kB)
[K     |████████████████████████████████| 71kB 1.6MB/s 
Installing collected packages: bert-tensorflow
Successfully installed bert-tensorflow-1.0.1


## 줜나어렵대

In [0]:
import math
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import pickle
import bert
import os
from bert import run_classifier
from bert import optimization
from bert import tokenization

tf.logging.set_verbosity(tf.logging.ERROR)


def create_tokenizer_from_hub_module(bert_model_hub):
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(bert_model_hub)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                                  tokenization_info["do_lower_case"]])

        print("Using BERT from %s" %bert_model_hub)
        print("with vocab size=%d and do_lower_case=%s." %(len(vocab_file), str(do_lower_case)))

    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)


def make_features(dataset, label_list, MAX_SEQ_LENGTH, tokenizer, DATA_COLUMN, LABEL_COLUMN):
    input_example = dataset.apply(lambda x: bert.run_classifier.InputExample(guid=None,
                                                                             text_a=x[DATA_COLUMN],
                                                                             text_b=None,
                                                                             label=x[LABEL_COLUMN]), axis=1)
    features = bert.run_classifier.convert_examples_to_features(input_example, label_list, MAX_SEQ_LENGTH, tokenizer)
    return features


def create_model(bert_model_hub, is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    """Creates a classification model."""

    bert_module = hub.Module(
        bert_model_hub,
        trainable=True)
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
    bert_outputs = bert_module(
        inputs=bert_inputs,
        signature="tokens",
        as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    with tf.variable_scope("output_layer"):
        layer_out = tf.layers.dense(
            inputs=output_layer,
            units=num_labels,
            use_bias=False,
            kernel_initializer=tf.initializers.variance_scaling()
        )
        predicted_labels = tf.squeeze(tf.argmax(layer_out, axis=-1, output_type=tf.int32))

        if is_predicting:
            return predicted_labels, layer_out
        else:
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels,
                logits=layer_out
            )
            loss = tf.reduce_mean(loss)

            return loss, predicted_labels, layer_out


# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(bert_model_hub, num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
    """Returns `model_fn` closure for TPUEstimator."""

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # TRAIN and EVAL
        if not is_predicting:

            (loss, predicted_labels, log_probs) = create_model(
                bert_model_hub, is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            train_op = bert.optimization.create_optimizer(
                loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

            # Calculate evaluation metrics.
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
                f1_score = tf.contrib.metrics.f1_score(
                    label_ids,
                    predicted_labels)
                auc = tf.metrics.auc(
                    label_ids,
                    predicted_labels)
                recall = tf.metrics.recall(
                    label_ids,
                    predicted_labels)
                precision = tf.metrics.precision(
                    label_ids,
                    predicted_labels)
                true_pos = tf.metrics.true_positives(
                    label_ids,
                    predicted_labels)
                true_neg = tf.metrics.true_negatives(
                    label_ids,
                    predicted_labels)
                false_pos = tf.metrics.false_positives(
                    label_ids,
                    predicted_labels)
                false_neg = tf.metrics.false_negatives(
                    label_ids,
                    predicted_labels)
                return {
                    "eval_accuracy": accuracy,
                    "f1_score": f1_score,
                    "auc": auc,
                    "precision": precision,
                    "recall": recall,
                    "true_positives": true_pos,
                    "true_negatives": true_neg,
                    "false_positives": false_pos,
                    "false_negatives": false_neg
                }

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tf.estimator.ModeKeys.TRAIN:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  train_op=train_op)
            else:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(
                bert_model_hub, is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            predictions = {
                'probabilities': log_probs,
                'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Return the actual model function in the closure
    return model_fn


def estimator_builder(bert_model_hub, OUTPUT_DIR, SAVE_SUMMARY_STEPS, SAVE_CHECKPOINTS_STEPS, label_list, LEARNING_RATE,
                      num_train_steps, num_warmup_steps, BATCH_SIZE):
    # Specify outpit directory and number of checkpoint steps to save
    run_config = tf.estimator.RunConfig(
        model_dir=OUTPUT_DIR,
        save_summary_steps=SAVE_SUMMARY_STEPS,
        save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

    model_fn = model_fn_builder(
        bert_model_hub=bert_model_hub,
        num_labels=len(label_list),
        learning_rate=LEARNING_RATE,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps)

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params={"batch_size": BATCH_SIZE})
    return estimator, model_fn, run_config


def run_on_dfs(train, test, data_column, label_column,
               max_seq_length=128,
               batch_size=32,
               learning_rate=2e-5,
               num_train_epochs=3,
               warmup_proportion=0.1,
               save_summary_steps=100,
               save_checkpoint_steps=10000,
               bert_model_hub="https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1",
               output_dir="output"):
    label_list = train[label_column].unique().tolist()

    tokenizer = create_tokenizer_from_hub_module(bert_model_hub)

    train_features = make_features(train, label_list, max_seq_length, tokenizer, data_column, label_column)
    test_features = make_features(test, label_list, max_seq_length, tokenizer, data_column, label_column)

    steps_per_epoch = math.ceil(len(train_features) / batch_size)

    num_train_steps = int(len(train_features) / batch_size * num_train_epochs)
    num_warmup_steps = int(num_train_steps * warmup_proportion)

    estimator, model_fn, run_config = estimator_builder(
        bert_model_hub,
        output_dir,
        save_summary_steps,
        save_checkpoint_steps,
        label_list,
        learning_rate,
        num_train_steps,
        num_warmup_steps,
        batch_size)

    train_input_fn = bert.run_classifier.input_fn_builder(
        features=train_features,
        seq_length=max_seq_length,
        is_training=True,
        drop_remainder=False)

    test_input_fn = run_classifier.input_fn_builder(
        features=test_features,
        seq_length=max_seq_length,
        is_training=False,
        drop_remainder=False)

    results = []
    for epoch in range(num_train_epochs):
        estimator.train(input_fn=train_input_fn, steps=steps_per_epoch)

        print("End of epoch %d." %(epoch + 1))

        result_dict = estimator.evaluate(input_fn=test_input_fn, steps=None)
        print(result_dict)
        results.append(result_dict)

    return results, estimator


def pretty_print(result):
    df = pd.DataFrame([result]).T
    df.columns = ["values"]
    return df

W0801 08:00:41.508820 140393886611328 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/optimization.py:87: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [0]:
def load_data(data_file):
    data = pd.read_csv(data_file)

    # Only use the top quartile as polite, and bottom quartile as impolite. Discard the rest.
    quantiles = data["Normalized Score"].quantile([0.25, 0.5, 0.75])
    # print(quantiles)

    for i in range(len(data)):
        score = data.loc[i, "Normalized Score"]
        if score <= quantiles[0.25]:
            # Bottom quartile (impolite).
            data.loc[i, "Normalized Score"] = 0
        elif score >= quantiles[0.75]:
            # Top quartile (polite).
            data.loc[i, "Normalized Score"] = 1
        else:
            # Neutral.
            data.loc[i, "Normalized Score"] = 2

    data["Normalized Score"] = data["Normalized Score"].astype(int)

    # Discard neutral examples.
    data = data[data["Normalized Score"] < 2]
    
    data.sample(frac=1).reset_index(drop=True)
    n_test = len(data) // 10
    test_data = data[:n_test]
    train_data = data[n_test:]
    
    print("Data loaded successfully. Train=%d, test=%d, total=%d." % (len(train_data), len(test_data), len(train_data) + len(test_data)))
    print("Some train samples:")
    print(train_data.head())
    print("Some test samples:")
    print(test_data.head())

    return train_data, test_data

In [0]:
if not os.path.exists("Stanford_politeness_corpus.zip"):
  !wget http://www.cs.cornell.edu/~cristian/Politeness_files/Stanford_politeness_corpus.zip

if not os.path.exists("Stanford_politeness_corpus/wikipedia.annotated.csv"):
  !unzip Stanford_politeness_corpus.zip

train_data, test_data = load_data("Stanford_politeness_corpus/wikipedia.annotated.csv")

params = {
    "data_column": "Request",
    "label_column": "Normalized Score",
#     "learning_rate": 2e-5,
    "batch_size": 16,
    "num_train_epochs": 3,
    "bert_model_hub": "https://tfhub.dev/google/bert_cased_L-12_H-768_A-12/1"
}

tf.logging.set_verbosity(tf.logging.INFO)
result, estimator = run_on_dfs(train_data, test_data, **params)
print(result)

Data loaded successfully. Train=1961, test=217, total=2178.
Some train samples:
     Community      Id  ...         TurkId5  Normalized Score
460  Wikipedia  621480  ...  A1Y3Z92RE62NPS                 1
462  Wikipedia  146267  ...  A3IHLWMZNBLUR4                 1
463  Wikipedia   84242  ...   AIPK94CUWL45W                 1
464  Wikipedia  487517  ...  A1F4D2PZ7NNWTL                 1
466  Wikipedia  629492  ...  A2WZQ92N4809N1                 1

[5 rows x 14 columns]
Some test samples:
   Community      Id  ...         TurkId5  Normalized Score
0  Wikipedia  629705  ...  A15DM9BMKZZJQ6                 0
1  Wikipedia  244336  ...  A3TFQK7QK8X6LM                 1
5  Wikipedia  214411  ...  A1Y3Z92RE62NPS                 1
8  Wikipedia  177439  ...  A29B522D0BX6HN                 0
9  Wikipedia  341534  ...  A28TXBSZPWMEU9                 0

[5 rows x 14 columns]


I0801 08:01:00.208889 140393886611328 saver.py:1499] Saver not created because there are no variables in the graph to restore
W0801 08:01:01.246524 140393886611328 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/tokenization.py:125: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.

W0801 08:01:01.409889 140393886611328 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/run_classifier.py:774: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.

I0801 08:01:01.411023 140393886611328 run_classifier.py:774] Writing example 0 of 1961
I0801 08:01:01.414020 140393886611328 run_classifier.py:461] *** Example ***
I0801 08:01:01.415405 140393886611328 run_classifier.py:462] guid: None
I0801 08:01:01.416747 140393886611328 run_classifier.py:464] tokens: [CLS] Thanks . As an aside , since this did turn out to be fact ##ual , just very hard to source , do you think the community woul

Using BERT from https://tfhub.dev/google/bert_cased_L-12_H-768_A-12/1
with vocab size=76 and do_lower_case=False.


I0801 08:01:01.446197 140393886611328 run_classifier.py:464] tokens: [CLS] Thanks for your help on this , it ' s much appreciated . Should I del ##ete my request for check ##user ? [SEP]
I0801 08:01:01.447568 140393886611328 run_classifier.py:465] input_ids: 101 5749 1111 1240 1494 1113 1142 117 1122 112 188 1277 12503 119 9743 146 3687 16618 1139 4566 1111 4031 19399 136 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
I0801 08:01:01.448529 140393886611328 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
I0801 08:01:01.449416 140393886611328 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 

End of epoch 1.


I0801 08:03:12.120138 140393886611328 saver.py:1499] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
I0801 08:03:21.064182 140393886611328 estimator.py:1147] Done calling model_fn.
I0801 08:03:21.086401 140393886611328 evaluation.py:255] Starting evaluation at 2019-08-01T08:03:21Z
I0801 08:03:22.733125 140393886611328 monitored_session.py:240] Graph was finalized.
W0801 08:03:22.742737 140393886611328 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
I0801 08:03:22.748574 140393886611328 saver.py:1280] Restoring parameters from output/model.ckpt-123
I0801 08:03:25.147695 140393886611328 session_manager.py:500] Running loca

{'auc': 0.8714286, 'eval_accuracy': 0.87096775, 'f1_score': 0.8727272, 'false_negatives': 16.0, 'false_positives': 12.0, 'loss': 0.36494896, 'precision': 0.8888889, 'recall': 0.85714287, 'true_negatives': 93.0, 'true_positives': 96.0, 'global_step': 123}


I0801 08:03:32.709995 140393886611328 estimator.py:1145] Calling model_fn.
I0801 08:03:35.998855 140393886611328 saver.py:1499] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
I0801 08:03:44.996291 140393886611328 estimator.py:1147] Done calling model_fn.
I0801 08:03:44.999146 140393886611328 basic_session_run_hooks.py:541] Create CheckpointSaverHook.
I0801 08:03:46.701639 140393886611328 monitored_session.py:240] Graph was finalized.
I0801 08:03:46.714601 140393886611328 saver.py:1280] Restoring parameters from output/model.ckpt-123
W0801 08:03:48.571978 140393886611328 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1066: get_checkpoint_mtimes (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file utilities to get mtimes.
I0801 08:03:49.

End of epoch 2.


I0801 08:05:34.937198 140393886611328 saver.py:1499] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
I0801 08:05:43.920698 140393886611328 estimator.py:1147] Done calling model_fn.
I0801 08:05:43.944471 140393886611328 evaluation.py:255] Starting evaluation at 2019-08-01T08:05:43Z
I0801 08:05:45.591808 140393886611328 monitored_session.py:240] Graph was finalized.
I0801 08:05:45.603909 140393886611328 saver.py:1280] Restoring parameters from output/model.ckpt-246
I0801 08:05:48.059813 140393886611328 session_manager.py:500] Running local_init_op.
I0801 08:05:48.304451 140393886611328 session_manager.py:502] Done running local_init_op.
I0801 08:05:52.284791 140393886611328 evaluation.py:275] Finished evaluation at 2019-08-01-08:05:52
I0801 08:05:52.286082 140393886611328 estimator.py:2039] Saving dict for global step 246: auc = 0.8732143, eval_accuracy = 0.87096775, f1_score = 0.8653845, fa

{'auc': 0.8732143, 'eval_accuracy': 0.87096775, 'f1_score': 0.8653845, 'false_negatives': 22.0, 'false_positives': 6.0, 'loss': 0.4843882, 'precision': 0.9375, 'recall': 0.8035714, 'true_negatives': 99.0, 'true_positives': 90.0, 'global_step': 246}


I0801 08:05:53.364728 140393886611328 estimator.py:1145] Calling model_fn.
I0801 08:05:56.490273 140393886611328 saver.py:1499] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
I0801 08:06:05.272269 140393886611328 estimator.py:1147] Done calling model_fn.
I0801 08:06:05.274793 140393886611328 basic_session_run_hooks.py:541] Create CheckpointSaverHook.
I0801 08:06:06.929642 140393886611328 monitored_session.py:240] Graph was finalized.
I0801 08:06:06.941531 140393886611328 saver.py:1280] Restoring parameters from output/model.ckpt-246
I0801 08:06:09.405717 140393886611328 session_manager.py:500] Running local_init_op.
I0801 08:06:09.649559 140393886611328 session_manager.py:502] Done running local_init_op.
I0801 08:06:18.595783 140393886611328 basic_session_run_hooks.py:606] Saving checkpoints for 246 into output/model.ckpt.
I0801 08:06:31.548854 140393886611328 basic_session_run_hooks.py:2

End of epoch 3.


I0801 08:07:54.260673 140393886611328 saver.py:1499] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
I0801 08:08:03.202103 140393886611328 estimator.py:1147] Done calling model_fn.
I0801 08:08:03.223937 140393886611328 evaluation.py:255] Starting evaluation at 2019-08-01T08:08:03Z
I0801 08:08:04.905133 140393886611328 monitored_session.py:240] Graph was finalized.
I0801 08:08:04.918072 140393886611328 saver.py:1280] Restoring parameters from output/model.ckpt-369
I0801 08:08:07.409672 140393886611328 session_manager.py:500] Running local_init_op.
I0801 08:08:07.660668 140393886611328 session_manager.py:502] Done running local_init_op.
I0801 08:08:11.608978 140393886611328 evaluation.py:275] Finished evaluation at 2019-08-01-08:08:11
I0801 08:08:11.610624 140393886611328 estimator.py:2039] Saving dict for global step 369: auc = 0.8904762, eval_accuracy = 0.8894009, f1_score = 0.88888884, fa

{'auc': 0.8904762, 'eval_accuracy': 0.8894009, 'f1_score': 0.88888884, 'false_negatives': 16.0, 'false_positives': 8.0, 'loss': 0.59378725, 'precision': 0.9230769, 'recall': 0.85714287, 'true_negatives': 97.0, 'true_positives': 96.0, 'global_step': 369}
[{'auc': 0.8714286, 'eval_accuracy': 0.87096775, 'f1_score': 0.8727272, 'false_negatives': 16.0, 'false_positives': 12.0, 'loss': 0.36494896, 'precision': 0.8888889, 'recall': 0.85714287, 'true_negatives': 93.0, 'true_positives': 96.0, 'global_step': 123}, {'auc': 0.8732143, 'eval_accuracy': 0.87096775, 'f1_score': 0.8653845, 'false_negatives': 22.0, 'false_positives': 6.0, 'loss': 0.4843882, 'precision': 0.9375, 'recall': 0.8035714, 'true_negatives': 99.0, 'true_positives': 90.0, 'global_step': 246}, {'auc': 0.8904762, 'eval_accuracy': 0.8894009, 'f1_score': 0.88888884, 'false_negatives': 16.0, 'false_positives': 8.0, 'loss': 0.59378725, 'precision': 0.9230769, 'recall': 0.85714287, 'true_negatives': 97.0, 'true_positives': 96.0, 'glob