In [1]:
# Load IPython's autoreload extension (configured in the next cell).
%load_ext autoreload

In [3]:
# Mode 2: reload modules before running each cell, so edits to local helper modules are picked up.
%autoreload 2

In [17]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import getpass
from IPython.display import display
import json
import nltk
import numpy as np
import pandas as pd
import pkg_resources
import os
import random
import re
import seaborn as sns
import sklearn.metrics as metrics

import tensorflow as tf
from tensorflow.python.lib.io import file_io

In [159]:
from utils_export.dataset import Dataset, Model
from utils_export import utils_cloudml
from utils_export import utils_tfrecords

In [6]:
# Set the GCS read cache size to 0 MB; per the TensorFlow issue below this
# makes accessing files on GCS faster:
# https://github.com/tensorflow/tensorflow/issues/15530
os.environ['GCS_READ_CACHE_MAX_SIZE_MB'] = '0'

In [7]:
# Fetch the NLTK 'punkt' data needed by nltk.word_tokenize, which `tokenizer` calls.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /usr/local/google/home/msushkov/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [117]:
def tokenizer(text, lowercase=True):
  """Converts text to a list of words.

  Args:
    text: piece of text to tokenize. Either UTF-8 encoded bytes or an
      already-decoded string.
    lowercase: whether to include lowercasing in preprocessing (bool).

  Returns:
    A list of strings (words).
  """
  # Only byte strings need decoding; a Python 3 `str` has no `.decode` and
  # would raise AttributeError.
  if isinstance(text, bytes):
    text = text.decode('utf-8')
  words = nltk.word_tokenize(text)
  if lowercase:
    words = [w.lower() for w in words]
  return words

In [208]:
def make_test_input_fn(dataset_path,
                       model_text_feature,
                       dataset_text_feature,
                       data_label,
                       tokenizer_fn,
                       label_data_type=tf.float32,
                       max_n_examples=None,
                       random_filter_keep_rate=1.0,
                       label_default_value=None):
    """Returns a test input function.

    Args:
      dataset_path (str): Path to dataset.
      model_text_feature (str): The feature column corresponding to the
        text input the model expects.
      dataset_text_feature (str): The name of the text feature of the dataset.
      data_label (str): The output label for the dataset.
      tokenizer_fn: Tokenizer function (str -> list). If falsy, each text is
        not tokenized but wrapped as-is in a one-element list.
      label_data_type: TensorFlow dtype used to decode the label feature.
      max_n_examples (int): Default for how many examples to evaluate on.
      random_filter_keep_rate (float): Default value forwarded to
        `utils_tfrecords.decode_tf_records_to_pandas`; presumably the
        probability of keeping each example -- confirm against that helper.
      label_default_value: Optional value used for records that do not
        contain the `data_label` feature. With the default (None) the label
        is required and decoding fails on records where it is missing.

    Returns:
      Test input function.
    """
    decoding_input_features = {
      dataset_text_feature: tf.FixedLenFeature([], dtype=tf.string),
      data_label: tf.FixedLenFeature(
          [], dtype=label_data_type, default_value=label_default_value)
    }

    def wrap_raw_text(text):
        """Wraps an untokenized text in a one-element list."""
        return [text]

    def test_input_fn(max_n_examples=max_n_examples,
                      random_filter_keep_rate=random_filter_keep_rate):
        """Test input function.

        Args:
          max_n_examples (int): How many examples to evaluate on.
          random_filter_keep_rate (float): Forwarded to
            `utils_tfrecords.decode_tf_records_to_pandas` (see the enclosing
            function's docstring).

        Returns:
          DataFrame with the decoded features, a `model_text_feature` column
          holding the model input, and a boolean 'label' column.
        """
        res = utils_tfrecords.decode_tf_records_to_pandas(
            decoding_input_features,
            dataset_path,
            max_n_examples,
            random_filter_keep_rate)
        to_model_input = tokenizer_fn if tokenizer_fn else wrap_raw_text
        res[model_text_feature] = [
            to_model_input(text) for text in res[dataset_text_feature]]
        res = res.rename(columns={data_label: 'label'})
        # Binarize the label. Note that Python 3 `round` rounds halves to the
        # nearest even integer, so a label of exactly 0.5 becomes False.
        res['label'] = [bool(round(value)) for value in res['label']]
        # `rename` already returned a new frame, so no extra copy is needed.
        return res

    return test_input_fn

In [134]:
def print_results(results_df, model_names):
    """Print the classification results.

    For every model, prints the model name followed by its ROC AUC and its
    PR AUC, both computed against the 'label' column.

    Args:
      results_df: DataFrame with the results. Must contain a 'label' column
        and one column of prediction scores per entry of `model_names`.
      model_names: List of strings representing the models for which we have results.
    """
    labels = results_df['label']
    for _model in model_names:
        print(_model)
        model_preds = results_df[_model]
        fpr, tpr, _ = metrics.roc_curve(labels, model_preds)
        roc_auc = metrics.auc(fpr, tpr)
        # precision_recall_curve returns (precision, recall, thresholds); the
        # PR curve is integrated with recall on the x-axis and precision on
        # the y-axis.
        precisions, recalls, _ = metrics.precision_recall_curve(labels, model_preds)
        pr_auc = metrics.auc(recalls, precisions)
        print('\tROC AUC: {}'.format(roc_auc))
        print('\tPR AUC: {}'.format(pr_auc))

In [121]:
# Project name handed to every `Model(project_name=...)` in this notebook.
PROJECT_NAME = 'conversationai-models'
SENTENCE_KEY = 'comment_key'  # Input key, handed to every `Model(example_key=...)`.

# Per-user base GCS directory for TF-Records; each evaluation section appends
# its own sub-directory to form OUTPUT_DIR.
OUTPUT_DIR_BASE = os.path.join(
    'gs://conversationai-models',
    getpass.getuser(),
    'tfrecords')

## Evaluate models on Civil Comments dataset

In [175]:
# Passed as `prediction_keys` to `Model`.
LABEL_NAME_PREDICTION_MODEL = 'toxicity/logistic'
# Glob matching the Civil Comments test TF-Record files.
DATASET = 'gs://conversationai-models/resources/civil_comments_data/train_eval_test/test-*.tfrecord'
# Names of the label feature and of the raw-text feature decoded from the records.
DATA_LABEL = 'toxicity'
DATASET_TEXT_FEATURE='comment_text'

# Directory handed to `Dataset` for this evaluation (sub-directory of OUTPUT_DIR_BASE).
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'civil_comments_test')

### CNN, GRU Attention Models

In [140]:
# Column / feature name under which the tokenized text is stored for the models.
MODEL_TEXT_FEATURE = 'tokens'
# Models to evaluate; each entry looks like '<model>:<version>'.
MODEL_NAMES = [
    'tf_cnn_civil_comments_glove:v_20190219_185541',
    'tf_gru_attention_civil_comments_glove:v_20190219_185619',
]

# The text input is encoded as a list of strings.
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# `tokenizer` converts each raw comment into the word list stored under MODEL_TEXT_FEATURE.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer)

In [141]:
# Seed Python's `random` module before loading the data so that the same data
# can be reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The argument is the maximum number of examples to load; it is far above the
# size of the test set, so the whole set is loaded (the log ends with "End of file").
test_dataset.load_data(10000000)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:Loaded 200000 lines.
INFO:tensorflow:End of file.


In [142]:
# Adds the predictions of every model in `model` to `test_dataset`. Per the log
# below this waits for a prediction job to complete, which can take a while.
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/242144
INFO:tensorflow:Preparing train data: 10000/242144
INFO:tensorflow:Preparing train data: 20000/242144
INFO:tensorflow:Preparing train data: 30000/242144
INFO:tensorflow:Preparing train data: 40000/242144
INFO:tensorflow:Preparing train data: 50000/242144
INFO:tensorflow:Preparing train data: 60000/242144
INFO:tensorflow:Preparing train data: 70000/242144
INFO:tensorflow:Preparing train data: 80000/242144
INFO:tensorflow:Preparing train data: 90000/242144
INFO:tensorflow:Preparing train data: 100000/242144
INFO:tensorflow:Preparing train data: 110000/242144
INFO:tensorflow:Preparing train data: 120000/242144
INFO:tensorflow:Preparing train data: 130000/242144
INFO:tensorflow:Preparing train data: 140000/242144
INFO:tensorflow:Preparing train data: 150000/242144
INFO:tensorflow:Preparin



INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Prediction job completed.




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Prediction job completed.


In [143]:
# Results frame: the 'label' column plus one prediction column per model name,
# as consumed by `print_results`.
civil_comments_test_df = test_dataset.show_data()

In [144]:
# Report ROC AUC and PR AUC of the CNN and GRU-attention models on Civil Comments.
print_results(civil_comments_test_df, MODEL_NAMES)

tf_cnn_civil_comments_glove:v_20190219_185541
	ROC AUC: 0.9573435242534393
	PR AUC: 0.6729934425219886
tf_gru_attention_civil_comments_glove:v_20190219_185619
	ROC AUC: 0.9649161132104584
	PR AUC: 0.7486011745102973


### TF-Hub Model

In [189]:
# Column / feature name under which the text is stored for the TF-Hub model.
MODEL_TEXT_FEATURE = 'text'
# Model to evaluate; the entry looks like '<model>:<version>'.
MODEL_NAMES = [
    'tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552',
]

# The text input is encoded as a list of strings.
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# No tokenizer (None): `make_test_input_fn` then wraps each raw comment in a
# one-element list instead of splitting it into words.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, None)

In [190]:
# Seed Python's `random` module before loading the data so that the same data
# can be reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The argument is the maximum number of examples to load; it is far above the
# size of the test set, so the whole set is loaded.
test_dataset.load_data(10000000)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:Loaded 200000 lines.
INFO:tensorflow:End of file.


In [192]:
# Adds the TF-Hub model's predictions to `test_dataset`. Per the log below the
# prediction job ran for more than 800 minutes.
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/242144
INFO:tensorflow:Preparing train data: 10000/242144
INFO:tensorflow:Preparing train data: 20000/242144
INFO:tensorflow:Preparing train data: 30000/242144
INFO:tensorflow:Preparing train data: 40000/242144
INFO:tensorflow:Preparing train data: 50000/242144
INFO:tensorflow:Preparing train data: 60000/242144
INFO:tensorflow:Preparing train data: 70000/242144
INFO:tensorflow:Preparing train data: 80000/242144
INFO:tensorflow:Preparing train data: 90000/242144
INFO:tensorflow:Preparing train data: 100000/242144
INFO:tensorflow:Preparing train data: 110000/242144
INFO:tensorflow:Preparing train data: 120000/242144
INFO:tensorflow:Preparing train data: 130000/242144
INFO:tensorflow:Preparing train data: 140000/242144
INFO:tensorflow:Preparing train data: 150000/242144
INFO:tensorflow:Preparin



INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 25
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 27
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 30
INFO:tensorflow:

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 269
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 271
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 274
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 276
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 279
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 281
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 284
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 286
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 289
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 291
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 294
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 296
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 299

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 538
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 540
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 543
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 545
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 548
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 550
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 553
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 555
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 558
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 560
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 563
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 565
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 568

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 807
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 809
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 812
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 814
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 817
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 819
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 822
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 824
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 827
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 829
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 832
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 834
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 837

In [193]:
# Results frame: the 'label' column plus the TF-Hub model's prediction column.
civil_comments_hub_df = test_dataset.show_data()

In [194]:
# Report ROC AUC and PR AUC of the TF-Hub model on Civil Comments.
print_results(civil_comments_hub_df, MODEL_NAMES)

tf_hub_classifier_civil_comments:v20190322_142141_21201_1553344552
	ROC AUC: 0.9595451744696132
	PR AUC: 0.7429338592289392


## Evaluate models on Toxicity dataset

In [195]:
# Passed as `prediction_keys` to `Model`.
LABEL_NAME_PREDICTION_MODEL = 'frac_neg/logistic'
# Toxicity test TF-Record file.
DATASET = 'gs://conversationai-models/resources/toxicity_data/toxicity_q42017_test.tfrecord'
# Names of the label feature and of the raw-text feature decoded from the records.
DATA_LABEL = 'frac_neg'
DATASET_TEXT_FEATURE='comment_text'

# Directory handed to `Dataset` for this evaluation (sub-directory of OUTPUT_DIR_BASE).
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'toxicity_test')

### CNN, GRU Attention Models

In [162]:
# Column / feature name under which the tokenized text is stored for the models.
MODEL_TEXT_FEATURE = 'tokens'
# Models to evaluate; each entry looks like '<model>:<version>'.
MODEL_NAMES = [
    'tf_cnn_toxicity_glove:v_20190219_185532',
    'tf_gru_attention_toxicity_glove:v_20190219_185516',
]

# The text input is encoded as a list of strings.
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# `tokenizer` converts each raw comment into the word list stored under MODEL_TEXT_FEATURE.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer)

In [163]:
# Seed Python's `random` module before loading the data so that the same data
# can be reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The argument is the maximum number of examples to load; it is far above the
# size of the test set, so the whole set is loaded.
test_dataset.load_data(10000000)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:End of file.


In [148]:
# Adds the predictions of every model in `model` to `test_dataset`; per the log
# below this waits for a prediction job to complete.
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:Preparing train data: 0/187681
INFO:tensorflow:Preparing train data: 10000/187681
INFO:tensorflow:Preparing train data: 20000/187681
INFO:tensorflow:Preparing train data: 30000/187681
INFO:tensorflow:Preparing train data: 40000/187681
INFO:tensorflow:Preparing train data: 50000/187681
INFO:tensorflow:Preparing train data: 60000/187681
INFO:tensorflow:Preparing train data: 70000/187681
INFO:tensorflow:Preparing train data: 80000/187681
INFO:tensorflow:Preparing train data: 90000/187681
INFO:tensorflow:Preparing train data: 100000/187681
INFO:tensorflow:Preparing train data: 110000/187681
INFO:tensorflow:Preparing train data: 120000/187681
INFO:tensorflow:Preparing train data: 130000/187681
INFO:tensorflow:Preparing train data: 140000/187681
INFO:tensorflow:Preparing train data: 150000/187681
INFO:tensorflow:Preparing train data: 160000/187681
INFO:tensorflow:Preparing train d



INFO:tensorflow:state : QUEUED




INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 25
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 27
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 30
INFO:tensorflow:



INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Prediction job completed.


In [149]:
# Results frame for the CNN / GRU-attention models: 'label' plus one prediction column per model.
toxicity_test_df1 = test_dataset.show_data()

In [150]:
# Report ROC AUC and PR AUC of the CNN and GRU-attention models on the Toxicity test set.
print_results(toxicity_test_df1, MODEL_NAMES)

tf_cnn_toxicity_glove:v_20190219_185532
	ROC AUC: 0.951760553925346
	PR AUC: 0.8740274773143215
tf_gru_attention_toxicity_glove:v_20190219_185516
	ROC AUC: 0.9543916575133977
	PR AUC: 0.8814208812923074


### TF-Hub Model

In [196]:
# Column / feature name under which the text is stored for the TF-Hub model.
MODEL_TEXT_FEATURE = 'text'
# Model to evaluate; the entry looks like '<model>:<version>'.
MODEL_NAMES = [
    'tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427',
]

# The text input is encoded as a list of strings.
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# No tokenizer (None): `make_test_input_fn` then wraps each raw comment in a
# one-element list instead of splitting it into words.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, None)

In [197]:
# Seed Python's `random` module before loading the data so that the same data
# can be reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The argument is the maximum number of examples to load; it is far above the
# size of the test set, so the whole set is loaded.
test_dataset.load_data(10000000)



INFO:tensorflow:input_fn is compatible with the `Dataset` class.
INFO:tensorflow:Loaded 100000 lines.
INFO:tensorflow:End of file.


In [199]:
# Adds the TF-Hub model's predictions to `test_dataset`. Per the log below the
# prediction job ran for roughly 300 minutes.
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

INFO:tensorflow:Model is compatible with the `Dataset` instance.
INFO:tensorflow:Doing batch 0/0
INFO:tensorflow:TF-Records already exist - overwriting them.
INFO:tensorflow:Preparing train data: 0/187681
INFO:tensorflow:Preparing train data: 10000/187681
INFO:tensorflow:Preparing train data: 20000/187681
INFO:tensorflow:Preparing train data: 30000/187681
INFO:tensorflow:Preparing train data: 40000/187681
INFO:tensorflow:Preparing train data: 50000/187681
INFO:tensorflow:Preparing train data: 60000/187681
INFO:tensorflow:Preparing train data: 70000/187681
INFO:tensorflow:Preparing train data: 80000/187681
INFO:tensorflow:Preparing train data: 90000/187681
INFO:tensorflow:Preparing train data: 100000/187681
INFO:tensorflow:Preparing train data: 110000/187681
INFO:tensorflow:Preparing train data: 120000/187681
INFO:tensorflow:Preparing train data: 130000/187681
INFO:tensorflow:Preparing train data: 140000/187681
INFO:tensorflow:Preparing train data: 150000/187681
INFO:tensorflow:Preparin



INFO:tensorflow:state : QUEUED




INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 0
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 2
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 5
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 7
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 10
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 12
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 15
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 17
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 20
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 22
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 25
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 27
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 30
INFO:tensorflow:

INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 269
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 271
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 274
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 276
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 279
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 281
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 284
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 286
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 289
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 291
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 294
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 296
INFO:tensorflow:Waiting for prediction job to complete. Minutes elapsed: 299

In [200]:
# Results frame for the TF-Hub model: 'label' plus the model's prediction column.
toxicity_test_df2 = test_dataset.show_data()

In [201]:
# Report ROC AUC and PR AUC of the TF-Hub model on the Toxicity test set.
print_results(toxicity_test_df2, MODEL_NAMES)

tf_hub_classifier_toxicity:v20190322_142740_24239_1553555427
	ROC AUC: 0.9270843170934745
	PR AUC: 0.8155815559085313


## Evaluate models on Many Communities data

In [242]:
# Passed as `prediction_keys` to `Model`.
LABEL_NAME_PREDICTION_MODEL = 'removed/logistic'
# Many Communities TF-Record file.
DATASET = 'gs://conversationai-models/resources/transfer_learning_data/many_communities/20181105_answers.tfrecord'
# Names of the label feature and of the raw-text feature decoded from the records.
DATA_LABEL = 'removed'
DATASET_TEXT_FEATURE='comment_text'

# Directory handed to `Dataset` for this evaluation (sub-directory of OUTPUT_DIR_BASE).
OUTPUT_DIR = os.path.join(OUTPUT_DIR_BASE, 'many_communities_test')

In [255]:
# Debugging aid: print the serialized records of the Many Communities file
# that do not carry a 'removed' feature.
DATASET = 'gs://conversationai-models/resources/transfer_learning_data/many_communities/20181105_answers.tfrecord'
MAX_RECORDS_TO_SCAN = 100000

raw_dataset = tf.data.TFRecordDataset([DATASET])
next_element = raw_dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    for _ in range(MAX_RECORDS_TO_SCAN):
        try:
            value = sess.run(next_element)
        except tf.errors.OutOfRangeError:
            # The file holds fewer than MAX_RECORDS_TO_SCAN records: stop at
            # the end of the data instead of failing.
            break
        # Inspect the parsed feature keys; a substring test on the raw bytes
        # would also match the word "removed" inside a comment's text.
        example = tf.train.Example.FromString(value)
        if 'removed' not in example.features.feature:
            print(value)

b'\n\xe2\x01\n\x9c\x01\n\x0ccomment_text\x12\x8b\x01\n\x88\x01\n\x85\x01^^^^^^^\nIf I could give you 100 upvotes for this I would. So perfectly well stated. \n\nI tip my hat to you and your wisdom, Good Sir! \n\x19\n\ncomment_id\x12\x0b\n\t\n\x07e8rbnjy\n&\n\tsubreddit\x12\x19\n\x17\n\x15whereareallthegoodmen'
b'\nY\n"\n\x0ccomment_text\x12\x12\n\x10\n\x0eMe too as well\n\x19\n\ncomment_id\x12\x0b\n\t\n\x07e8spfo5\n\x18\n\tsubreddit\x12\x0b\n\t\n\x07thinspo'
b"\n\x92\x02\n\xda\x01\n\x0ccomment_text\x12\xc9\x01\n\xc6\x01\n\xc3\x01I'm also vegetarian and have a hard time keeping sodium in, reading your comment and the one preceding yours has made me realise I probably do go way overboard with salt. Thanks for the heads up.\n\x19\n\ncomment_id\x12\x0b\n\t\n\x07e8rep2q\n\x18\n\tsubreddit\x12\x0b\n\t\n\x07thinspo"
b'\nT\n\x18\n\tsubreddit\x12\x0b\n\t\n\x07thinspo\n\x1d\n\x0ccomment_text\x12\r\n\x0b\n\tMe please\n\x19\n\ncomment_id\x12\x0b\n\t\n\x07e8rd0zu'
b'\n\xa3\x01\nn\n\x0ccomment_text\

### CNN, GRU Attention Models

In [243]:
# Column / feature name under which the tokenized text is stored for the models.
MODEL_TEXT_FEATURE = 'tokens'
# Models to evaluate; each entry looks like '<model>:<version>'.
MODEL_NAMES = [
    'tf_cnn_many_communities_glove:v_20190219_185551',
    'tf_gru_attention_many_communities:v20190322_142800_507893_1556085643',
]

# The text input is encoded as a list of strings.
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# `tokenizer` converts each raw comment into the word list stored under
# MODEL_TEXT_FEATURE. The label is decoded as int64 here, unlike the float32
# default used for the other datasets.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, tokenizer, label_data_type=tf.int64)

In [244]:
# Seed Python's `random` module before loading the data so that the same data
# can be reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# NOTE(review): as recorded in the output below, this call currently fails with
# InvalidArgumentError ("Feature: removed ... is required but could not be
# found"): some records in DATASET have no 'removed' feature.
# The argument is the maximum number of examples to load.
test_dataset.load_data(100000000)

INFO:tensorflow:input_fn is compatible with the `Dataset` class.


InvalidArgumentError: Feature: removed (data type: int64) is required but could not be found.
	 [[node ParseSingleExample_81/ParseSingleExample (defined at /usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/utils_export/utils_tfrecords.py:151)  = ParseSingleExample[Tdense=[DT_STRING, DT_INT64], dense_keys=["comment_text", "removed"], dense_shapes=[[], []], num_sparse=0, sparse_keys=[], sparse_types=[], _device="/job:localhost/replica:0/task:0/device:CPU:0"](ReaderReadV2_81:1, ParseSingleExample_81/Const, ParseSingleExample_81/Const_1)]]

Caused by op 'ParseSingleExample_81/ParseSingleExample', defined at:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3220, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-244-c41f43d061dc>", line 5, in <module>
    test_dataset.load_data(100000000)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/utils_export/dataset.py", line 169, in load_data
    self.data = self._input_fn(max_n_examples=max_n_examples, **kwargs)
  File "<ipython-input-208-1b46ec286e6e>", line 44, in test_input_fn
    random_filter_keep_rate)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/utils_export/utils_tfrecords.py", line 151, in decode_tf_records_to_pandas
    serialized=serialized_example, features=decoding_features_spec)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/ops/parsing_ops.py", line 801, in parse_single_example
    return parse_single_example_v2(serialized, features, name)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/ops/parsing_ops.py", line 1692, in parse_single_example_v2
    dense_defaults, dense_shapes, name)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/ops/parsing_ops.py", line 1809, in _parse_single_example_v2_raw
    name=name)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/ops/gen_parsing_ops.py", line 1009, in parse_single_example
    sparse_types=sparse_types, dense_shapes=dense_shapes, name=name)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/.pyenv/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Feature: removed (data type: int64) is required but could not be found.
	 [[node ParseSingleExample_81/ParseSingleExample (defined at /usr/local/google/home/msushkov/code/conversationai-models/model_evaluation/utils_export/utils_tfrecords.py:151)  = ParseSingleExample[Tdense=[DT_STRING, DT_INT64], dense_keys=["comment_text", "removed"], dense_shapes=[[], []], num_sparse=0, sparse_keys=[], sparse_types=[], _device="/job:localhost/replica:0/task:0/device:CPU:0"](ReaderReadV2_81:1, ParseSingleExample_81/Const, ParseSingleExample_81/Const_1)]]


In [None]:
# Adds the predictions of every model in `model` to `test_dataset`.
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

In [None]:
# Results frame for the CNN / GRU-attention models: 'label' plus one prediction column per model.
many_communities_test_df = test_dataset.show_data()

In [None]:
# Report ROC AUC and PR AUC of the CNN and GRU-attention models on Many Communities.
print_results(many_communities_test_df, MODEL_NAMES)

### TF-Hub Model

In [None]:
# Column / feature name under which the text is stored for the TF-Hub model.
MODEL_TEXT_FEATURE = 'text'
# Model to evaluate; the entry looks like '<model>:<version>'.
MODEL_NAMES = [
    'tf_hub_classifier_many_communities:v20190219_185602_316000_1553563221',
]

# The text input is encoded as a list of strings.
model_input_spec = {
    MODEL_TEXT_FEATURE: utils_tfrecords.EncodingFeatureSpec.LIST_STRING
}

model = Model(
    feature_keys_spec=model_input_spec,
    prediction_keys=LABEL_NAME_PREDICTION_MODEL,
    example_key=SENTENCE_KEY,
    model_names=MODEL_NAMES,
    project_name=PROJECT_NAME)

# No tokenizer (None): `make_test_input_fn` then wraps each raw comment in a
# one-element list. The label is decoded as int64 here, unlike the float32
# default used for the other datasets.
test_input_fn = make_test_input_fn(
    DATASET, MODEL_TEXT_FEATURE, DATASET_TEXT_FEATURE,
    DATA_LABEL, None, label_data_type=tf.int64)

In [None]:
# Seed Python's `random` module before loading the data so that the same data
# can be reloaded in the future.
random.seed(2018)

test_dataset = Dataset(test_input_fn, OUTPUT_DIR)
# The argument is the maximum number of examples to load.
# NOTE(review): this decodes the same DATASET with the same required 'removed'
# label as the CNN/GRU load cell above, whose recorded output shows an
# InvalidArgumentError -- expect the same failure here.
test_dataset.load_data(10000000)

In [None]:
# Adds the TF-Hub model's predictions to `test_dataset`.
# Set recompute_predictions=False to save time if predictions are available.
test_dataset.add_model_prediction_to_data(model, recompute_predictions=True)

In [None]:
# Results frame for the TF-Hub model: 'label' plus the model's prediction column.
many_communities_tfhub_test_df = test_dataset.show_data()

In [None]:
# Report ROC AUC and PR AUC of the TF-Hub model on Many Communities.
print_results(many_communities_tfhub_test_df, MODEL_NAMES)