In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import os
import re
import shutil
import numpy as np 
import pandas as pd 

import tensorflow as tf
import tensorflow_hub as hub

import adanet
from adanet.examples import simple_dnn

# The random seed to use.
RANDOM_SEED = 42

#tensorboard --logdir /media/eigenstir/1TBSecondary/tbgraphs --host localhost --port 7888 &

LOG_DIR = '/media/eigenstir/1TBSecondary/tbgraphs'

# Data Loading
Load and label data

In [2]:
def load_directory_data(directory):
    data = {}
    data["sentence"] = []
    data["sentiment"] = []
    for file_path in os.listdir(directory):
        with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
            data["sentence"].append(f.read())
            data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
    return pd.DataFrame.from_dict(data)

def load_dataset(directory):
    pos_df = load_directory_data(os.path.join(directory, "pos"))
    neg_df = load_directory_data(os.path.join(directory, "neg"))
    pos_df["polarity"] = 1
    neg_df["polarity"] = 0
    return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

def download_and_load_datasets(force_download=False):
    dataset = tf.keras.utils.get_file(
    fname="aclImdb.tar.gz",
    origin="http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
    extract=True
    )
    train_df = load_dataset(os.path.join(os.path.dirname(dataset),
                                      "aclImdb", "train"))
    test_df = load_dataset(os.path.join(os.path.dirname(dataset),
                                      "aclImdb", "test"))
    return train_df, test_df

tf.logging.set_verbosity(tf.logging.INFO)

train_df, test_df = download_and_load_datasets()
train_df.head()

Unnamed: 0,sentence,sentiment,polarity
0,I mistakenly kept myself awake late last night...,1,0
1,Saw this movie recently and had higher hopes. ...,2,0
2,This is a film that had a lot to live down to ...,3,0
3,"""Rock 'n' Roll High School"" will probably have...",10,1
4,Even if you know absolutely nothing about Irel...,10,1


# Supply the data in TF 
Use input functions that wrap the DataFrame in input functions

In [3]:
FEATURES_KEY = "sentence"

train_input_fn = tf.estimator.inputs.pandas_input_fn(
  train_df, train_df["polarity"], num_epochs=None, shuffle=True)

predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(
  train_df, train_df["polarity"], shuffle=False)

predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(
  test_df, test_df["polarity"], shuffle=False)

# Establish baselines
Test how a simple model performs on the dataset

In [4]:
NUM_CLASSES = 2

loss_reduction = tf.losses.Reduction.SUM_OVER_BATCH_SIZE

head = tf.contrib.estimator.binary_classification_head(
  loss_reduction=loss_reduction)

hub_columns=hub.text_embedding_column(
    key=FEATURES_KEY, 
    module_spec="https://tfhub.dev/google/nnlm-en-dim128/1")

def make_config(experiment_name):
  # Estimator configuration.
  return tf.estimator.RunConfig(
    save_checkpoints_steps=1000,
    save_summary_steps=1000,
    tf_random_seed=RANDOM_SEED,
    model_dir=os.path.join(LOG_DIR, experiment_name))

# Train a baseline model

In [5]:
#@test {"skip": true}
#@title Parameters
LEARNING_RATE = 0.001 #@param {type:"number"}
TRAIN_STEPS = 5000 #@param {type:"integer"}

estimator = tf.estimator.LinearClassifier(
  feature_columns=[hub_columns],
  n_classes=NUM_CLASSES,
  optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
  loss_reduction=loss_reduction,
  config=make_config("linear"))

results, _ = tf.estimator.train_and_evaluate(estimator, 
                                             train_spec=tf.estimator.TrainSpec(
                                                        input_fn=train_input_fn,
                                                        max_steps=TRAIN_STEPS),
                                              eval_spec=tf.estimator.EvalSpec(
                                                        input_fn=predict_test_input_fn,
                                                        steps=None))

print("Accuracy: ", results["accuracy"])
print("Loss: ", results["average_loss"])

INFO:tensorflow:Using config: {'_model_dir': '/media/eigenstir/1TBSecondary/tbgraphs/linear', '_tf_random_seed': 42, '_save_summary_steps': 1000, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3825974860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0408 21:46:28.434419 139880324773696 estimator.py:201] Using config: {'_model_dir': '/media/eigenstir/1TBSecondary/tbgraphs/linear', '_tf_random_seed': 42, '_save_summary_steps': 1000, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3825974860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Not using Distribute Coordinator.


I0408 21:46:28.440585 139880324773696 estimator_training.py:185] Not using Distribute Coordinator.


INFO:tensorflow:Running training and evaluation locally (non-distributed).


I0408 21:46:28.444280 139880324773696 training.py:610] Running training and evaluation locally (non-distributed).


INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps 1000 or save_checkpoints_secs None.


I0408 21:46:28.447603 139880324773696 training.py:698] Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps 1000 or save_checkpoints_secs None.


INFO:tensorflow:Skipping training since max_steps has already saved.


I0408 21:46:28.470821 139880324773696 estimator.py:351] Skipping training since max_steps has already saved.


TypeError: 'NoneType' object is not subscriptable

# Simple DNN AdaNet

In [None]:
LEARNING_RATE = 0.003
TRAIN_STEPS = 5000
ADANET_ITERATIONS = 2

estimator = adanet.Estimator(
    head=head, #using the linear classifier from earlier
    
    #define a generator which defines a space of subnetworks to train as candidates. 
    subnetwork_generator = simple_dnn.Generator(
    feature_columns=[hub_columns],
    optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
    seed=RANDOM_SEED),
    
    #number of train steps per iteration
    max_iteration_steps=TRAIN_STEPS//ADANET_ITERATIONS,
    
    #evaluator to compute the overall AdaNet loss (train loss + complexity regularisation) to
    #to select the best candidate for the final model
    evaluator=adanet.Evaluator(
    input_fn=predict_train_input_fn,
    steps=1000),
    
    #config for estimators
    config=make_config("simple_dnn"))

In [None]:
results, _ = tf.estimator.train_and_evaluate(
                estimator,
                train_spec=tf.estimator.TrainSpec(
                    input_fn=train_input_fn,
                    max_steps=TRAIN_STEPS),
                eval_spec=tf.estimator.EvalSpec(
                    input_fn=predict_test_input_fn,
                    steps=None))

In [None]:
print("Accuracy:", results["accuracy"])
print("Loss:", results["average_loss"])

We see a slightly improvement - ~78% to 80%. This is because our simple_dnn.Generator will search over fully connected NNs that have more power than a simple linear model.

The above only generates subnetworks that take embedding results from one module. We can add diversity to the search space by building subnetworks that take different embeddings which might improve performance. To do that, we need to define a custom:
1. adanet.subnetwork.Build
1. adanet.subnetwork.Generator

# Define an AdaNet model with TF Hub text embedding modules

In [None]:
class SimpleNetworkBuilder(adanet.subnetwork.Builder):
  """Builds a simple subnetwork with text embedding module."""

  def __init__(self, learning_rate, max_iteration_steps, seed,
               module_name, module):
    """Initializes a `SimpleNetworkBuilder`.

    Args:
      learning_rate: The float learning rate to use.
      max_iteration_steps: The number of steps per iteration.
      seed: The random seed.

    Returns:
      An instance of `SimpleNetworkBuilder`.
    """
    self._learning_rate = learning_rate
    self._max_iteration_steps = max_iteration_steps
    self._seed = seed
    self._module_name = module_name
    self._module = module

  def build_subnetwork(self,
                       features,
                       logits_dimension,
                       training,
                       iteration_step,
                       summary,
                       previous_ensemble=None):
    """See `adanet.subnetwork.Builder`."""
    sentence = features["sentence"]
    # Load module and apply text embedding, setting trainable=True.
    m = hub.Module(self._module, trainable=True)
    x = m(sentence)
    kernel_initializer = tf.keras.initializers.he_normal(seed=self._seed)

    # The `Head` passed to adanet.Estimator will apply the softmax activation.
    logits = tf.layers.dense(
        x, units=1, activation=None, kernel_initializer=kernel_initializer)

    # Use a constant complexity measure, since all subnetworks have the same
    # architecture and hyperparameters.
    complexity = tf.constant(1)

    return adanet.Subnetwork(
        last_layer=x,
        logits=logits,
        complexity=complexity,
        persisted_tensors={})

  def build_subnetwork_train_op(self, 
                                subnetwork, 
                                loss, 
                                var_list, 
                                labels, 
                                iteration_step,
                                summary, 
                                previous_ensemble=None):
    """See `adanet.subnetwork.Builder`."""

    learning_rate = tf.train.cosine_decay(
        learning_rate=self._learning_rate,
        global_step=iteration_step,
        decay_steps=self._max_iteration_steps)
    optimizer = tf.train.MomentumOptimizer(learning_rate, .9)
    # NOTE: The `adanet.Estimator` increments the global step.
    return optimizer.minimize(loss=loss, var_list=var_list)

  def build_mixture_weights_train_op(self, loss, var_list, logits, labels,
                                     iteration_step, summary):
    """See `adanet.subnetwork.Builder`."""
    return tf.no_op("mixture_weights_train_op")

  @property
  def name(self):
    """See `adanet.subnetwork.Builder`."""
    return self._module_name

In [None]:
def build_subnetwork_train_op(self,
                             subnetwork, 
                             loss,
                             var_list,
                             labels,
                             iteration_step, 
                             summary,
                             previous_ensemble=None):
    learning_rate = tf.train.cosine_decay(
        learning_rate=self._learning_rate,
        global_step=iteration_step,
        decay_steps=self._max_iteration_steps)
    optimizer = tf.train.MomentumOptimizer(learning_rate, .9)
    return optimizer.minimize(loss=loss, var_list=var_list)

def build_mixture_weights_train_op(self, loss, var_list, logits, labels,
                                  iteration_step, summary):
    return tf.no_op("mixture_weights_train_op")

@property
def name(self):
    return self._module_name

adanet.subnetwork.Generator defines a search space of candidate SimpleNetworkBuilder to consider including the final network. It can craete one or more at each iteration with different parameters, and the AdaNet algorithm will select the candidate that best improves the overall networks adanet_loss on the training set. 

The below will loop through the text embedding modules listed in MODULES and give it a different random seed at each iteration.

In [None]:
MODULES = [
    "https://tfhub.dev/google/nnlm-en-dim50/1",
    "https://tfhub.dev/google/nnlm-en-dim128/1",
    "https://tfhub.dev/google/universal-sentence-encoder/1"
]

In [None]:
class SimpleNetworkGenerator(adanet.subnetwork.Generator):
    def __init__(self, learning_rate, max_iteration_steps, seed=None):
        #initializes a generator that builds Simple Network
        self._seed = seed
        self._dnn_builder_fn = functools.partial(
            SimpleNetworkBuilder,
            learning_rate=learning_rate,
            max_iteration_steps=max_iteration_steps)
    
    def generate_candidates(self, previous_ensemble, 
                            iteration_number, previous_ensemble_reports, 
                            all_reports):
        module_index = iteration_number % len(MODULES)
        module_name = MODULES[module_index].split("/")[-2]
        
        print("generating candidate: %s " %module_name)
        
        seed = self._seed
        #change the seed according to the iteration 
        if seed is not None:
            seed += iteration_number
            return [self._dnn_builder_fn(seed=seed,
                                        module_name=module_name,
                                        module=MODULES[module_index])]

Now we can pass these to the AdaNet estimator

In [None]:
LEARNING_RATE = 0.05
TRAIN_STEPS = 7500
ADANET_ITERATIONS = 3

max_iteration_steps = TRAIN_STEPS // ADANET_ITERATIONS
estimator = adanet.Estimator(head=head,
                            subnetwork_generator=SimpleNetworkGenerator(
                                learning_rate=LEARNING_RATE,
                                max_iteration_steps=max_iteration_steps,
                                seed=RANDOM_SEED),
                            max_iteration_steps=max_iteration_steps,
                            evaluator=adanet.Evaluator(input_fn=train_input_fn, 
                                                       steps=10),
                            report_materializer=None,
                            adanet_loss_decay=0.99,
                            config=make_config("tfhub"))

results, _ = tf.estimator.train_and_evaluate(estimator,
                        train_spec=tf.estimator.TrainSpec(input_fn=train_input_fn,
                                                            max_steps=TRAIN_STEPS),
                        eval_spec=tf.estimator.EvalSpec(input_fn=predict_test_input_fn,
                                                       steps=None))

print("Accuracy:", results["accuracy"])
print("Loss:", results["average_loss"])

def ensemble_architecture(result):
    architecture=result["architecture/adanet/ensembles"]
    summary_proto = tf.summary.Summary.FromString(architecture)

In [None]:
predict_input_fn = tf.estimator.inputs.pandas_input_fn(
  test_df.iloc[:10], test_df["polarity"].iloc[:10], shuffle=False)

predictions = estimator.predict(input_fn=predict_input_fn)

for i, val in enumerate(predictions):
    predicted_class = val['class_ids'][0]
    prediction_confidence = val['probabilities'][predicted_class] * 100
    
    print('Actual text: ' + test_df["sentence"][i])
    print('Predicted class: %s, confidence: %s%%' 
          % (predicted_class, round(prediction_confidence, 3)))

# Saving Model

In [None]:
def serving_input_receiver_fn():
    """Serving input_fn that builds features from placeholders

    Returns
    -------
    tf.estimator.export.ServingInputReceiver
    """
    number = tf.placeholder(dtype=tf.float32, shape=[None, 1], name='number')
    receiver_tensors = {'number': number}
    features = tf.tile(number, multiples=[1, 2])
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

estimator.export_saved_model('saved_model', serving_input_receiver_fn)

# Reload & predict

In [None]:
export_dir = 'saved_model'
subdirs = [x for x in Path(export_dir).iterdir()
           if x.is_dir() and 'temp' not in str(x)]
latest = str(sorted(subdirs)[-1])

In [None]:
from tensorflow.contrib import predictor

predict_fn = predictor.from_saved_model(latest)
for nb in my_service():
    pred = predict_fn({'number': [[nb]]})['output']