# Training Embeddings for Users and Movies

This tutorial shows how to learn **item embeddings** from ratings.

We use a simple NN architecture, along with the conditional cost function used by the [Swivel](https://arxiv.org/pdf/1602.02215.pdf) algorithm.

The learnt embeddings are then extracted from the model and saved as a TSV file.

<img src="cooc2emb.png" width="600" height="400"/>

The following are the steps of this tutorial:


1. Define input data metadata
2. Implement data input function
3. Create feature columns
4. Create a custom estimator
5. Define the train and evaluate experiment
6. Set the experiment parameters
7. Run the experiment
8. Extract the learnt **movie embeddings** from the model
9. Export the saved model to serve as a **user embedding** lookup


<a href="https://colab.research.google.com/github/ksalama/data2cooc2emb2ann/blob/master/02-Training_Embeddings_for_Users_and_Movies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup

In [1]:
# !pip install -r ../requirements.txt

In [2]:
import os
import math
import numpy as np
import tensorflow as tf
from datetime import datetime

In [3]:
# Root folder for everything this notebook reads or writes.
WORKSPACE = './workspace'
# Input folder: co-occurrence TFRecords, vocabulary files and info.log.
COOC_DIR = WORKSPACE + '/cooc'
# Output folder for model checkpoints and exports.
MODELS_DIR = WORKSPACE + '/models'
# Random seed handed to the estimator's RunConfig.
SEED = 19831060

In [4]:
!echo "Files:"
!ls {COOC_DIR}/
!echo ""

!echo "info:"
!head {COOC_DIR}/info.log
!echo ""

!echo "user vocab file:"
!head {COOC_DIR}/vocab-0.txt
!echo ""

!echo "movie vocab file:"
!head {COOC_DIR}/vocab-1.txt

Files:
cooc-00000-of-00001.tfrecords vocab-0.txt
info.log                      vocab-1.txt

info:
max: 5
P: 1000209
min: 1

user vocab file:
1
2
3
4
5
6
7
8
9
10

movie vocab file:
1193
661
914
3408
2355
1197
1287
2804
594
919


## 1.  Metadata

In [5]:
# Parsing spec for one serialized example: every feature is a scalar.
# 'item1' / 'item2' are the two ids of a pair (user / movie in this notebook),
# 'score' is the training target, 'weight' scales the per-example loss and
# 'type' selects which loss term applies (see compute_loss).
FEATURES_SCHEMA = {
    'item1': tf.FixedLenFeature(shape=(), dtype=tf.string),
    'item2': tf.FixedLenFeature(shape=(), dtype=tf.string),
    'score': tf.FixedLenFeature(shape=(), dtype=tf.float32),
    'weight': tf.FixedLenFeature(shape=(), dtype=tf.float32),
    'type': tf.FixedLenFeature(shape=(), dtype=tf.string),
}

# Feature popped out of the parsed batch and used as the label.
TARGET_FEATURE_NAME = 'score'
# Feature read by model_fn as the per-example loss weight.
WEIGHT_FEATURE_NAME = 'weight'

## 2.  Data Input Function

In [6]:
def make_input_fn(file_pattern,
                  batch_size=128, num_epochs=1, mode=tf.estimator.ModeKeys.EVAL):
    """Build a zero-argument input function over TFRecord files.

    Args:
        file_pattern: Glob pattern of the TFRecord files to read.
        batch_size: Number of examples per batch.
        num_epochs: Passes over the data; forwarded unchanged to the
            dataset builder (callers in this notebook pass None).
        mode: A tf.estimator.ModeKeys value; records are shuffled only
            when it equals TRAIN.

    Returns:
        A callable that creates the batched dataset, parsed with
        FEATURES_SCHEMA and using TARGET_FEATURE_NAME as the label key.
    """
    shuffle_records = (mode == tf.estimator.ModeKeys.TRAIN)

    def _input_fn():
        # The last, smaller-than-batch_size batch is dropped, so every
        # batch the model sees has exactly batch_size rows.
        return tf.data.experimental.make_batched_features_dataset(
            file_pattern=file_pattern,
            batch_size=batch_size,
            features=FEATURES_SCHEMA,
            reader=tf.data.TFRecordDataset,
            label_key=TARGET_FEATURE_NAME,
            num_epochs=num_epochs,
            shuffle=shuffle_records,
            shuffle_buffer_size=2 * batch_size,
            sloppy_ordering=True,
            drop_final_batch=True,
        )

    return _input_fn

## 3. Feature Columns

In [7]:
def create_feature_columns(embedding_size, vocab1_file, vocab2_file):
    """Create one embedding column per side of an (item1, item2) pair.

    Args:
        embedding_size: Dimension of both embedding columns.
        vocab1_file: Vocabulary file for the 'item1' feature (User).
        vocab2_file: Vocabulary file for the 'item2' feature (Movie).

    Returns:
        A two-element list: [item1 embedding column, item2 embedding column].
    """
    # User -> item1, Movie -> item2; the list order is relied on by model_fn.
    vocab_by_key = (('item1', vocab1_file), ('item2', vocab2_file))

    return [
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_vocabulary_file(
                key=feature_key,
                vocabulary_file=vocab_file
            ),
            embedding_size
        )
        for feature_key, vocab_file in vocab_by_key
    ]

## 4.  Custom Estimator

In [8]:
def compute_loss(labels, predictions, weights, types):
    """Sum a type-dependent, weighted cost over the batch.

    Examples whose type equals 'P' are charged a weighted half squared
    error; all other examples are charged a weighted softplus of the
    signed error (prediction minus label).

    Args:
        labels: Target scores.
        predictions: Predicted scores, element-wise aligned with labels.
        weights: Per-example multipliers applied to the cost.
        types: Per-example string tags; 'P' selects the squared cost.

    Returns:
        A scalar tensor: the sum of the per-example costs.
    """
    residuals = predictions - labels

    # Both costs are computed for every example; tf.where picks one per row.
    squared_cost = 0.5 * weights * tf.math.square(residuals)
    softplus_cost = weights * tf.math.softplus(residuals)

    is_positive = tf.equal(types, 'P')
    per_example_cost = tf.where(is_positive, squared_cost, softplus_cost)

    return tf.reduce_sum(per_example_cost)

def model_fn(features, labels, mode, params):
    """Estimator model function for the pair-scoring embedding model.

    The score of a pair is the dot product of the item1 and item2
    embeddings, squashed by a sigmoid and rescaled to the range
    [params.min_value, params.max_value].

    Args:
        features: Dict of feature tensors. 'item1' is always read; 'item2',
            'type' and WEIGHT_FEATURE_NAME are read in TRAIN and EVAL.
        labels: Target scores (unused in PREDICT).
        mode: A tf.estimator.ModeKeys value.
        params: Object with attributes embedding_size, vocab1_file,
            vocab2_file, min_value, max_value and learning_rate.

    Returns:
        A tf.estimator.EstimatorSpec. In PREDICT mode the predictions (and
        the 'predictions' export output) are the item1 embeddings; in
        TRAIN/EVAL the spec carries the loss, plus a train_op in TRAIN.
    """
    feature_columns = create_feature_columns(
        params.embedding_size, params.vocab1_file, params.vocab2_file)

    # User -> item1
    # Built first in every mode so the item2 layer below keeps the
    # 'input_layer_1' scope name that extract_embeddings looks up.
    item1_layer = tf.feature_column.input_layer(
        features={'item1': features['item1']},
        feature_columns=[feature_columns[0]])

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Serving only needs the item1 embedding lookup.
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=item1_layer,
            export_outputs={
                'predictions': tf.estimator.export.PredictOutput(item1_layer)
            }
        )

    # Movie -> item2
    item2_layer = tf.feature_column.input_layer(
        features={'item2': features['item2']},
        feature_columns=[feature_columns[1]])

    dot_product = tf.keras.layers.Dot(axes=1)([item1_layer, item2_layer])
    score_range = params.max_value - params.min_value
    predicted_scores = score_range * tf.sigmoid(dot_product) + params.min_value

    loss = compute_loss(
        labels=labels,
        # Drop only the trailing size-1 axis left by the Dot layer; a bare
        # squeeze would also remove the batch axis when the batch size is 1.
        predictions=tf.squeeze(predicted_scores, axis=-1),
        weights=features[WEIGHT_FEATURE_NAME],
        types=features['type']
    )

    train_op = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # The optimizer is only needed for training; EVAL just reports loss.
        train_op = tf.train.AdamOptimizer(params.learning_rate).minimize(
            loss=loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op
    )


def create_estimator(params, run_config):
    """Wrap model_fn in a tf.estimator.Estimator.

    Args:
        params: Hyper-parameter object forwarded to model_fn.
        run_config: tf.estimator.RunConfig for the estimator.

    Returns:
        The configured tf.estimator.Estimator.
    """
    return tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params=params
    )

## 5. Experiment

In [9]:
def run_experiment(params, run_config):
    """Train and evaluate a fresh estimator, printing wall-clock timing.

    Any existing content of run_config.model_dir is deleted first, so each
    call starts from scratch.

    Args:
        params: Object with attributes train_data_files, eval_data_files,
            batch_size, traning_steps, eval_steps and eval_throttle_secs
            (plus whatever model_fn reads).
        run_config: tf.estimator.RunConfig; its model_dir is wiped and reused.

    Returns:
        The estimator after tf.estimator.train_and_evaluate has returned.
    """
    clock_format = "%H:%M:%S"
    separator = "......................................."

    # Training input: shuffled, repeated indefinitely until max_steps.
    train_spec = tf.estimator.TrainSpec(
        input_fn=make_input_fn(
            params.train_data_files,
            batch_size=params.batch_size,
            num_epochs=None,
            mode=tf.estimator.ModeKeys.TRAIN
        ),
        max_steps=params.traning_steps
    )

    # Evaluation input: make_input_fn's default mode (EVAL), so no shuffling.
    eval_input_fn = make_input_fn(
        params.eval_data_files,
        batch_size=params.batch_size,
        num_epochs=None
    )
    eval_spec = tf.estimator.EvalSpec(
        # The evaluation run is named after the current UTC time.
        name=datetime.utcnow().strftime("%H%M%S"),
        input_fn=eval_input_fn,
        steps=params.eval_steps,
        start_delay_secs=0,
        throttle_secs=params.eval_throttle_secs
    )

    tf.logging.set_verbosity(tf.logging.INFO)

    previous_run_dir = run_config.model_dir
    if tf.gfile.Exists(previous_run_dir):
        print("Removing previous artefacts...")
        tf.gfile.DeleteRecursively(previous_run_dir)

    print("")
    estimator = create_estimator(params, run_config)
    print("")

    started_at = datetime.utcnow()
    print("Experiment started at {}".format(started_at.strftime(clock_format)))
    print(separator)

    tf.estimator.train_and_evaluate(
        estimator=estimator,
        train_spec=train_spec,
        eval_spec=eval_spec
    )

    finished_at = datetime.utcnow()
    print(separator)
    print("Experiment finished at {}".format(finished_at.strftime(clock_format)))
    print("")
    elapsed_seconds = (finished_at - started_at).total_seconds()
    print("Experiment elapsed time: {} seconds".format(elapsed_seconds))

    return estimator

## 6. Parameters 

In [10]:
MODEL_NAME = 'cooc2emb-01'
model_dir = os.path.join(MODELS_DIR, MODEL_NAME)
info_file = os.path.join(COOC_DIR, 'info.log')

# Fallback score range, used only when info.log is missing or unreadable.
# model_fn rescales the sigmoid onto [min_value, max_value], so the lower
# bound must not exceed the upper bound.
min_value = -5
max_value = 15

# Parsed "key: value" entries of info.log (values converted to float).
info_map = {}

if os.path.exists(info_file):
    try:
        with open(info_file) as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank lines instead of aborting the parse.
                    continue
                key, _, value = line.partition(":")
                info_map[key.strip()] = float(value)
        # Widen to whole numbers so every observed score lies in the range.
        min_value = math.floor(info_map['min'])
        max_value = math.ceil(info_map['max'])
    except (OSError, ValueError, KeyError) as err:
        # Best effort: keep the fallback range, but say so instead of
        # silently hiding the problem.
        print("Could not read the score range from {} ({!r}); using [{}, {}].".format(
            info_file, err, min_value, max_value))
    
class HParams:
    """Plain attribute container for the experiment settings."""


params = HParams()
# Attributes are set in this order so that vars(params) prints them in it.
vars(params).update({
    # Training and evaluation read the same co-occurrence files.
    'train_data_files': "{}/cooc-*.tfrecords".format(COOC_DIR),
    'eval_data_files': "{}/cooc-*.tfrecords".format(COOC_DIR),
    # vocab-0 feeds 'item1' (users), vocab-1 feeds 'item2' (movies).
    'vocab1_file': os.path.join(COOC_DIR, 'vocab-0.txt'),
    'vocab2_file': os.path.join(COOC_DIR, 'vocab-1.txt'),
    'embedding_size': 32,
    # Range the predicted score is rescaled to in model_fn.
    'min_value': min_value,
    'max_value': max_value,
    'batch_size': 265,
    'traning_steps': 30000,
    'learning_rate': 0.001,
    'eval_steps': 1,
    'eval_throttle_secs': 0,
})

print(vars(params))

# Estimator runtime settings: where checkpoints go, how often they are
# written (every 1000 steps), the checkpoint retention limit and the seed.
run_config = tf.estimator.RunConfig(
    model_dir=model_dir,
    tf_random_seed=SEED,
    save_checkpoints_steps=1000,
    keep_checkpoint_max=3,
)

{'train_data_files': './workspace/cooc/cooc-*.tfrecords', 'eval_data_files': './workspace/cooc/cooc-*.tfrecords', 'vocab1_file': './workspace/cooc/vocab-0.txt', 'vocab2_file': './workspace/cooc/vocab-1.txt', 'embedding_size': 32, 'min_value': 1, 'max_value': 5, 'batch_size': 265, 'traning_steps': 30000, 'learning_rate': 0.001, 'eval_steps': 1, 'eval_throttle_secs': 0}


## 7. Run

In [11]:
# Train and evaluate with the settings above; the returned estimator is
# reused later in the notebook for the SavedModel export.
estimator = run_experiment(params, run_config)


INFO:tensorflow:Using config: {'_model_dir': './workspace/models/cooc2emb-01', '_tf_random_seed': 19831060, '_save_summary_steps': 100, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 3, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12cc2fc18>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

Experiment started at 01:18:12
.......................................
INFO:tensorflow:Not using Distribute Coordina

INFO:tensorflow:Restoring parameters from ./workspace/models/cooc2emb-01/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2019-09-27-02:18:18
INFO:tensorflow:Saving dict for global step 1000: global_step = 1000, loss = 228.61845
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: ./workspace/models/cooc2emb-01/model.ckpt-1000
INFO:tensorflow:global_step/sec: 91.0585
INFO:tensorflow:loss = 206.55667, step = 1001 (1.098 sec)
INFO:tensorflow:global_step/sec: 303.72
INFO:tensorflow:loss = 210.43985, step = 1101 (0.330 sec)
INFO:tensorflow:global_step/sec: 245.674
INFO:tensorflow:loss = 173.55173, step = 1201 (0.409 sec)
INFO:tensorflow:global_step/sec: 226.249
INFO:tensorflow:loss = 210.16516, step = 1301 (0.441 sec)
INFO:tensorflow:global_step/sec: 207.724
INFO:tensorflow:loss = 236.2754, step = 1401 (0.481 sec)
INFO:tensorflow:global_step/sec: 229.6

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-09-27T02:18:34Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./workspace/models/cooc2emb-01/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2019-09-27-02:18:34
INFO:tensorflow:Saving dict for global step 5000: global_step = 5000, loss = 123.15454
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: ./workspace/models/cooc2emb-01/model.ckpt-5000
INFO:tensorflow:global_step/sec: 89.5826
INFO:tensorflow:loss = 156.97356, step = 5001 (1.117 sec)
INFO:tensorflow:global_step/sec: 469.25
INFO:tensorflow:loss = 155.15816, step = 5101 (0.213 sec)
INFO:tensorflow:global_step/sec: 288.656
INFO:tensorflow:loss = 132.2853, step = 5201 (0.346 sec)
INFO:tensorflow:global_step/sec: 279.986
INFO:tensorflow:loss = 137.3463, step = 5301 (0.357 sec)
IN

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2019-09-27-02:18:50
INFO:tensorflow:Saving dict for global step 9000: global_step = 9000, loss = 104.06326
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 9000: ./workspace/models/cooc2emb-01/model.ckpt-9000
INFO:tensorflow:global_step/sec: 48.222
INFO:tensorflow:loss = 92.981766, step = 9001 (2.072 sec)
INFO:tensorflow:global_step/sec: 375.47
INFO:tensorflow:loss = 90.7888, step = 9101 (0.266 sec)
INFO:tensorflow:global_step/sec: 390.421
INFO:tensorflow:loss = 95.52135, step = 9201 (0.256 sec)
INFO:tensorflow:global_step/sec: 428.189
INFO:tensorflow:loss = 121.99697, step = 9301 (0.234 sec)
INFO:tensorflow:global_step/sec: 451.532
INFO:tensorflow:loss = 100.0459, step = 9401 (0.222 sec)
INFO:tensorflow:global_step/sec: 431.057
INFO:tensorflow:loss = 114.793106, step = 9501 (0.232 sec)
INFO:tensorflow:global_step/sec

INFO:tensorflow:Saving dict for global step 13000: global_step = 13000, loss = 97.54475
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 13000: ./workspace/models/cooc2emb-01/model.ckpt-13000
INFO:tensorflow:global_step/sec: 55.8868
INFO:tensorflow:loss = 109.79943, step = 13001 (1.789 sec)
INFO:tensorflow:global_step/sec: 348.249
INFO:tensorflow:loss = 105.79384, step = 13101 (0.288 sec)
INFO:tensorflow:global_step/sec: 380.143
INFO:tensorflow:loss = 100.013985, step = 13201 (0.263 sec)
INFO:tensorflow:global_step/sec: 373.631
INFO:tensorflow:loss = 94.03424, step = 13301 (0.268 sec)
INFO:tensorflow:global_step/sec: 367.106
INFO:tensorflow:loss = 83.2103, step = 13401 (0.272 sec)
INFO:tensorflow:global_step/sec: 427.926
INFO:tensorflow:loss = 109.10397, step = 13501 (0.233 sec)
INFO:tensorflow:global_step/sec: 273.396
INFO:tensorflow:loss = 91.80887, step = 13601 (0.367 sec)
INFO:tensorflow:global_step/sec: 280.017
INFO:tensorflow:loss = 87.61782, step = 13701 (0.358 s

INFO:tensorflow:global_step/sec: 65.0242
INFO:tensorflow:loss = 88.75549, step = 17001 (1.538 sec)
INFO:tensorflow:global_step/sec: 351.961
INFO:tensorflow:loss = 87.70805, step = 17101 (0.284 sec)
INFO:tensorflow:global_step/sec: 410.777
INFO:tensorflow:loss = 96.26431, step = 17201 (0.244 sec)
INFO:tensorflow:global_step/sec: 428.95
INFO:tensorflow:loss = 80.13662, step = 17301 (0.233 sec)
INFO:tensorflow:global_step/sec: 397.27
INFO:tensorflow:loss = 88.219055, step = 17401 (0.252 sec)
INFO:tensorflow:global_step/sec: 368.491
INFO:tensorflow:loss = 122.49022, step = 17501 (0.272 sec)
INFO:tensorflow:global_step/sec: 352.746
INFO:tensorflow:loss = 93.09335, step = 17601 (0.283 sec)
INFO:tensorflow:global_step/sec: 368.244
INFO:tensorflow:loss = 76.97647, step = 17701 (0.272 sec)
INFO:tensorflow:global_step/sec: 264.687
INFO:tensorflow:loss = 92.97679, step = 17801 (0.378 sec)
INFO:tensorflow:global_step/sec: 376.336
INFO:tensorflow:loss = 77.86886, step = 17901 (0.265 sec)
INFO:tenso

INFO:tensorflow:global_step/sec: 410.615
INFO:tensorflow:loss = 89.90655, step = 21201 (0.243 sec)
INFO:tensorflow:global_step/sec: 388.675
INFO:tensorflow:loss = 95.30946, step = 21301 (0.257 sec)
INFO:tensorflow:global_step/sec: 401.784
INFO:tensorflow:loss = 90.563354, step = 21401 (0.249 sec)
INFO:tensorflow:global_step/sec: 367.331
INFO:tensorflow:loss = 75.42296, step = 21501 (0.273 sec)
INFO:tensorflow:global_step/sec: 345.771
INFO:tensorflow:loss = 77.22201, step = 21601 (0.289 sec)
INFO:tensorflow:global_step/sec: 350.21
INFO:tensorflow:loss = 78.46579, step = 21701 (0.286 sec)
INFO:tensorflow:global_step/sec: 337.19
INFO:tensorflow:loss = 90.17363, step = 21801 (0.297 sec)
INFO:tensorflow:global_step/sec: 339.856
INFO:tensorflow:loss = 74.302895, step = 21901 (0.294 sec)
INFO:tensorflow:Saving checkpoints for 22000 into ./workspace/models/cooc2emb-01/model.ckpt.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:vocabulary_size = 6040 in item1 is inferred from the number of el

INFO:tensorflow:global_step/sec: 228.342
INFO:tensorflow:loss = 92.39803, step = 25401 (0.438 sec)
INFO:tensorflow:global_step/sec: 436.173
INFO:tensorflow:loss = 72.07705, step = 25501 (0.229 sec)
INFO:tensorflow:global_step/sec: 432.479
INFO:tensorflow:loss = 112.93411, step = 25601 (0.231 sec)
INFO:tensorflow:global_step/sec: 426.109
INFO:tensorflow:loss = 78.771706, step = 25701 (0.235 sec)
INFO:tensorflow:global_step/sec: 258.609
INFO:tensorflow:loss = 78.13049, step = 25801 (0.393 sec)
INFO:tensorflow:global_step/sec: 298.423
INFO:tensorflow:loss = 77.73168, step = 25901 (0.329 sec)
INFO:tensorflow:Saving checkpoints for 26000 into ./workspace/models/cooc2emb-01/model.ckpt.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:vocabulary_size = 6040 in item1 is inferred from the number of elements in the vocabulary_file ./workspace/cooc/vocab-0.txt.
INFO:tensorflow:vocabulary_size = 3706 in item2 is inferred from the number of elements in the vocabulary_file ./workspace/cooc/vocab-1.

INFO:tensorflow:global_step/sec: 381.013
INFO:tensorflow:loss = 81.368416, step = 29601 (0.263 sec)
INFO:tensorflow:global_step/sec: 296.638
INFO:tensorflow:loss = 78.36748, step = 29701 (0.342 sec)
INFO:tensorflow:global_step/sec: 267.127
INFO:tensorflow:loss = 85.49918, step = 29801 (0.369 sec)
INFO:tensorflow:global_step/sec: 434.543
INFO:tensorflow:loss = 78.89255, step = 29901 (0.229 sec)
INFO:tensorflow:Saving checkpoints for 30000 into ./workspace/models/cooc2emb-01/model.ckpt.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:vocabulary_size = 6040 in item1 is inferred from the number of elements in the vocabulary_file ./workspace/cooc/vocab-0.txt.
INFO:tensorflow:vocabulary_size = 3706 in item2 is inferred from the number of elements in the vocabulary_file ./workspace/cooc/vocab-1.txt.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-09-27T02:20:17Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./workspace/mode

<img src="loss.png" width="800" height="1000"/>

## 8. Extract movie embeddings

In [12]:
def extract_embeddings():
    """Load the trained item2 (movie) embedding matrix from the checkpoint.

    Reads the checkpoint written at step params.traning_steps in model_dir.
    The graph is imported into a private tf.Graph rather than the
    process-wide default graph, so calling this function repeatedly does
    not pile up duplicate nodes in the default graph.

    Returns:
        A numpy array holding the values of the
        'input_layer_1/item2_embedding/embedding_weights' variable.
    """
    checkpoint_path = os.path.join(
        model_dir, 'model.ckpt-{}'.format(params.traning_steps))

    graph = tf.Graph()
    with graph.as_default():
        saver = tf.train.import_meta_graph(checkpoint_path + '.meta')
        with tf.Session(graph=graph) as sess:
            saver.restore(sess, checkpoint_path)
            # Movie -> item2
            weights_tensor = graph.get_tensor_by_name(
                'input_layer_1/item2_embedding/embedding_weights:0')
            weights = np.array(sess.run(weights_tensor))

    return weights

In [13]:
# Pull the trained movie embedding matrix out of the checkpoint, then show
# how many rows it has and what the first row looks like.
embeddings = extract_embeddings()
print(embeddings.shape[0])
print(embeddings[0])

INFO:tensorflow:Restoring parameters from ./workspace/models/cooc2emb-01/model.ckpt-30000
3706
[ 0.48786554  0.44926533  0.99715304 -0.08875199 -0.5374395  -0.6090899
 -0.40593907  0.4734205  -0.35131714 -0.2558661   0.56073904  0.35982215
  0.97218674 -0.55779034  0.70726514  0.7474997  -0.92734194  0.26303262
 -0.3658313  -0.50333756  0.75770146 -0.14295036 -0.30883303 -0.8436093
  0.62609416  0.7814764   0.3348082   0.95938635 -0.56043726 -0.04526934
  0.8240653   0.25066465]


In [14]:
vocab_path = os.path.join(COOC_DIR,'vocab-1.txt')
output_path = os.path.join(WORKSPACE,'embeddings.tsv')

def write_embeddings_to_tsv():
    """Write one tab-separated line per vocabulary entry to output_path.

    Each line holds the vocabulary item followed by the values of the row
    of the module-level `embeddings` array at the same position.
    """
    with open(output_path, 'w') as out_f, open(vocab_path) as vocab_f:
        for row, raw_item in enumerate(vocab_f):
            # Line i of the vocabulary file is paired with embeddings[i].
            fields = [raw_item.strip()]
            fields.extend(str(value) for value in embeddings[row])
            out_f.write('\t'.join(fields) + '\n')

write_embeddings_to_tsv()

In [15]:
!head {output_path}

1193	0.48786554	0.44926533	0.99715304	-0.08875199	-0.5374395	-0.6090899	-0.40593907	0.4734205	-0.35131714	-0.2558661	0.56073904	0.35982215	0.97218674	-0.55779034	0.70726514	0.7474997	-0.92734194	0.26303262	-0.3658313	-0.50333756	0.75770146	-0.14295036	-0.30883303	-0.8436093	0.62609416	0.7814764	0.3348082	0.95938635	-0.56043726	-0.04526934	0.8240653	0.25066465
661	0.27199823	-0.09464008	0.22439703	0.77144176	-0.34646046	-0.4633477	0.06666238	-0.4058656	-0.2893718	0.1230335	0.018878536	-0.050879996	-0.052338976	-0.39645898	0.981355	-0.10779701	0.548894	-0.41455618	0.025378725	-0.35352382	0.08359717	0.45449924	-0.31146097	-0.41354963	0.12552387	0.39485568	0.062287323	-0.1360741	0.15756267	0.052602876	0.049429853	0.83662766
914	0.5469784	0.87133443	0.09771238	0.0372469	-0.4693808	-0.1703261	0.07025128	0.6160236	0.10940119	-0.24975684	-0.9800201	0.39789256	0.2887055	0.1074065	-0.6837663	-0.15829694	-1.4904238	0.29903486	-0.69700825	-0.74213517	0.1001014	0.6232283	-0.37190148	-0.32212955	0

## 9. Export saved model as user-embedding lookup

In [16]:
def make_serving_input_receiver_fn():
    """Build the serving input receiver for the exported lookup model.

    Returns:
        A raw serving input receiver function that accepts a single
        feature, 'item1', as a 1-D batch of strings.
    """
    return tf.estimator.export.build_raw_serving_input_receiver_fn(
        {'item1': tf.placeholder(shape=[None], dtype=tf.string)} # User -> item1
    )

export_dir = os.path.join(model_dir, 'export')

# Start from an empty export folder so it only ever holds the newest export.
if tf.gfile.Exists(export_dir):
    tf.gfile.DeleteRecursively(export_dir)

# export_savedmodel is deprecated and has been renamed to export_saved_model.
estimator.export_saved_model(
    export_dir_base=export_dir,
    serving_input_receiver_fn=make_serving_input_receiver_fn()
)

Instructions for updating:
This function has been renamed, use `export_saved_model` instead.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:vocabulary_size = 6040 in item1 is inferred from the number of elements in the vocabulary_file ./workspace/cooc/vocab-0.txt.
INFO:tensorflow:vocabulary_size = 3706 in item2 is inferred from the number of elements in the vocabulary_file ./workspace/cooc/vocab-1.txt.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predictions', 'serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:t

b'./workspace/models/cooc2emb-01/export/1569547218'

In [17]:
export_dir = os.path.join(model_dir, "export")

# Each export lives in a sub-folder whose name is all digits. os.listdir
# returns entries in arbitrary order, so pick the numerically largest name
# instead of whichever one happens to come first.
export_versions = [f for f in os.listdir(export_dir) if f.isdigit()]
saved_model_dir = os.path.join(export_dir, max(export_versions, key=int))

print(saved_model_dir)

predictor_fn = tf.contrib.predictor.from_saved_model(
    export_dir = saved_model_dir,
)

# Look up the embedding of the user whose id is '1'.
output = predictor_fn({'item1': ['1']})
print(output)

./workspace/models/cooc2emb-01/export/1569547218
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Restoring parameters from ./workspace/models/cooc2emb-01/export/1569547218/variables/variables
{'output': array([[ 1.1937319e-02, -1.1453444e-01,  5.2106757e-02, -4.2205505e-02,
         1.4611191e-01, -5.5050647e-01, -2.5113228e-01,  3.0310541e-01,
         2.6608611e-04, -2.9607463e-01,  2.3776400e-01,  1.5668590e-01,
