In [199]:
import tensorflow as tf
import numpy as np
import random

In [200]:
tf.logging.set_verbosity(tf.logging.INFO)

In [285]:
data_size = 50000
max_transaction_history = 20
max_product_click_history = 20
max_promotion_click_history = 20
input_embedding_size = 64
category_size = 50
numeric_size = 1

data1 = np.random.randint(category_size, size=(data_size, random.randint(1,max_transaction_history)))
data1 = tf.keras.preprocessing.sequence.pad_sequences(data1, max_transaction_history, padding='post')
#data1 = np.array([[1,2], [1,2,4]])

data2 = np.random.randint(category_size, size=(data_size, max_product_click_history-3))
data2 = tf.keras.preprocessing.sequence.pad_sequences(data2, max_product_click_history, padding='post')

data3 = np.random.randint(category_size, size=(data_size, max_promotion_click_history-4))
data3 = tf.keras.preprocessing.sequence.pad_sequences(data3, max_promotion_click_history, padding='post')

inputs = [data1, data2, data3]

single_category_cols = {105:(3,8),106:(5,8),107:(10,8)}   ## such as location : unique_value_size
for k in single_category_cols:
    inputs.append(np.random.randint(single_category_cols[k][0], size=(data_size, 1)))

num1 = np.random.random(size=(data_size, numeric_size))
inputs.append(num1)

labels = np.random.randint(0, category_size-1, size=(data_size, 1))
one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=category_size)

In [286]:
output_filename = "../data/tf.tfrecord"

def float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(value)))

with tf.python_io.TFRecordWriter(output_filename) as writer:
    for (v1,v2,v3,v4,v5,v6,v7,v8) in zip(data1, data2, data3, inputs[3], inputs[4], inputs[5], inputs[6], labels):
        features = {'seq_categorical_0': int64_feature(v1), 'seq_categorical_1': int64_feature(v2),
                    'seq_categorical_2': int64_feature(v3),'105': int64_feature(v4),
                    '106': int64_feature(v5),'107': int64_feature(v6), 'numeric': float_feature(v7),
                    'labels': int64_feature(v8)}
        tf_example = tf.train.Example(features=tf.train.Features(feature=features))
        writer.write(tf_example.SerializeToString())


In [287]:
def parse_function(example_proto):
    features = {"seq_categorical_0":tf.FixedLenFeature([max_transaction_history],tf.int64),
                "seq_categorical_1":tf.FixedLenFeature([max_product_click_history], tf.int64),
                "seq_categorical_2":tf.FixedLenFeature([max_promotion_click_history], tf.int64),
                "105":tf.FixedLenFeature([1], tf.int64),
                "106":tf.FixedLenFeature([1], tf.int64),
                "107":tf.FixedLenFeature([1], tf.int64),
                "numeric":tf.FixedLenFeature([1], tf.float32),
                "labels":tf.FixedLenFeature([1], tf.int64),
         }
    parsed_features = tf.parse_single_example(example_proto, features)
    #return parsed_features
    #return (parsed_features["seq_categorical_0"], parsed_features["seq_categorical_1"], parsed_features["seq_categorical_2"], parsed_features["105"], parsed_features["106"], parsed_features["107"], parsed_features["numeric"]), parsed_features["labels"]
    return {'seq_categorical_0': parsed_features["seq_categorical_0"], 'seq_categorical_1':parsed_features["seq_categorical_1"], 'numeric':parsed_features["numeric"]}, parsed_features["labels"]



In [288]:
def load_data():
    ds=tf.data.TFRecordDataset('../data/tf.tfrecord')
    ds=ds.map(parse_function, num_parallel_calls=4)
    ds=ds.repeat() 
    ds=ds.batch(64)
    return ds

In [289]:
tran_seq = tf.feature_column.categorical_column_with_identity('seq_categorical_0', category_size)
promo_seq = tf.feature_column.categorical_column_with_identity('seq_categorical_1', category_size)

In [290]:
tans_emb = tf.feature_column.embedding_column(categorical_column=tran_seq, dimension=input_embedding_size, combiner='sqrtn')
promo_emb = tf.feature_column.embedding_column(categorical_column=promo_seq, dimension=input_embedding_size, combiner='sqrtn')

In [291]:
estimator = tf.estimator.DNNClassifier(
    feature_columns=[tans_emb, promo_emb],
    n_classes=category_size,
    hidden_units=[2048, 1024, 512, 256, 128],
    #optimizer=tf.train.AdamOptimizer(0.0001),
    optimizer=lambda: tf.train.AdamOptimizer(
        learning_rate=tf.train.exponential_decay(
            learning_rate=0.001,
            global_step=tf.train.get_global_step(),
            decay_steps=5000,
            decay_rate=0.96)))

I0812 12:43:30.344661 4798883264 estimator.py:1790] Using default config.
W0812 12:43:30.348038 4798883264 estimator.py:1811] Using temporary folder as model directory: /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpczv5jlzc
I0812 12:43:30.351577 4798883264 estimator.py:209] Using config: {'_model_dir': '/var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpczv5jlzc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1655ba6d8>, '_tas

In [304]:
estimator.train(input_fn=load_data, steps=10000)

I0812 13:26:28.901501 4798883264 estimator.py:1145] Calling model_fn.
I0812 13:26:30.446964 4798883264 estimator.py:1147] Done calling model_fn.
I0812 13:26:30.448822 4798883264 basic_session_run_hooks.py:541] Create CheckpointSaverHook.
I0812 13:26:30.979308 4798883264 monitored_session.py:240] Graph was finalized.
I0812 13:26:30.985590 4798883264 saver.py:1280] Restoring parameters from /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpczv5jlzc/model.ckpt-60000
I0812 13:26:31.222370 4798883264 session_manager.py:500] Running local_init_op.
I0812 13:26:31.270895 4798883264 session_manager.py:502] Done running local_init_op.
I0812 13:26:32.639109 4798883264 basic_session_run_hooks.py:606] Saving checkpoints for 60000 into /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpczv5jlzc/model.ckpt.
I0812 13:26:33.515489 4798883264 basic_session_run_hooks.py:262] loss = 62.501167, step = 60001
I0812 13:26:36.284466 4798883264 basic_session_run_hooks.py:692] global_step/sec: 36.108
I0812 13

I0812 13:28:11.896317 4798883264 basic_session_run_hooks.py:692] global_step/sec: 36.8373
I0812 13:28:11.898344 4798883264 basic_session_run_hooks.py:260] loss = 16.739208, step = 63801 (2.715 sec)
I0812 13:28:14.463960 4798883264 basic_session_run_hooks.py:692] global_step/sec: 38.9457
I0812 13:28:14.465499 4798883264 basic_session_run_hooks.py:260] loss = 9.515968, step = 63901 (2.567 sec)
I0812 13:28:16.851572 4798883264 basic_session_run_hooks.py:692] global_step/sec: 41.8827
I0812 13:28:16.853372 4798883264 basic_session_run_hooks.py:260] loss = 8.419453, step = 64001 (2.388 sec)
I0812 13:28:19.325573 4798883264 basic_session_run_hooks.py:692] global_step/sec: 40.4198
I0812 13:28:19.327362 4798883264 basic_session_run_hooks.py:260] loss = 6.8614426, step = 64101 (2.474 sec)
I0812 13:28:21.724669 4798883264 basic_session_run_hooks.py:692] global_step/sec: 41.6833
I0812 13:28:21.726477 4798883264 basic_session_run_hooks.py:260] loss = 7.6913314, step = 64201 (2.399 sec)
I0812 13:28:

I0812 13:29:59.007759 4798883264 basic_session_run_hooks.py:692] global_step/sec: 39.3132
I0812 13:29:59.010280 4798883264 basic_session_run_hooks.py:260] loss = 11.527557, step = 68001 (2.545 sec)
I0812 13:30:01.545178 4798883264 basic_session_run_hooks.py:692] global_step/sec: 39.4089
I0812 13:30:01.547007 4798883264 basic_session_run_hooks.py:260] loss = 4.7333727, step = 68101 (2.537 sec)
I0812 13:30:04.120306 4798883264 basic_session_run_hooks.py:692] global_step/sec: 38.8331
I0812 13:30:04.122070 4798883264 basic_session_run_hooks.py:260] loss = 4.5427365, step = 68201 (2.575 sec)
I0812 13:30:06.615477 4798883264 basic_session_run_hooks.py:692] global_step/sec: 40.0766
I0812 13:30:06.617798 4798883264 basic_session_run_hooks.py:260] loss = 3.7183228, step = 68301 (2.496 sec)
I0812 13:30:09.147878 4798883264 basic_session_run_hooks.py:692] global_step/sec: 39.4884
I0812 13:30:09.149681 4798883264 basic_session_run_hooks.py:260] loss = 4.88975, step = 68401 (2.532 sec)
I0812 13:30:

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x1655bac18>

In [305]:
estimator.evaluate(input_fn=load_data, steps=5000//64)

I0812 13:32:33.455986 4798883264 estimator.py:1145] Calling model_fn.
I0812 13:32:34.461858 4798883264 estimator.py:1147] Done calling model_fn.
I0812 13:32:34.508867 4798883264 evaluation.py:255] Starting evaluation at 2019-08-12T13:32:34Z
I0812 13:32:34.770307 4798883264 monitored_session.py:240] Graph was finalized.
I0812 13:32:34.775408 4798883264 saver.py:1280] Restoring parameters from /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpczv5jlzc/model.ckpt-70000
I0812 13:32:34.891050 4798883264 session_manager.py:500] Running local_init_op.
I0812 13:32:34.932044 4798883264 session_manager.py:502] Done running local_init_op.
I0812 13:32:35.214597 4798883264 evaluation.py:167] Evaluation [7/78]
I0812 13:32:35.267539 4798883264 evaluation.py:167] Evaluation [14/78]
I0812 13:32:35.330278 4798883264 evaluation.py:167] Evaluation [21/78]
I0812 13:32:35.390110 4798883264 evaluation.py:167] Evaluation [28/78]
I0812 13:32:35.451578 4798883264 evaluation.py:167] Evaluation [35/78]
I0812 13

{'accuracy': 0.5983574,
 'average_loss': 2.7168224,
 'loss': 173.87663,
 'global_step': 70000}

In [130]:
def custom_metric(labels, predictions):
    # This function will be called by the Estimator, passing its predictions.
    # Let's suppose you want to add the "mean" metric...

    # Accessing the class predictions (careful, the key name may change from one canned Estimator to another)
    predicted_classes = predictions["class_ids"]  

    # Defining the metric (value and update tensors):
    custom_metric = tf.metrics.mean(labels, predicted_classes, name="custom_metric")

    # Returning as a dict:
    return {"custom_metric": custom_metric}

In [151]:
class RecoEstimator():
    
    def __init__(self, max_transaction_history = 20, max_product_click_history = 20, max_promotion_click_history = 20,
                 category_size = 100, single_categorical_features = None, numeric_features_size = 10,
                 hidden_layer1_size = 1024, hidden_layer2_size = 512, hidden_layer3_size = 256, activation='relu',
                input_embedding_size = 128):
        
        self.max_transaction_history = max_transaction_history
        self.max_product_click_history = max_product_click_history
        self.max_promotion_click_history = max_promotion_click_history
        self.category_size = category_size
        self.hidden_layer1_size = hidden_layer1_size
        self.hidden_layer2_size = hidden_layer2_size
        self.hidden_layer3_size = hidden_layer3_size
        self.single_categorical_features = single_categorical_features
        self.numeric_features_size = numeric_features_size
        self.activation = activation
        self.input_embedding_size = input_embedding_size
        self.model = None
        self.build()
        
    
    def build(self):
        seqs = self.create_input()
        
        self.model = tf.estimator.DNNClassifier(feature_columns=seqs,n_classes=self.category_size,
                                               hidden_units=[self.hidden_layer1_size, self.hidden_layer2_size, self.hidden_layer3_size],
                                               optimizer=tf.train.AdamOptimizer(0.0005))
        
        
    def create_input(self):
        
        transaction_cols = [x for x in range(self.max_transaction_history)]
        promotion_click_cols = [x for x in range(self.max_promotion_click_history)]
        seqs=[]
        seqs.append(self.seq_categorical_input('seq_categorical_0', len(transaction_cols)))
        seqs.append(self.seq_categorical_input('seq_categorical_1', len(promotion_click_cols)))
        
        seqs.append(tf.feature_column.numeric_column('numeric'))
            
        return seqs
        
    def seq_categorical_input(self, name, max_history):
        seq_input = tf.feature_column.categorical_column_with_identity(name, self.category_size)
        seq_emb = tf.feature_column.embedding_column(categorical_column=seq_input, dimension=self.input_embedding_size)
        return seq_emb

In [152]:
model = RecoEstimator(max_transaction_history, max_product_click_history, max_promotion_click_history, category_size,
                numeric_features_size = numeric_size, input_embedding_size = input_embedding_size,
                single_categorical_features = single_category_cols).model

I0810 21:25:05.375322 4798883264 estimator.py:1790] Using default config.
W0810 21:25:05.376998 4798883264 estimator.py:1811] Using temporary folder as model directory: /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmppy1dlhmj
I0810 21:25:05.377900 4798883264 estimator.py:209] Using config: {'_model_dir': '/var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmppy1dlhmj', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x14d234c18>, '_tas

In [153]:
model = tf.estimator.add_metrics(model, custom_metric)

I0810 21:25:06.280400 4798883264 estimator.py:209] Using config: {'_model_dir': '/var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmppy1dlhmj', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x14d234cf8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [None]:
model = tf.estimator.add_metrics(model, my_auc)

In [154]:
model.train(input_fn=load_data, steps=1500)

I0810 21:25:06.889348 4798883264 estimator.py:1145] Calling model_fn.
I0810 21:25:06.890159 4798883264 estimator.py:1145] Calling model_fn.
I0810 21:25:07.860321 4798883264 estimator.py:1147] Done calling model_fn.
I0810 21:25:07.861361 4798883264 estimator.py:1147] Done calling model_fn.
I0810 21:25:07.862575 4798883264 basic_session_run_hooks.py:541] Create CheckpointSaverHook.
I0810 21:25:08.108912 4798883264 monitored_session.py:240] Graph was finalized.
I0810 21:25:08.199267 4798883264 session_manager.py:500] Running local_init_op.
I0810 21:25:08.215583 4798883264 session_manager.py:502] Done running local_init_op.
I0810 21:25:08.584144 4798883264 basic_session_run_hooks.py:606] Saving checkpoints for 0 into /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmppy1dlhmj/model.ckpt.
I0810 21:25:08.857649 4798883264 basic_session_run_hooks.py:262] loss = 73.93373, step = 1
I0810 21:25:09.354086 4798883264 basic_session_run_hooks.py:692] global_step/sec: 201.317
I0810 21:25:09.354933 4

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x14d234908>

In [156]:
model.evaluate(input_fn=load_data, steps=500//32)

I0810 21:25:19.454617 4798883264 estimator.py:1145] Calling model_fn.
I0810 21:25:19.455368 4798883264 estimator.py:1145] Calling model_fn.
I0810 21:25:19.748594 4798883264 estimator.py:1147] Done calling model_fn.
I0810 21:25:19.791748 4798883264 estimator.py:1147] Done calling model_fn.
I0810 21:25:19.808421 4798883264 evaluation.py:255] Starting evaluation at 2019-08-10T21:25:19Z
I0810 21:25:19.902137 4798883264 monitored_session.py:240] Graph was finalized.
I0810 21:25:19.903477 4798883264 saver.py:1280] Restoring parameters from /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmppy1dlhmj/model.ckpt-1500
I0810 21:25:19.949110 4798883264 session_manager.py:500] Running local_init_op.
I0810 21:25:19.964850 4798883264 session_manager.py:502] Done running local_init_op.
I0810 21:25:20.084677 4798883264 evaluation.py:167] Evaluation [1/15]
I0810 21:25:20.087746 4798883264 evaluation.py:167] Evaluation [2/15]
I0810 21:25:20.090905 4798883264 evaluation.py:167] Evaluation [3/15]
I0810 21

{'accuracy': 0.10208333,
 'average_loss': 2.1984663,
 'custom_metric': 3.791076,
 'loss': 70.35092,
 'global_step': 1500}