In [2]:
import tensorflow as tf
import numpy as np
import random

In [4]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

In [83]:
data_size = 500
max_transaction_history = 20
max_product_click_history = 20
max_promotion_click_history = 20
input_embedding_size = 16
category_size = 5
numeric_size = 1
mh=5

data1 = np.random.randint(category_size, size=(data_size, random.randint(1,max_transaction_history)))
data1 = tf.keras.preprocessing.sequence.pad_sequences(data1, max_transaction_history, padding='post')
#data1 = np.array([[1,2], [1,2,4]])

data2 = np.random.randint(category_size, size=(data_size, max_product_click_history-3))
data2 = tf.keras.preprocessing.sequence.pad_sequences(data2, max_product_click_history, padding='post')

data3 = np.random.randint(category_size, size=(data_size, max_promotion_click_history-4))
data3 = tf.keras.preprocessing.sequence.pad_sequences(data3, max_promotion_click_history, padding='post')

inputs = [data1, data2, data3]

single_category_cols = {105:(3,8),106:(5,8),107:(10,8)}   ## such as location : unique_value_size
for k in single_category_cols:
    inputs.append(np.random.randint(single_category_cols[k][0], size=(data_size, 1)))

num1 = np.random.random(size=(data_size, numeric_size))
inputs.append(num1)

labels = np.random.randint(0, category_size-1, size=(data_size, mh))
one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=category_size)

In [84]:
output_filename = "../data/tf.tfrecord"

def float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(value)))

with tf.python_io.TFRecordWriter(output_filename) as writer:
    for (v1,v2,v3,v4,v5,v6,v7,v8) in zip(data1, data2, data3, inputs[3], inputs[4], inputs[5], inputs[6], labels):
        features = {'seq_categorical_0': int64_feature(v1), 'seq_categorical_1': int64_feature(v2),
                    'seq_categorical_2': int64_feature(v3),'105': int64_feature(v4),
                    '106': int64_feature(v5),'107': int64_feature(v6), 'numeric': float_feature(v7),
                    'labels': int64_feature(v8)}
        tf_example = tf.train.Example(features=tf.train.Features(feature=features))
        writer.write(tf_example.SerializeToString())


In [111]:
def parse_function(example_proto):
    features = {"seq_categorical_0":tf.FixedLenFeature([max_transaction_history],tf.int64),
                "seq_categorical_1":tf.FixedLenFeature([max_product_click_history], tf.int64),
                "seq_categorical_2":tf.FixedLenFeature([max_promotion_click_history], tf.int64),
                "105":tf.FixedLenFeature([1], tf.int64),
                "106":tf.FixedLenFeature([1], tf.int64),
                "107":tf.FixedLenFeature([1], tf.int64),
                "numeric":tf.FixedLenFeature([1], tf.float32),
                "labels":tf.FixedLenFeature([5], tf.int64),
         }
    parsed_features = tf.parse_single_example(example_proto, features)
    #return parsed_features
    #return (parsed_features["seq_categorical_0"], parsed_features["seq_categorical_1"], parsed_features["seq_categorical_2"], parsed_features["105"], parsed_features["106"], parsed_features["107"], parsed_features["numeric"]), parsed_features["labels"]
    #{'labels': tf.clip_by_value(parsed_features["labels"], 0, 1)}
    return {'seq_categorical_0': parsed_features["seq_categorical_0"], 
            'seq_categorical_1':parsed_features["seq_categorical_1"], 
            }, parsed_features["labels"]  #'numeric':parsed_features["numeric"]



In [131]:
def load_data():
    ds=tf.data.TFRecordDataset('../data/tf.tfrecord')
    ds=ds.map(parse_function, num_parallel_calls=4)
    ds=ds.repeat() 
    ds=ds.batch(64)
    #return ds
    iterator = ds.make_one_shot_iterator()
    fts, labels = iterator.get_next()
    fts['seq_position'] = [x for x in range(20)]
    print(tf.clip_by_value(labels, 0, 1))
    return fts, tf.clip_by_value(labels, 0, 1)

In [124]:
tran_seq = tf.feature_column.categorical_column_with_identity('seq_categorical_0', category_size)
promo_seq = tf.feature_column.categorical_column_with_identity('seq_categorical_1', category_size)
position_seq = tf.feature_column.categorical_column_with_identity('seq_position', 20)


In [134]:
tans_emb = tf.feature_column.embedding_column(categorical_column=tran_seq, dimension=input_embedding_size)
promo_emb = tf.feature_column.embedding_column(categorical_column=promo_seq, 
                                               dimension=input_embedding_size, combiner='mean')
position_emb = tf.feature_column.embedding_column(categorical_column=position_seq, 
                                               dimension=input_embedding_size, combiner='mean')




In [138]:
tans_emb = tans_emb + position_emb
print(tans_emb)

(IdentityCategoricalColumn(key='seq_categorical_0', number_buckets=5, default_value=None), 16, 'mean', <tensorflow.python.ops.init_ops.TruncatedNormal object at 0x133cdac50>, None, None, None, True, IdentityCategoricalColumn(key='seq_position', number_buckets=20, default_value=None), 16, 'mean', <tensorflow.python.ops.init_ops.TruncatedNormal object at 0x133cdacc0>, None, None, None, True, IdentityCategoricalColumn(key='seq_position', number_buckets=20, default_value=None), 16, 'mean', <tensorflow.python.ops.init_ops.TruncatedNormal object at 0x133cdacc0>, None, None, None, True)


In [136]:
estimator = tf.estimator.DNNEstimator(
    head=tf.contrib.estimator.multi_label_head(n_classes=5),
    feature_columns=[tans_emb, promo_emb],
    hidden_units=[128],
    #optimizer=tf.train.AdamOptimizer(0.0001),
    optimizer=lambda: tf.train.AdamOptimizer(
        learning_rate=tf.train.exponential_decay(
            learning_rate=0.001,
            global_step=tf.train.get_global_step(),
            decay_steps=5000,
            decay_rate=0.96)))

I0815 15:16:45.101764 4682024384 estimator.py:1790] Using default config.
W0815 15:16:45.103368 4682024384 estimator.py:1811] Using temporary folder as model directory: /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpnloqwkt6
I0815 15:16:45.104286 4682024384 estimator.py:209] Using config: {'_model_dir': '/var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpnloqwkt6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x135acbf60>, '_tas

In [137]:
estimator.train(input_fn=load_data, steps=1000)

I0815 15:16:46.264050 4682024384 estimator.py:1145] Calling model_fn.


Tensor("clip_by_value:0", shape=(?, 5), dtype=int64, device=/device:CPU:0)


ValueError: in converted code:
    relative to /Users/pengcheng.jia/anaconda3/envs/reco/lib/python3.6/site-packages:

    tensorflow_estimator/python/estimator/canned/dnn.py:250 call *
        net = self._input_layer(features)
    tensorflow/python/feature_column/feature_column.py:337 __call__
        from_template=True)
    tensorflow/python/ops/template.py:392 __call__
        return self._call_func(args, kwargs)
    tensorflow/python/ops/template.py:354 _call_func
        result = self._func(*args, **kwargs)
    tensorflow/python/feature_column/feature_column.py:181 _internal_input_layer
        feature_columns = _normalize_feature_columns(feature_columns)
    tensorflow/python/feature_column/feature_column.py:2257 _normalize_feature_columns
        'Given (type {}): {}.'.format(type(column), column))

    ValueError: Items of feature_columns must be a _FeatureColumn. Given (type <class 'tuple'>): (IdentityCategoricalColumn(key='seq_categorical_0', number_buckets=5, default_value=None), 16, 'mean', <tensorflow.python.ops.init_ops.TruncatedNormal object at 0x133cdac50>, None, None, None, True, IdentityCategoricalColumn(key='seq_position', number_buckets=20, default_value=None), 16, 'mean', <tensorflow.python.ops.init_ops.TruncatedNormal object at 0x133cdacc0>, None, None, None, True).
    
    originally defined at:
      File "/Users/pengcheng.jia/anaconda3/envs/reco/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/canned/dnn.py", line 106, in dnn_logit_fn
        name='dnn')
      File "/Users/pengcheng.jia/anaconda3/envs/reco/lib/python3.6/site-packages/tensorflow_estimator/python/estimator/canned/dnn.py", line 189, in __init__
        create_scope_now=False)
      File "/Users/pengcheng.jia/anaconda3/envs/reco/lib/python3.6/site-packages/tensorflow/python/feature_column/feature_column.py", line 327, in __init__
        self._name, _internal_input_layer, create_scope_now_=create_scope_now)
      File "/Users/pengcheng.jia/anaconda3/envs/reco/lib/python3.6/site-packages/tensorflow/python/ops/template.py", line 160, in make_template
        **kwargs)
    


In [122]:
estimator.evaluate(input_fn=load_data, steps=5000//64)

I0814 21:53:59.802716 4682024384 estimator.py:1145] Calling model_fn.


Tensor("clip_by_value:0", shape=(?, 5), dtype=int64, device=/device:CPU:0)


W0814 21:54:00.167342 4682024384 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
W0814 21:54:00.182140 4682024384 metrics_impl.py:804] Trapezoidal rule is known to produce incorrect PR-AUCs; please switch to "careful_interpolation" instead.
I0814 21:54:00.204506 4682024384 estimator.py:1147] Done calling model_fn.
I0814 21:54:00.223821 4682024384 evaluation.py:255] Starting evaluation at 2019-08-14T21:54:00Z
I0814 21:54:00.299190 4682024384 monitored_session.py:240] Graph was finalized.
I0814 21:54:00.300963 4682024384 saver.py:1280] Restoring parameters from /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmp3zs4etpn/model.ckpt-1000
I0814 21:54:00.348308 4682024384 session_manager.py:500] Running local_init_op.
I0814 21:54:00.368274 4682024384 session_manager.py:502] Done running local_init_op.
I0814 21:54:00.541041 4682024384 evaluation.py:167] Evaluation [7/78]
I0814 21:54:00.558161 4682024384 evaluation

{'auc': 0.60497695,
 'auc_precision_recall': 0.80107063,
 'average_loss': 0.563053,
 'loss': 0.563053,
 'global_step': 1000}

In [130]:
def custom_metric(labels, predictions):
    # This function will be called by the Estimator, passing its predictions.
    # Let's suppose you want to add the "mean" metric...

    # Accessing the class predictions (careful, the key name may change from one canned Estimator to another)
    predicted_classes = predictions["class_ids"]  

    # Defining the metric (value and update tensors):
    custom_metric = tf.metrics.mean(labels, predicted_classes, name="custom_metric")

    # Returning as a dict:
    return {"custom_metric": custom_metric}

In [66]:
class RecoEstimator():
    
    def __init__(self, max_transaction_history = 20, max_product_click_history = 20, max_promotion_click_history = 20,
                 category_size = 100, single_categorical_features = None, numeric_features_size = 10,
                 hidden_layer1_size = 1024, hidden_layer2_size = 512, hidden_layer3_size = 256, activation='relu',
                input_embedding_size = 128):
        
        self.max_transaction_history = max_transaction_history
        self.max_product_click_history = max_product_click_history
        self.max_promotion_click_history = max_promotion_click_history
        self.category_size = category_size
        self.hidden_layer1_size = hidden_layer1_size
        self.hidden_layer2_size = hidden_layer2_size
        self.hidden_layer3_size = hidden_layer3_size
        self.single_categorical_features = single_categorical_features
        self.numeric_features_size = numeric_features_size
        self.activation = activation
        self.input_embedding_size = input_embedding_size
        self.model = None
        self.build()
        
    
    def build(self):
        seqs = self.create_input()
        
        self.model = tf.estimator.DNNClassifier(feature_columns=seqs,n_classes=self.category_size,
                                               hidden_units=[self.hidden_layer1_size, self.hidden_layer2_size, self.hidden_layer3_size],
                                               optimizer=tf.train.AdamOptimizer(0.0005))
        
        
    def create_input(self):
        
        transaction_cols = [x for x in range(self.max_transaction_history)]
        promotion_click_cols = [x for x in range(self.max_promotion_click_history)]
        seqs=[]
        seqs.append(self.seq_categorical_input('seq_categorical_0', len(transaction_cols)))
        seqs.append(self.seq_categorical_input('seq_categorical_1', len(promotion_click_cols)))
        
        seqs.append(tf.feature_column.numeric_column('numeric'))
            
        return seqs
        
    def seq_categorical_input(self, name, max_history):
        seq_input = tf.feature_column.categorical_column_with_identity(name, self.category_size)
        seq_emb = tf.feature_column.embedding_column(categorical_column=seq_input, dimension=self.input_embedding_size)
        return seq_emb

In [67]:
model = RecoEstimator(max_transaction_history, max_product_click_history, max_promotion_click_history, category_size,
                numeric_features_size = numeric_size, input_embedding_size = input_embedding_size,
                single_categorical_features = single_category_cols).model

I0814 21:42:24.087383 4682024384 estimator.py:1790] Using default config.
W0814 21:42:24.088814 4682024384 estimator.py:1811] Using temporary folder as model directory: /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpqoywrxjt
I0814 21:42:24.089673 4682024384 estimator.py:209] Using config: {'_model_dir': '/var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmpqoywrxjt', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x133faeb70>, '_tas

In [68]:
model = tf.estimator.add_metrics(model, custom_metric)

NameError: name 'custom_metric' is not defined

In [69]:
model = tf.estimator.add_metrics(model, my_auc)

NameError: name 'my_auc' is not defined

In [70]:
model.train(input_fn=load_data, steps=1500)

I0814 21:42:35.163906 4682024384 estimator.py:1145] Calling model_fn.


Tensor("ParseSingleExample/ParseSingleExample:3", shape=(5,), dtype=int64)


ValueError: Mismatched label shape. Expected labels dimension=1.  Received 5. Suggested Fix:If your classifier expects one-hot encoding label,check your n_classes argument to the estimator and/or the shape of your label. Otherwise, check the shape of your label.

In [156]:
model.evaluate(input_fn=load_data, steps=500//32)

I0810 21:25:19.454617 4798883264 estimator.py:1145] Calling model_fn.
I0810 21:25:19.455368 4798883264 estimator.py:1145] Calling model_fn.
I0810 21:25:19.748594 4798883264 estimator.py:1147] Done calling model_fn.
I0810 21:25:19.791748 4798883264 estimator.py:1147] Done calling model_fn.
I0810 21:25:19.808421 4798883264 evaluation.py:255] Starting evaluation at 2019-08-10T21:25:19Z
I0810 21:25:19.902137 4798883264 monitored_session.py:240] Graph was finalized.
I0810 21:25:19.903477 4798883264 saver.py:1280] Restoring parameters from /var/folders/0m/rqkyx9816pq795n34h08zf7r0000gn/T/tmppy1dlhmj/model.ckpt-1500
I0810 21:25:19.949110 4798883264 session_manager.py:500] Running local_init_op.
I0810 21:25:19.964850 4798883264 session_manager.py:502] Done running local_init_op.
I0810 21:25:20.084677 4798883264 evaluation.py:167] Evaluation [1/15]
I0810 21:25:20.087746 4798883264 evaluation.py:167] Evaluation [2/15]
I0810 21:25:20.090905 4798883264 evaluation.py:167] Evaluation [3/15]
I0810 21

{'accuracy': 0.10208333,
 'average_loss': 2.1984663,
 'custom_metric': 3.791076,
 'loss': 70.35092,
 'global_step': 1500}