https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html

In [1]:
import os

# Directory holding the DeepCTR example data (the criteo_sample TFRecord files).
# expanduser("~") is used instead of os.environ["HOME"] so the lookup does not
# raise KeyError on platforms/environments where HOME is not set.
DIR_DATA = os.path.join(os.path.expanduser("~"), "workspace/third_party/shenweichen/DeepCTR/examples")

In [15]:
import pandas as pd
import tensorflow as tf

from tensorflow.python.ops.parsing_ops import  FixedLenFeature
from deepctr.estimator.inputs import input_fn_tfrecord
from deepctr.estimator.models import DeepFMEstimator

In [3]:
# Column names of the Criteo sample: categorical fields C1..C26 and
# numeric fields I1..I13.
sparse_features = [f"C{idx}" for idx in range(1, 27)]
dense_features = [f"I{idx}" for idx in range(1, 14)]

In [22]:
# Start both feature-column lists empty; the following cells append to them
# in place, so re-run this cell first whenever those cells are re-run.
dnn_feature_columns, linear_feature_columns = [], []

In [23]:
# Register every categorical field with both model inputs: an embedded column
# for the DNN side and the plain identity column for the linear side.
# NOTE: this cell appends in place, so re-running it without first resetting
# the two lists adds duplicate columns.
SPARSE_NUM_BUCKETS = 1000  # identity-column range; input ids must lie in [0, 1000)
SPARSE_EMBEDDING_DIM = 4   # embedding width used for each categorical field

for feat in sparse_features:
    # Build the identity column once and share it between the two lists
    # instead of constructing two identical columns.
    categorical_column = tf.feature_column.categorical_column_with_identity(
        key=feat, num_buckets=SPARSE_NUM_BUCKETS
    )
    dnn_feature_columns.append(
        tf.feature_column.embedding_column(
            categorical_column=categorical_column,
            dimension=SPARSE_EMBEDDING_DIM,
        )
    )
    linear_feature_columns.append(categorical_column)

In [24]:
# Each dense field is added as a plain numeric column to both the DNN and the
# linear column lists (DNN list first, matching the original append order).
for feat in dense_features:
    for columns in (dnn_feature_columns, linear_feature_columns):
        columns.append(tf.feature_column.numeric_column(feat))

In [4]:
# Parsing spec for the TFRecord examples: one int64 value per sparse field,
# one float32 value per dense field, plus a float32 'label'.
# Uses the public tf.io.FixedLenFeature alias rather than the private
# tensorflow.python.ops.parsing_ops import path, which is not a stable API.
feature_description = {
    **{k: tf.io.FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features},
    **{k: tf.io.FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features},
    'label': tf.io.FixedLenFeature(dtype=tf.float32, shape=1),
}

In [25]:
# Print the signature of input_fn_tfrecord to check its keyword arguments and defaults.
help(input_fn_tfrecord)

Help on function input_fn_tfrecord in module deepctr.estimator.inputs:

input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8, shuffle_factor=10, prefetch_factor=1)



In [5]:
# Training input_fn built from the Criteo sample training TFRecord file.
# The first three arguments are filenames, feature_description and label.
train_tfrecord_path = os.path.join(DIR_DATA, 'criteo_sample.tr.tfrecords')
train_model_input = input_fn_tfrecord(
    train_tfrecord_path,
    feature_description,
    'label',
    batch_size=256,
    num_epochs=1,
    shuffle_factor=10,
)

In [6]:
# Evaluation input_fn built from the Criteo sample test TFRecord file.
# It is called with shuffle_factor=0 and a batch size of 2 ** 14.
test_tfrecord_path = os.path.join(DIR_DATA, 'criteo_sample.te.tfrecords')
test_model_input = input_fn_tfrecord(
    test_tfrecord_path,
    feature_description,
    'label',
    batch_size=2 ** 14,
    num_epochs=1,
    shuffle_factor=0,
)

In [7]:
# Build the DeepFM estimator for binary classification from the two
# feature-column lists assembled above.
model = DeepFMEstimator(
    linear_feature_columns=linear_feature_columns,
    dnn_feature_columns=dnn_feature_columns,
    task='binary',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmplnkl24oi', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [26]:
# Check which class DeepFMEstimator actually returned.
type(model)

tensorflow_estimator.python.estimator.estimator.EstimatorV2

In [18]:
# Fit the estimator on the training input_fn. The recorded log shows parameters
# being restored from model.ckpt-2, i.e. this cell was run against a model_dir
# that already held a checkpoint, so training resumed rather than started fresh.
# NOTE(review): `estimator` is presumably the same object as `model`
# (tf.estimator's train() is documented to return the estimator itself) — confirm.
estimator = model.train(train_model_input)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmplnkl24oi/model.ckpt-2
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 2...
INFO:tensorflow:Saving checkpoints for 2 into /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmplnkl24oi/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 2...
INFO:tensorflow:loss = 384.84216, step = 2
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 3...
INFO:tensorflow:Saving checkpoints for 3 into /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmplnkl24oi/model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 3...
INFO:tensorflow:Loss for final step: 384.84216.


In [12]:
# Evaluate on the test input_fn; the result is a dict of metrics (AUC, LogLoss,
# loss, label/mean, prediction/mean, global_step per the recorded output).
# NOTE(review): the recorded run evaluated the step-2 checkpoint while the
# training log ends at step 3 — the cells were executed out of order, so
# re-run the notebook top to bottom before trusting these numbers.
eval_result = model.evaluate(test_model_input)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2022-09-24T18:50:03
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmplnkl24oi/model.ckpt-2
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 2.10400s
INFO:tensorflow:Finished evaluation at 2022-09-24-18:50:05
INFO:tensorflow:Saving dict for global step 2: AUC = 0.6116072, LogLoss = 1.9607964, global_step = 2, label/mean = 0.3, loss = 78.436134, prediction/mean = 0.24683996
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2: /var/folders/1l/gvk88th52gj0p_5n_97nkl680000gn/T/tmplnkl24oi/model.ckpt-2


In [13]:
# Show the metric dict returned by model.evaluate.
print(eval_result)

{'AUC': 0.6116072, 'LogLoss': 1.9607964, 'label/mean': 0.3, 'loss': 78.436134, 'prediction/mean': 0.24683996, 'global_step': 2}
