In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Concatenate, Lambda, Add, Dot, Activation, Layer

In [None]:
from utils.conf_process import myconfigparser
from utils.feature_process import tuple_feature_process
from utils.active_method import activate
from utils.optimizer_method import get_optimizer
from utils.distribut_conf import set_dist_env

In [None]:
def build_model_columns(conf_path):
    """Read the feature config file and assemble the deep-model column spec.

    Every entry of the ``deep`` section is handed to ``tuple_feature_process``
    together with ``int(FLAGS.embedding_size)``; the three values it returns
    are collected as (feature column, default value, embedding width).
    The first option value of the ``use`` section is a comma-separated list
    of the column names to read.

    Args:
        conf_path: path passed to ``myconfigparser().read``.

    Returns:
        A 4-tuple ``(deep_columns, columns, cols_default, embedding_nums)``:
          * deep_columns   -- feature objects, one per ``deep`` entry, in order.
          * columns        -- column names listed in the ``use`` section.
          * cols_default   -- one single-element list per column: the default
                              recorded for that feature, ``[0]`` for
                              ``"label"``, or ``["-1"]`` when nothing is known.
          * embedding_nums -- sum of the embedding widths of all deep entries.
    """
    parser = myconfigparser()
    parser.read(conf_path)

    deep_columns = []
    default_dict = {}
    embedding_nums = 0

    for section in parser.sections():
        # Items are fetched for every section, even those that are not used.
        entries = parser.items(section)
        if section != "deep":
            continue

        for entry in entries:
            name = entry[0].strip()
            column, default_val, emb_width = tuple_feature_process(
                entry, int(FLAGS.embedding_size))
            embedding_nums += emb_width
            # The first default seen for a feature name wins.
            default_dict.setdefault(name, default_val)
            deep_columns.append(column)

    columns = parser.items("use")[0][1].strip().split(",")
    default_dict["label"] = 0
    cols_default = [
        [default_dict[col]] if col in default_dict else ["-1"]
        for col in columns
    ]
    return deep_columns, columns, cols_default, embedding_nums


In [None]:
# Sparse features with this many or more distinct values are left out of the model.
MAX_SPARSE_CARDINALITY = 10000

# Count distinct values once per sparse feature; the count is needed both for
# the cardinality filter and as the SparseFeat size.
sparse_cardinalities = [(feat, data[feat].nunique()) for feat in sparse_features]

# Parenthesised expression instead of backslash continuations: the original
# ended in a dangling "\" that only parsed because a blank line followed it.
fixlen_feature_columns = (
    [DenseFeat(feat, 1) for feat in dense_features]
    + [SparseFeat(feat, cardinality)
       for feat, cardinality in sparse_cardinalities
       if cardinality < MAX_SPARSE_CARDINALITY]
)

# The linear and DNN parts share the very same list object of feature columns.
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns

fixlen_feature_names = get_fixlen_feature_names(
    linear_feature_columns + dnn_feature_columns)

In [None]:
class FM(tf.keras.layers.Layer):
    """Factorization Machine: pairwise (order-2) feature interactions,
    without linear term and bias.

    For field embeddings ``v_i`` the layer computes, per sample,
    ``0.5 * sum_k((sum_i v_ik)^2 - sum_i v_ik^2)``.

      Input shape
        - 3D tensor with shape: ``(batch_size, field_size, embedding_size)``.
      Output shape
        - 1D tensor with shape: ``(batch_size,)`` (the trailing unit axis is
          squeezed away before returning).

      Arguments
        - **kwargs: forwarded unchanged to ``tf.keras.layers.Layer``
          (e.g. ``name``); optional, so ``FM()`` keeps working.
    """

    def __init__(self, **kwargs):
        super(FM, self).__init__(**kwargs)

    def call(self, input):
        # NOTE: the parameter name `input` shadows the builtin; it is kept so
        # that existing keyword callers are not broken.
        field_sum = tf.math.reduce_sum(input, 1, keepdims=True)    # (batch, 1, emb)
        square_of_sum = tf.math.pow(field_sum, 2)                  # (batch, 1, emb)
        sum_of_square = tf.math.reduce_sum(input * input, 1, keepdims=True)

        # Reduce over the embedding axis -> (batch, 1).
        cross_term = 0.5 * tf.math.reduce_sum(
            square_of_sum - sum_of_square, axis=2, keepdims=False)

        # Drop the remaining unit axis -> (batch,).
        return tf.squeeze(cross_term, -1)


In [1]:
class Base(tf.keras.Model):
    """Shared plumbing for feature-column based models.

    Splits the DNN feature columns by type, creates one embedding layer per
    sparse / variable-length sparse column and knows how to slice a packed
    input tensor back into per-feature pieces.

    Args:
        linear_feature_columns: feature columns of the linear part; only used,
            together with ``dnn_feature_columns``, to build ``feature_index``.
        dnn_feature_columns: feature columns of the deep part. Instances of
            ``SparseFeat``, ``VarLenSparseFeat`` and ``DenseFeat`` are picked
            out by type; the code reads their ``name``, ``dimension`` and
            (for embedded columns) ``embedding_name`` attributes.
        sparse_emb_dim: output width of every embedding layer.
    """

    def __init__(self, linear_feature_columns, dnn_feature_columns, sparse_emb_dim):
        super(Base, self).__init__()

        # feature name -> indexable pair whose [0]/[1] entries are used as the
        # start/end column of that feature inside the packed input tensor.
        self.feature_index = build_input_features(
            linear_feature_columns + dnn_feature_columns)
        self.dnn_feature_columns = dnn_feature_columns

        # A comprehension over an empty list already yields [], so no extra
        # emptiness guard is needed.
        self.sparse_feature_columns = [
            fc for fc in dnn_feature_columns if isinstance(fc, SparseFeat)]
        self.varlen_sparse_feature_columns = [
            fc for fc in dnn_feature_columns if isinstance(fc, VarLenSparseFeat)]
        self.dense_feature_columns = [
            fc for fc in dnn_feature_columns if isinstance(fc, DenseFeat)]

        # `tf.keras.layers` is spelled out in full: no bare `layers` name is
        # imported by the notebook's import cells.
        self.embedding_dict = {
            feat.embedding_name: tf.keras.layers.Embedding(
                feat.dimension, sparse_emb_dim, embeddings_initializer='normal')
            for feat in self.sparse_feature_columns + self.varlen_sparse_feature_columns}

        # NOTE: no linear weight for the dense features is created here (the
        # original had it commented out, shape: (sum of dense dimensions, 1)).
        # A subclass whose `call` reads `self.weight` has to create it itself.
        self.out_bias = tf.Variable(tf.zeros([1, ]), trainable=True)

    def _feature_slice(self, x, feat):
        """Return the columns of ``x`` that belong to feature column ``feat``."""
        bounds = self.feature_index[feat.name]
        return x[:, bounds[0]:bounds[1]]

    def input_from_feature_columns(self, x):
        """Split the packed input ``x`` into per-feature tensors.

        Args:
            x: 2D tensor whose columns are laid out as described by
               ``self.feature_index``.

        Returns:
            ``(sparse_embedding_list, varlen_sparse_embedding_list,
            dense_value_list)``: embedded slices for the sparse columns,
            embedded slices for the variable-length sparse columns, and the
            raw slices for the dense columns, each in column order.
        """
        sparse_embedding_list = [
            self.embedding_dict[feat.embedding_name](self._feature_slice(x, feat))
            for feat in self.sparse_feature_columns]
        varlen_sparse_embedding_list = [
            self.embedding_dict[feat.embedding_name](self._feature_slice(x, feat))
            for feat in self.varlen_sparse_feature_columns]
        dense_value_list = [
            self._feature_slice(x, feat) for feat in self.dense_feature_columns]

        return sparse_embedding_list, varlen_sparse_embedding_list, dense_value_list

class DeepFM(Base):
    """DeepFM: sum of a linear term, an FM term and a DNN term plus a bias.

    Args:
        linear_feature_columns: forwarded to ``Base``.
        dnn_feature_columns: forwarded to ``Base``.
        sparse_emb_dim: embedding width of every sparse feature.
        dnn_layers: forwarded unchanged as the second argument of ``DNN``.
        dropout_rate: forwarded to ``DNN`` as ``dropout_rate``.

    ``call`` returns a 1D tensor ``(batch_size,)`` of raw logits; no sigmoid
    is applied. It assumes at least one sparse feature column is present
    (the linear and FM terms concatenate the sparse embeddings).
    """

    def __init__(self, linear_feature_columns, dnn_feature_columns,
                 sparse_emb_dim, dnn_layers, dropout_rate=0.5):
        super(DeepFM, self).__init__(linear_feature_columns, dnn_feature_columns,
                                     sparse_emb_dim)

        dense_dim = sum(feat.dimension for feat in self.dense_feature_columns)

        # Linear weight for the dense features. `call` multiplies the
        # concatenated dense values by it, but nothing created it before, so
        # the dense path failed on attribute lookup. It is only created when
        # dense features exist, which avoids a zero-row variable.
        if self.dense_feature_columns:
            self.weight = tf.Variable(
                tf.random.normal([dense_dim, 1], stddev=0.0001), trainable=True)

        self.fm = FM()
        # NOTE(review): the size passed to DNN counts dense and sparse features
        # only, not variable-length sparse ones -- confirm against DNN if
        # variable-length features are ever used.
        self.dnn = tf.keras.Sequential([
            DNN(dense_dim + len(self.sparse_feature_columns) * sparse_emb_dim,
                dnn_layers, dropout_rate=dropout_rate),
            # `tf.keras.layers` spelled out: no bare `layers` name is imported.
            tf.keras.layers.Dense(1, use_bias=False, activation='linear')])

    @staticmethod
    def _flatten_embedding(emb):
        """Reshape ``(batch, fields, emb_dim)`` to ``(batch, fields * emb_dim)``.

        Uses the static field/embedding sizes so it also works when the batch
        size is unknown.
        """
        return tf.reshape(emb, [-1, emb.shape[1] * emb.shape[2]])

    def call(self, x):
        sparse_emb, varlen_emb, dense_values = self.input_from_feature_columns(x)
        field_emb = sparse_emb + varlen_emb

        # Linear term of the sparse features: sum of all embedding entries,
        # (batch, 1, n * emb) -> (batch, 1) -> (batch,).
        logit = tf.squeeze(
            tf.reduce_sum(tf.concat(sparse_emb, axis=-1), axis=-1, keepdims=False),
            -1)

        # DNN input: every embedding flattened to 2D, then the dense values.
        dnn_inputs = [self._flatten_embedding(emb) for emb in field_emb]

        if dense_values:
            dense_input = tf.concat(dense_values, axis=-1)
            # (batch, dense_dim) @ (dense_dim, 1) -> (batch, 1) -> (batch,)
            logit += tf.squeeze(tf.matmul(dense_input, self.weight), -1)
            dnn_inputs.append(dense_input)

        # FM term is added whether or not dense features exist (previously it
        # was skipped whenever dense features were present).
        logit += self.fm(tf.concat(field_emb, axis=1))

        # The DNN head returns (batch, 1); squeeze it so the sum stays
        # (batch,) instead of broadcasting to (batch, batch).
        logit += tf.squeeze(self.dnn(tf.concat(dnn_inputs, axis=-1)), -1)

        pred = logit + self.out_bias

        return pred

NameError: name 'tf' is not defined

In [None]:
# Build, compile and train the DeepFM model.
#
# NOTE(review): the keyword arguments below (`task`, `l2_reg_embedding`,
# `device`) do not match the `DeepFM.__init__` defined earlier in this
# notebook, whose parameters are (linear_feature_columns, dnn_feature_columns,
# sparse_emb_dim, dnn_layers, dropout_rate=0.5). If that class is the one in
# scope, this call raises TypeError (unexpected keywords, and the required
# `sparse_emb_dim` / `dnn_layers` are missing). `device` is also not defined in
# any cell shown here. TODO: confirm which DeepFM is intended and pass matching
# arguments.
model = DeepFM(linear_feature_columns=linear_feature_columns, dnn_feature_columns=dnn_feature_columns, task='binary',
               l2_reg_embedding=1e-5, device=device)

# NOTE(review): the notebook's `DeepFM.call` returns `logit + out_bias` without
# a sigmoid. The string loss "binary_crossentropy" presumably expects
# probabilities rather than logits -- verify, and either add a sigmoid or use a
# loss configured for logits.
model.compile("adagrad", "binary_crossentropy",
              metrics=["binary_crossentropy", "auc"],)
# Trains with a 20% validation split; `train_model_input`, `train` and `target`
# come from cells not shown here.
model.fit(train_model_input, train[target].values,
          batch_size=32, epochs=10, validation_split=0.2, verbose=2)