In [None]:
# deepfm

"""
    Input shape
    - [batch_size, field_size, embedding_size]
    Output shape
    - [batch_size, 1]
"""

class FM(Layer):
    """Second-order factorization-machine interaction over field embeddings.

    Uses the identity
        sum_{i<j} <v_i, v_j> = 0.5 * sum_e[(sum_f v_{f,e})^2 - sum_f v_{f,e}^2]
    so the pairwise term is obtained without enumerating field pairs.

    Input shape
        - 3D tensor: [batch_size, field_size, embedding_size]
    Output shape
        - 2D tensor: [batch_size, 1]
    """

    def __init__(self, **kwargs):
        super(FM, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights are created; only the base-class build is invoked.
        super(FM, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Return the FM cross term for ``inputs``.

        The reduction flag is spelled ``keepdims``; the older ``keep_dims``
        spelling is rejected by TensorFlow 2.x.
        """
        concated_embeds_value = inputs
        # (sum over fields)^2, keeping the field axis as size 1.
        square_of_sum = tf.square(tf.reduce_sum(
            concated_embeds_value, axis=1, keepdims=True))
        # sum over fields of the element-wise squares.
        sum_of_square = tf.reduce_sum(
            concated_embeds_value * concated_embeds_value, axis=1, keepdims=True)
        cross_term = square_of_sum - sum_of_square
        # Collapse the embedding axis; the size-1 field axis remains.
        cross_term = 0.5 * tf.reduce_sum(cross_term, axis=2, keepdims=False)
        return cross_term

    def compute_output_shape(self, input_shape):
        return (None, 1)

In [None]:
# latent deepfm 

"""
    Input shape
    - [batch_size, field_size, embedding_size]
    Output shape
    - [batch_size, field_size]
"""

class FM(Layer):
    """FM-style second-order term with a learned latent matrix ``V``.

    For every field embedding ``x`` the layer evaluates
        0.5 * sum_k[(x . V_k)^2 - (x^2 . V_k^2)]
    where ``V`` has shape ``(k, embedding_size)``.

    Input shape
        - 3D tensor: [batch_size, field_size, embedding_size]
    Output shape
        - 2D tensor: [batch_size, field_size]. Only the latent axis is
          reduced in ``call``, so one value per field is returned.

    NOTE(review): an earlier notebook cell defines another layer that is also
    named ``FM``; whichever cell is executed last shadows the other.

    Args:
        k: number of latent factors (rows of ``V``).
    """

    def __init__(self, k=10, **kwargs):
        self.k = k
        super(FM, self).__init__(**kwargs)

    def build(self, input_shape):
        dim = int(input_shape[-1])
        self.V = self.add_weight(name='V',
                                 shape=(self.k, dim),
                                 initializer=glorot_normal(),
                                 trainable=True)
        super(FM, self).build(input_shape)

    def call(self, inputs, **kwargs):
        v_t = tf.transpose(self.V)  # (embedding_size, k)
        second_order = 0.5 * tf.reduce_sum(
            tf.square(tf.matmul(inputs, v_t)) -
            tf.matmul(tf.square(inputs), tf.square(v_t)),
            axis=2, keepdims=False)
        return second_order

    def compute_output_shape(self, input_shape):
        # Mirrors call(): the latent axis is summed out, the field axis stays.
        return (input_shape[0], input_shape[1])

    def get_config(self, ):
        # Serialise `k` so the layer can be rebuilt from its config.
        config = {'k': self.k}
        base_config = super(FM, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
# fwfm

"""
    Input shape
    - [batch_size, field_size, embedding_size]
    Output shape
    - [batch_size, 1]
"""
import itertools

class FwFM(Layer):
    """Field-weighted factorization machine interaction.

    Each unordered field pair (i, j), i < j, contributes the inner product of
    the two field embeddings scaled by a learned scalar
    ``field_strengths[i, j]``; the contributions are summed.

    Input shape
        - 3D tensor: [batch_size, field_size, embedding_size]
    Output shape
        - 2D tensor: [batch_size, 1]
    """

    def __init__(self, **kwargs):
        super(FwFM, self).__init__(**kwargs)

    def build(self, input_shape):
        field_count = int(input_shape[1])
        self.num_fields = field_count
        # Full (fields x fields) matrix; call() only reads entries with i < j.
        self.field_strengths = self.add_weight(
            name='field_pair_weights',
            shape=(field_count, field_count),
            initializer=glorot_normal(),
            trainable=True)

        super(FwFM, self).build(input_shape)

    def call(self, inputs, **kwargs):
        def weighted_pair(left, right):
            """Scaled inner product between the embeddings of two fields."""
            strength = self.field_strengths[left, right]
            embed_left = tf.squeeze(inputs[:, left:left + 1, :], axis=1)
            embed_right = tf.squeeze(inputs[:, right:right + 1, :], axis=1)
            return tf.scalar_mul(
                strength, K.batch_dot(embed_left, embed_right, axes=1))

        field_pairs = itertools.combinations(range(self.num_fields), 2)
        contributions = [weighted_pair(left, right) for left, right in field_pairs]
        return tf.add_n(contributions)

    def compute_output_shape(self, input_shape):
        return (None, 1)

In [None]:
# IPNN

"""
    Input shape
    - A list of [batch_size, 1, embedding_size]
    Output shape
    - [batch_size, N * (N - 1) / 2, 1]
"""

class InnerProductLayer(Layer):
    """Pairwise inner products between field embeddings (IPNN).

    Input shape
        - A list of N tensors, each [batch_size, 1, embedding_size]
    Output shape
        - 3D tensor: [batch_size, N * (N - 1) / 2, 1]
    """

    def __init__(self, **kwargs):
        super(InnerProductLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # No weights are created; only the base-class build is invoked.
        super(InnerProductLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        embed_list = inputs
        num_inputs = len(embed_list)
        # All index pairs (i, j) with i < j, in row-major order.
        pairs = [(i, j)
                 for i in range(num_inputs - 1)
                 for j in range(i + 1, num_inputs)]

        # Left and right operands of every pair, stacked along axis 1.
        p = tf.concat([embed_list[i] for i, _ in pairs], axis=1)
        q = tf.concat([embed_list[j] for _, j in pairs], axis=1)

        # `keepdims` replaces the `keep_dims` spelling that TensorFlow 2.x rejects.
        inner_product = tf.reduce_sum(p * q, axis=2, keepdims=True)
        return inner_product

    def compute_output_shape(self, input_shape):
        num_inputs = len(input_shape)
        num_pairs = int(num_inputs * (num_inputs - 1) / 2)
        first_shape = input_shape[0]
        return (first_shape[0], num_pairs, 1)
    
    
# OPNN

"""
    Input shape
    - A list of [batch_size, 1, embedding_size]

    Output shape
    - [batch_size, N * (N - 1) / 2]
"""

class OutterProductLayer(Layer):
    """Kernel-weighted pairwise product between field embeddings (OPNN).

    For every field pair (i, j), i < j, the layer evaluates
    ``sum_{a,b} p_a * kernel[b, pair, a] * q_b`` where ``p`` / ``q`` are the
    embeddings of fields i / j.

    Input shape
        - A list of N tensors, each [batch_size, 1, embedding_size]
    Output shape
        - 2D tensor: [batch_size, N * (N - 1) / 2]
    """

    def __init__(self, **kwargs):
        super(OutterProductLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        num_inputs = len(input_shape)
        num_pairs = int(num_inputs * (num_inputs - 1) / 2)
        # Keep `input_shape` bound to the full list so that the base-class
        # build below receives the shape of the whole input, not of one field.
        first_shape = input_shape[0]
        embed_size = int(first_shape[-1])
        self.kernel = self.add_weight(shape=(embed_size, num_pairs, embed_size),
                                      initializer=glorot_uniform(),
                                      name='kernel')
        super(OutterProductLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        embed_list = inputs
        num_inputs = len(embed_list)
        # All index pairs (i, j) with i < j, in row-major order.
        pairs = [(i, j)
                 for i in range(num_inputs - 1)
                 for j in range(i + 1, num_inputs)]
        p = tf.concat([embed_list[i] for i, _ in pairs], axis=1)
        q = tf.concat([embed_list[j] for _, j in pairs], axis=1)

        # Shape comments assume inputs of the documented shape.
        p = tf.expand_dims(p, 1)                                   # (batch, 1, pairs, embed)
        p_kernel = tf.reduce_sum(tf.multiply(p, self.kernel), -1)  # (batch, embed, pairs)
        p_kernel = tf.transpose(p_kernel, [0, 2, 1])               # (batch, pairs, embed)
        kp = tf.reduce_sum(tf.multiply(p_kernel, q), -1)           # (batch, pairs)
        return kp

    def compute_output_shape(self, input_shape):
        num_inputs = len(input_shape)
        num_pairs = int(num_inputs * (num_inputs - 1) / 2)
        return (None, num_pairs)

In [None]:
# DCN_V1

"""
    Input shape
    - [batch_size, feature_dims]
    Output shape
    - [batch_size, feature_dims]
"""

class CrossNet_V1(Layer):
    """Cross network with vector kernels (DCN v1).

    Each layer computes ``x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l`` with
    ``w_l`` and ``b_l`` of shape ``(feature_dims, 1)``.

    Input shape
        - 2D tensor: [batch_size, feature_dims]
    Output shape
        - 2D tensor: [batch_size, feature_dims]

    Args:
        layer_num: number of stacked cross layers.
    """

    def __init__(self, layer_num=2, **kwargs):
        self.layer_num = layer_num
        # The class is named CrossNet_V1; `super(CrossNet, self)` would raise
        # NameError because no `CrossNet` name is defined in this cell.
        super(CrossNet_V1, self).__init__(**kwargs)

    def build(self, input_shape):
        dim = int(input_shape[-1])
        self.kernels = [self.add_weight(name='kernel' + str(i),
                                        shape=(dim, 1),
                                        initializer=glorot_normal(),
                                        trainable=True) for i in range(self.layer_num)]
        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(dim, 1),
                                     initializer=Zeros(),
                                     trainable=True) for i in range(self.layer_num)]
        super(CrossNet_V1, self).build(input_shape)

    def call(self, inputs, **kwargs):
        x_0 = tf.expand_dims(inputs, axis=2)  # (batch, dim, 1)
        x_l = x_0
        for i in range(self.layer_num):
            # x_l^T w_l: contract the feature axis -> (batch, 1, 1)
            xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0))
            # x_0 scaled by that per-sample scalar -> (batch, dim, 1)
            dot_ = tf.matmul(x_0, xl_w)
            x_l = dot_ + self.bias[i] + x_l
        x_l = tf.squeeze(x_l, axis=2)
        return x_l

    def get_config(self, ):
        config = {'layer_num': self.layer_num}
        base_config = super(CrossNet_V1, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape

In [None]:
# DCN_V2

"""
    Input shape
    - [batch_size, feature_dims]
    Output shape
    - [batch_size, feature_dims]
"""

class CrossNet_V2(Layer):
    """Cross network with full matrix kernels (DCN v2).

    Each layer computes ``x_{l+1} = x_0 * (W_l x_l + b_l) + x_l`` with
    ``W_l`` of shape ``(feature_dims, feature_dims)`` and ``b_l`` of shape
    ``(feature_dims, 1)``; ``*`` is element-wise.

    Input shape
        - 2D tensor: [batch_size, feature_dims]
    Output shape
        - 2D tensor: [batch_size, feature_dims]

    Args:
        layer_num: number of stacked cross layers.
    """

    def __init__(self, layer_num=2, **kwargs):
        self.layer_num = layer_num
        super(CrossNet_V2, self).__init__(**kwargs)

    def build(self, input_shape):
        feature_dim = int(input_shape[-1])

        # All kernels are registered before any bias, so weight order is kept.
        kernel_weights = []
        for layer_idx in range(self.layer_num):
            kernel_weights.append(
                self.add_weight(name='kernel' + str(layer_idx),
                                shape=(feature_dim, feature_dim),
                                initializer=glorot_normal(),
                                trainable=True))
        self.kernels = kernel_weights

        bias_weights = []
        for layer_idx in range(self.layer_num):
            bias_weights.append(
                self.add_weight(name='bias' + str(layer_idx),
                                shape=(feature_dim, 1),
                                initializer=Zeros(),
                                trainable=True))
        self.bias = bias_weights

        super(CrossNet_V2, self).build(input_shape)

    def call(self, inputs, **kwargs):
        base = tf.expand_dims(inputs, axis=2)  # (batch, dim, 1)
        current = base
        for kernel, bias in zip(self.kernels, self.bias):
            projected = tf.matmul(kernel, current) + bias
            current = base * projected + current
        return tf.squeeze(current, axis=2)

    def get_config(self, ):
        merged = dict(super(CrossNet_V2, self).get_config())
        merged.update({'layer_num': self.layer_num})
        return merged

    def compute_output_shape(self, input_shape):
        return input_shape

In [None]:
# DCN_M

"""
    Input shape
    - [batch_size, feature_dims]
    Output shape
    - [batch_size, feature_dims]
"""

class CrossNetMix(Layer):
    """Low-rank mixture-of-experts cross network (DCN-Mix).

    Every cross layer projects ``x_l`` into a ``low_rank`` space per expert
    (``V``), transforms it there (``C``), projects it back (``U``), multiplies
    the result element-wise with ``x_0`` and combines the experts with a
    softmax gate computed from ``x_l``.

    Input shape
        - 2D tensor: [batch_size, feature_dims]
    Output shape
        - 2D tensor: [batch_size, feature_dims]

    Args:
        low_rank: dimensionality of the low-rank space.
        num_experts: number of experts per cross layer.
        layer_num: number of stacked cross layers.
        **kwargs: forwarded to the base ``Layer`` constructor (e.g. ``name``).
    """

    def __init__(self, low_rank=32, num_experts=4, layer_num=2, **kwargs):
        self.low_rank = low_rank
        self.num_experts = num_experts
        self.layer_num = layer_num
        super(CrossNetMix, self).__init__(**kwargs)

    def build(self, input_shape):
        self.feature_dims = int(input_shape[1])
        # U: (num_experts, feature_dims, low_rank), one per cross layer
        self.U_list = [self.add_weight(name='kernel_U' + str(i),
                                       shape=(self.num_experts, self.feature_dims, self.low_rank),
                                       initializer=glorot_normal())
                       for i in range(self.layer_num)]
        # V: (num_experts, feature_dims, low_rank), one per cross layer
        self.V_list = [self.add_weight(name='kernel_V' + str(i),
                                       shape=(self.num_experts, self.feature_dims, self.low_rank),
                                       initializer=glorot_normal())
                       for i in range(self.layer_num)]

        # C: (num_experts, low_rank, low_rank), one per cross layer
        self.C_list = [self.add_weight(name='kernel_C' + str(i),
                                       shape=(self.num_experts, self.low_rank, self.low_rank),
                                       initializer=glorot_normal())
                       for i in range(self.layer_num)]

        # One scalar gate per expert; the same gates are reused by every layer.
        self.gating = [Dense(1, use_bias=False) for i in range(self.num_experts)]

        self.bias = [self.add_weight(name='bias' + str(i),
                                     shape=(self.feature_dims, 1),
                                     initializer=Zeros())
                     for i in range(self.layer_num)]
        super(CrossNetMix, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # (bs, feature_dims, 1)
        x_0 = tf.expand_dims(inputs, 2)
        x_l = x_0
        for i in range(self.layer_num):
            output_of_experts = []
            gating_score_of_experts = []
            for expert_id in range(self.num_experts):
                # (bs, feature_dims) -> (bs, 1)
                gating_score_of_experts.append(self.gating[expert_id](tf.squeeze(x_l, 2)))

                # (bs, feature_dims, 1) -> (bs, low_rank, 1)
                v_x = tf.matmul(tf.transpose(self.V_list[i][expert_id]), x_l)
                v_x = tf.tanh(v_x)

                # (bs, low_rank, 1) -> (bs, low_rank, 1)
                v_x = tf.matmul(self.C_list[i][expert_id], v_x)
                v_x = tf.tanh(v_x)

                # (bs, low_rank, 1) -> (bs, feature_dims, 1)
                uv_x = tf.matmul(self.U_list[i][expert_id], v_x)

                dot_ = uv_x + self.bias[i]

                # (bs, feature_dims, 1)
                dot_ = x_0 * dot_

                output_of_experts.append(tf.squeeze(dot_, 2))

            # (bs, feature_dims, num_experts)
            output_of_experts = tf.stack(output_of_experts, 2)
            # (bs, num_experts, 1)
            gating_score_of_experts = tf.stack(gating_score_of_experts, 1)
            # Normalise across the expert axis.
            gating_value = tf.nn.softmax(gating_score_of_experts, 1)

            # Gate-weighted sum of the expert outputs: (bs, feature_dims, 1)
            moe_out = tf.matmul(output_of_experts, gating_value)
            x_l = moe_out + x_l  # (bs, feature_dims, 1)

        x_l = tf.squeeze(x_l, -1)  # (bs, feature_dims)
        return x_l

    def get_config(self, ):
        config = {'low_rank': self.low_rank,
                  'num_experts': self.num_experts,
                  'layer_num': self.layer_num}
        base_config = super(CrossNetMix, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        # The residual connections keep the input shape unchanged.
        return input_shape

In [2]:
# XDeepFM

"""
    Input shape
    - [batch_size, field_nums, embed_dims]
    Output shape
    - [batch_size, featuremap_num]
"""

class CIN(Layer):
    """Compressed Interaction Network (xDeepFM).

    Every layer builds the outer interactions between the original field
    embeddings and the previous hidden feature maps, compresses them with a
    width-1 convolution, and contributes (part of) its feature maps to the
    final output, which is sum-pooled over the embedding axis.

    Input shape
        - 3D tensor: [batch_size, field_nums, embed_dims]
    Output shape
        - 2D tensor: [batch_size, featuremap_num]

    Args:
        layer_size: number of feature maps of each CIN layer.
        activation: activation applied to each layer's feature maps.
        split_half: if True, every layer except the last passes half of its
            feature maps to the next layer and the other half to the output.
    """

    def __init__(self, layer_size=(128, 128), activation='relu', split_half=True, **kwargs):
        self.layer_size = layer_size
        self.split_half = split_half
        self.activation = activation
        super(CIN, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create one filter and one bias per CIN layer.

        Raises:
            ValueError: if ``split_half`` is set and a non-final layer size is
                odd (its feature maps could not be split into two halves).
        """
        self.field_nums = [int(input_shape[1])]
        self.filters = []
        self.bias = []
        last_idx = len(self.layer_size) - 1
        for i, size in enumerate(self.layer_size):
            if self.split_half and i != last_idx and size % 2 != 0:
                raise ValueError(
                    "layer_size must be even except for the last layer "
                    "when split_half=True, got {} at index {}".format(size, i))
            self.filters.append(self.add_weight(name='filter' + str(i),
                                                shape=[1, self.field_nums[-1] * self.field_nums[0], size],
                                                dtype=tf.float32,
                                                initializer=glorot_uniform()))
            self.bias.append(self.add_weight(name='bias' + str(i),
                                             shape=[size],
                                             dtype=tf.float32,
                                             initializer=tf.keras.initializers.Zeros()))
            # Number of feature maps handed to the next layer.
            if self.split_half:
                self.field_nums.append(size // 2)
            else:
                self.field_nums.append(size)
        self.activation_layers = [Activation(self.activation) for _ in self.layer_size]
        super(CIN, self).build(input_shape)

    def call(self, inputs, **kwargs):
        dim = int(inputs.get_shape()[-1])
        hidden_nn_layers = [inputs]
        final_result = []
        # One slice per embedding dimension of the original fields.
        split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2)

        for idx, layer_size in enumerate(self.layer_size):
            split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2)

            # Per-embedding-dimension outer product of fields x hidden maps.
            dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True)

            dot_result_o = tf.reshape(dot_result_m, shape=[dim, -1, self.field_nums[0] * self.field_nums[idx]])

            # Bring the batch axis to the front.
            dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])

            curr_out = tf.nn.conv1d(dot_result, filters=self.filters[idx], stride=1, padding='VALID')
            curr_out = tf.nn.bias_add(curr_out, self.bias[idx])
            curr_out = self.activation_layers[idx](curr_out)
            curr_out = tf.transpose(curr_out, perm=[0, 2, 1])

            if self.split_half:
                if idx != len(self.layer_size) - 1:
                    next_hidden, direct_connect = tf.split(curr_out, 2 * [layer_size // 2], 1)
                else:
                    # The last layer sends all of its maps to the output.
                    direct_connect = curr_out
                    next_hidden = 0
            else:
                direct_connect = curr_out
                next_hidden = curr_out

            final_result.append(direct_connect)
            hidden_nn_layers.append(next_hidden)

        result = tf.concat(final_result, axis=1)
        # Sum-pool over the embedding axis. `keepdims` replaces the
        # `keep_dims` spelling that TensorFlow 2.x rejects.
        result = tf.reduce_sum(result, -1, keepdims=False)

        return result

    def compute_output_shape(self, input_shape):
        if self.split_half:
            featuremap_num = sum(self.layer_size[:-1]) // 2 + self.layer_size[-1]
        else:
            featuremap_num = sum(self.layer_size)
        return (None, featuremap_num)

    def get_config(self, ):
        config = {'layer_size': self.layer_size, 'split_half': self.split_half, 'activation': self.activation}
        base_config = super(CIN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
# fibinet

import itertools

"""
    Input shape
    - A list of [batch_size, 1, embed_dims]
    Output shape
    - A list of [batch_size, 1, embed_dims]
"""

class SENETLayer(Layer):
    """Squeeze-and-excitation re-weighting of field embeddings (FiBiNET).

    The field embeddings are mean-pooled to one value per field, passed
    through two ReLU-activated projections (field_size -> reduction_size ->
    field_size) and the resulting per-field weights rescale the embeddings.

    Input shape
        - A list of tensors, each [batch_size, 1, embed_dims]
    Output shape
        - A list of tensors, each [batch_size, 1, embed_dims]

    Args:
        reduction_ratio: divisor applied to the field count to obtain the
            bottleneck width (never smaller than 1).
    """

    def __init__(self, reduction_ratio=3, **kwargs):
        self.reduction_ratio = reduction_ratio
        super(SENETLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.field_size = len(input_shape)
        self.embedding_size = int(input_shape[0][-1])
        bottleneck = max(1, self.field_size // self.reduction_ratio)

        self.W_1 = self.add_weight(
            shape=(self.field_size, bottleneck),
            initializer=glorot_normal(),
            name="W_1")
        self.W_2 = self.add_weight(
            shape=(bottleneck, self.field_size),
            initializer=glorot_normal(),
            name="W_2")
        # Contracts the last axis of the first operand with the first axis of
        # the second one.
        self.tensordot = tf.keras.layers.Lambda(
            lambda pair: tf.tensordot(pair[0], pair[1], axes=(-1, 0)))

        super(SENETLayer, self).build(input_shape)

    def call(self, inputs, training=None, **kwargs):
        stacked = tf.concat(inputs, axis=1)
        # Squeeze: one summary value per field.
        squeezed = tf.reduce_mean(stacked, axis=-1)

        # Excitation: bottleneck projection and back.
        hidden = tf.nn.relu(self.tensordot([squeezed, self.W_1]))
        field_weights = tf.nn.relu(self.tensordot([hidden, self.W_2]))

        # Re-weight every field embedding and split back into a list.
        reweighted = tf.multiply(stacked, tf.expand_dims(field_weights, axis=2))
        return tf.split(reweighted, self.field_size, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape

    def compute_mask(self, inputs, mask=None):
        return [None] * self.field_size

    def get_config(self, ):
        merged = dict(super(SENETLayer, self).get_config())
        merged.update({'reduction_ratio': self.reduction_ratio})
        return merged
    

# BilinearInteraction

"""
    Input shape
    - A list of [batch_size, 1, embed_dims]
    Output shape
    - [batch_size, 1, field_nums * (field_nums - 1) // 2 * embed_dims]
"""

class BilinearInteraction(Layer):
    """Bilinear interaction between every pair of field embeddings (FiBiNET).

    For a field pair (i, j), i < j, the output slice is ``(v_i . W) * v_j``
    where ``*`` is element-wise and ``W`` depends on ``bilinear_type``:

    - ``"all"``: one matrix shared by every pair.
    - ``"each"``: one matrix per left-hand field ``i``.
    - ``"interaction"``: one matrix per pair ``(i, j)``.

    Input shape
        - A list of tensors, each [batch_size, 1, embed_dims]
    Output shape
        - 3D tensor: [batch_size, 1, field_nums * (field_nums - 1) // 2 * embed_dims]

    Args:
        bilinear_type: one of ``"all"``, ``"each"``, ``"interaction"``.
    """

    _SUPPORTED_TYPES = ("all", "each", "interaction")

    def __init__(self, bilinear_type="interaction", **kwargs):
        self.bilinear_type = bilinear_type
        super(BilinearInteraction, self).__init__(**kwargs)

    def _unsupported_type_error(self):
        """Build the error raised for an unrecognised ``bilinear_type``."""
        return ValueError(
            "bilinear_type must be one of {}, got {!r}".format(
                self._SUPPORTED_TYPES, self.bilinear_type))

    def build(self, input_shape):
        """Create the bilinear weight(s).

        Raises:
            ValueError: if ``bilinear_type`` is not a supported value.
        """
        embedding_size = int(input_shape[0][-1])
        if self.bilinear_type == "all":
            self.W = self.add_weight(shape=(embedding_size, embedding_size),
                                     initializer=glorot_normal(),
                                     name="bilinear_weight")
        elif self.bilinear_type == "each":
            # The last field never appears on the left of a pair, hence n - 1.
            self.W_list = [self.add_weight(shape=(embedding_size, embedding_size),
                                           initializer=glorot_normal(),
                                           name="bilinear_weight" + str(i))
                           for i in range(len(input_shape) - 1)]
        elif self.bilinear_type == "interaction":
            self.W_list = [self.add_weight(shape=(embedding_size, embedding_size),
                                           initializer=glorot_normal(),
                                           name="bilinear_weight" + str(i) + '_' + str(j))
                           for i, j in itertools.combinations(range(len(input_shape)), 2)]
        else:
            # Previously this fell through silently and call() later failed
            # with an UnboundLocalError.
            raise self._unsupported_type_error()
        super(BilinearInteraction, self).build(input_shape)

    def call(self, inputs, **kwargs):
        """Return the concatenated bilinear products of all field pairs.

        Raises:
            ValueError: if ``bilinear_type`` is not a supported value.
        """
        if self.bilinear_type == "all":
            p = [tf.multiply(tf.tensordot(v_i, self.W, axes=(-1, 0)), v_j)
                 for v_i, v_j in itertools.combinations(inputs, 2)]
        elif self.bilinear_type == "each":
            p = [tf.multiply(tf.tensordot(inputs[i], self.W_list[i], axes=(-1, 0)), inputs[j])
                 for i, j in itertools.combinations(range(len(inputs)), 2)]
        elif self.bilinear_type == "interaction":
            # W_list was created in the same pair order as combinations().
            p = [tf.multiply(tf.tensordot(v[0], w, axes=(-1, 0)), v[1])
                 for v, w in zip(itertools.combinations(inputs, 2), self.W_list)]
        else:
            raise self._unsupported_type_error()
        return tf.concat(p, axis=-1)

    def compute_output_shape(self, input_shape):
        field_size = len(input_shape)
        embedding_size = input_shape[0][-1]
        return (None, 1, field_size * (field_size - 1) // 2 * embedding_size)

    def get_config(self, ):
        config = {'bilinear_type': self.bilinear_type}
        base_config = super(BilinearInteraction, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
# fgcnn

from keras.utils.conv_utils import conv_output_length

"""
    Input shape
    - [batch_size, field_nums, embedding_dims]

    Output shape
    - [batch_size, new_feature_nums, embedding_dims]
"""

class FGCNNLayer(Layer):
    """Feature generation by convolution, pooling and recombination (FGCNN).

    Each stage convolves along the field axis, max-pools along the field
    axis, and maps the flattened pooled result through a dense layer whose
    output is reshaped into new embedding-sized features. The new features
    of all stages are concatenated along the field axis.

    Input shape
        - 3D tensor: [batch_size, field_nums, embedding_dims]
    Output shape
        - 3D tensor: [batch_size, new_feature_nums, embedding_dims]

    Args:
        filters: number of convolution filters per stage.
        kernel_width: convolution width (along the field axis) per stage.
        new_maps: number of new feature maps generated per stage.
        pooling_width: max-pooling width (along the field axis) per stage.
    """

    def __init__(self, filters=(14, 16,), kernel_width=(7, 7,), new_maps=(3, 3,), pooling_width=(2, 2), **kwargs):
        self.filters = filters
        self.kernel_width = kernel_width
        self.new_maps = new_maps
        self.pooling_width = pooling_width
        super(FGCNNLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.conv_layers = []
        self.pooling_layers = []
        self.dense_layers = []
        # Accept both TensorShape objects and plain tuples/lists.
        if hasattr(input_shape, 'as_list'):
            shape_list = input_shape.as_list()
        else:
            shape_list = list(input_shape)
        # Trailing 1 is the channel axis added by call() via expand_dims.
        pooling_shape = shape_list + [1, ]
        embedding_size = int(input_shape[-1])
        for i in range(len(self.filters)):
            filters = self.filters[i]
            width = self.kernel_width[i]
            new_filters = self.new_maps[i]
            pooling_width = self.pooling_width[i]
            # Track the static shape through conv + pooling so the dense
            # layer of this stage can be sized.
            conv_output_shape = self._conv_output_shape(
                pooling_shape, (width, 1), filters)
            pooling_shape = self._pooling_output_shape(
                conv_output_shape, (pooling_width, 1))
            self.conv_layers.append(Conv2D(filters=filters,
                                           kernel_size=(width, 1),
                                           strides=(1, 1),
                                           padding='same',
                                           activation='tanh',
                                           use_bias=True, ))
            self.pooling_layers.append(MaxPooling2D(pool_size=(pooling_width, 1)))
            self.dense_layers.append(Dense(pooling_shape[1] * embedding_size * new_filters,
                                           activation='tanh', use_bias=True))
        self.flatten = Flatten()
        super(FGCNNLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        embedding_size = int(inputs.shape[-1])
        # Add a channel axis: (batch, fields, embed, 1).
        pooling_result = tf.expand_dims(inputs, axis=3)
        new_feature_list = []

        for i in range(len(self.filters)):
            new_filters = self.new_maps[i]
            conv_result = self.conv_layers[i](pooling_result)
            pooling_result = self.pooling_layers[i](conv_result)
            flatten_result = self.flatten(pooling_result)
            new_result = self.dense_layers[i](flatten_result)
            # Reshape the dense output into embedding-sized new features.
            new_feature_list.append(
                tf.reshape(new_result, (-1, int(pooling_result.shape[1]) * new_filters, embedding_size)))

        new_features = tf.concat(new_feature_list, axis=1)
        return new_features

    def compute_output_shape(self, input_shape):
        new_features_num = 0
        features_num = input_shape[1]
        for i in range(0, len(self.kernel_width)):
            pooled_features_num = features_num // self.pooling_width[i]
            new_features_num += self.new_maps[i] * pooled_features_num
            features_num = pooled_features_num
        return (None, new_features_num, input_shape[-1])

    def get_config(self, ):
        config = {'kernel_width': self.kernel_width, 'filters': self.filters, 'new_maps': self.new_maps,
                  'pooling_width': self.pooling_width}
        base_config = super(FGCNNLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def _conv_output_shape(self, input_shape, kernel_size, filters=None):
        """Static output shape of a stride-1 'same' convolution.

        Args:
            input_shape: list ``[batch, *spatial, channels]``.
            kernel_size: kernel extent for each spatial axis.
            filters: channel count of the convolution output. When omitted,
                ``self.filters`` is appended unchanged (legacy behaviour).
        """
        space = input_shape[1:-1]
        new_space = []
        for i in range(len(space)):
            new_dim = conv_output_length(
                space[i],
                kernel_size[i],
                padding='same',
                stride=1,
                dilation=1)
            new_space.append(new_dim)
        channels = self.filters if filters is None else filters
        return ([input_shape[0]] + new_space + [channels])

    def _pooling_output_shape(self, input_shape, pool_size):
        """Static output shape of a 'valid' pooling with stride == pool size."""
        rows = input_shape[1]
        cols = input_shape[2]
        rows = conv_output_length(rows, pool_size[0], 'valid', pool_size[0])
        cols = conv_output_length(cols, pool_size[1], 'valid', pool_size[1])
        return [input_shape[0], rows, cols, input_shape[3]]

In [None]:
# simple attention
# softmax(tanh(qW)) * q

"""
    Input shape
    - [batch_size, seq_len, embedding_dims]

    Output shape
    - [batch_size, embedding_dims]
"""


class Attention(Layer):
    """Additive self-attention pooling over the sequence axis.

    Scores every step with ``tanh(x . W + b)``, normalises the exponentiated
    scores over the sequence (masked steps get zero weight) and returns the
    weighted sum of the inputs.

    Input shape
        - 3D tensor: [batch_size, seq_len, embedding_dims]
    Output shape
        - 2D tensor: [batch_size, embedding_dims]

    Args:
        W_regularizer: regularizer for the scoring vector ``W``.
        b_regularizer: regularizer for the per-step bias ``b``.
        W_constraint: constraint for ``W``.
        b_constraint: constraint for ``b``.
        bias: whether to add the per-step bias.
    """

    def __init__(self, W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None, bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)
        # Set after the base initialiser: the base Layer constructor assigns
        # its own default to this flag in several Keras releases, which would
        # otherwise overwrite the value.
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        # Filled in by build() from the input shape.
        self.step_dim = 0
        self.features_dim = 0

    def build(self, input_shape):
        self.step_dim = int(input_shape[1])
        self.features_dim = int(input_shape[-1])

        self.W = self.add_weight(name='{}_W'.format(self.name),
                                 shape=(self.features_dim,),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)

        if self.bias:
            # One bias per sequence step, so seq_len must be static.
            self.b = self.add_weight(name='{}_b'.format(self.name),
                                     shape=(self.step_dim,),
                                     initializer='zero',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        super(Attention, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # The sequence axis is pooled away, so no mask is propagated.
        return None

    def call(self, inputs, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        # inputs: [batch_size, seq_len, features_dim]
        # eij = [batch_size * seq_len, features_dim] dot [features_dim, 1] = [batch_size * seq_len, 1]
        # eij = [batch_size, seq_len]
        eij = K.dot(K.reshape(inputs, (-1, features_dim)), K.reshape(self.W, (features_dim, 1)))
        eij = K.reshape(eij, (-1, step_dim))
        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # Zero out masked steps before normalising.
        if mask is not None:
            a *= K.cast(mask, K.floatx())

        # [batch_size, seq_len]; epsilon guards against an all-masked row.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        # [batch_size, seq_len, features_dim] * [batch_size, seq_len, 1]
        a = K.expand_dims(a)
        weighted_input = inputs * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.features_dim

In [None]:
# Autoint

"""
    Input shape
    - [batch_size, field_nums, embedding_size]
    Output shape
    - [batch_size, field_nums, att_embedding_size * head_num]
"""

class InteractingLayer(Layer):
    """Multi-head self-attention over feature fields (AutoInt).

    Input shape
        - 3D tensor: [batch_size, field_nums, embedding_size]
    Output shape
        - 3D tensor: [batch_size, field_nums, att_embedding_size * head_num]

    Args:
        att_embedding_size: per-head projection width.
        head_num: number of attention heads.
        use_res: whether to add a linear projection of the input to the
            attention output before the final ReLU.
    """

    def __init__(self, att_embedding_size=8, head_num=2, use_res=True, **kwargs):
        self.att_embedding_size = att_embedding_size
        self.head_num = head_num
        self.use_res = use_res
        super(InteractingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        embedding_size = int(input_shape[-1])
        projected_width = self.att_embedding_size * self.head_num

        def make_projection(weight_name):
            """Create one [embedding_size, projected_width] projection matrix."""
            return self.add_weight(
                name=weight_name,
                shape=[embedding_size, projected_width],
                dtype=tf.float32,
                initializer=tf.keras.initializers.TruncatedNormal())

        self.W_Query = make_projection('query')
        self.W_key = make_projection('key')
        self.W_Value = make_projection('value')
        if self.use_res:
            self.W_Res = make_projection('res')
        super(InteractingLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        def project(kernel):
            """Apply a projection matrix to the embedding axis."""
            return tf.tensordot(inputs, kernel, axes=(-1, 0))

        def split_heads(tensor):
            """Move the heads onto a new leading axis."""
            return tf.stack(tf.split(tensor, self.head_num, axis=2))

        query_proj = project(self.W_Query)
        key_proj = project(self.W_key)
        value_proj = project(self.W_Value)

        query_heads = split_heads(query_proj)
        key_heads = split_heads(key_proj)
        value_heads = split_heads(value_proj)

        # Field-to-field attention scores per head, softmax over the last axis.
        scores = tf.matmul(query_heads, key_heads, transpose_b=True)
        self.normalized_att_scores = tf.nn.softmax(scores)

        attended = tf.matmul(self.normalized_att_scores, value_heads)
        # Merge the heads back onto the feature axis and drop the head axis.
        merged = tf.concat(tf.split(attended, self.head_num, axis=0), axis=-1)
        merged = tf.squeeze(merged, axis=0)

        if self.use_res:
            merged = merged + project(self.W_Res)
        return tf.nn.relu(merged)

    def compute_output_shape(self, input_shape):
        return (None, input_shape[1], self.att_embedding_size * self.head_num)

    def get_config(self, ):
        merged = dict(super(InteractingLayer, self).get_config())
        merged.update({'att_embedding_size': self.att_embedding_size,
                       'head_num': self.head_num,
                       'use_res': self.use_res})
        return merged

In [None]:
# Capsule

"""
    Input shape
    - [batch_size, field_nums, embedding_size]
    Output shape
    - [batch_size, num_capsule, dim_capsule]
"""

def squash(x, axis=-1):
    """Scale ``x`` to (approximately) unit L2 norm along ``axis``.

    The divisor is ``sqrt(sum(x^2) + epsilon)``, so an all-zero vector maps
    to zeros instead of producing a division by zero.
    """
    squared_norm = K.sum(K.square(x), axis, keepdims=True)
    return x / K.sqrt(squared_norm + K.epsilon())

class Capsule(Layer):
    """Capsule layer that maps input capsules to output capsules by iterative routing.

    Input shape
        - [batch_size, input_num_capsule, input_dim_capsule]
    Output shape
        - [batch_size, num_capsule, dim_capsule]

    Args:
        num_capsule: number of output capsules.
        dim_capsule: length of each output capsule vector.
        routings: number of routing iterations; must be at least 1.
        kernel_size: stored on the layer but not used by ``build`` or ``call``.
        share_weights: if True one transformation kernel is shared by every
            input capsule, otherwise each input capsule gets its own kernel.
        activation: ``'default'`` selects ``squash``; any other value is
            wrapped in a Keras ``Activation`` layer.

    Raises:
        ValueError: if ``routings`` is smaller than 1.
    """

    def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Capsule, self).__init__(**kwargs)
        # With zero iterations the routing loop in call() would never assign
        # its result, so reject that configuration up front.
        if routings < 1:
            raise ValueError('`routings` must be >= 1, got {}'.format(routings))
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size
        self.share_weights = share_weights
        # Keep the original identifier so get_config() can round-trip it.
        self._activation_identifier = activation
        if activation == 'default':
            self.activation = squash
        else:
            self.activation = Activation(activation)

    def build(self, input_shape):
        """Create the transformation kernel that maps input to output capsules."""
        super(Capsule, self).build(input_shape)
        input_dim_capsule = int(input_shape[-1])
        if self.share_weights:
            # A single kernel of width 1, applied to every input capsule.
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)
        else:
            # One kernel per input capsule.
            input_num_capsule = int(input_shape[-2])
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        """Transform the input capsules and route them to the output capsules.

        Args:
            u_vecs: input capsules, [batch_size, input_num_capsule, input_dim_capsule].

        Returns:
            The activated output capsules of the last routing iteration.
        """
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        # -> [batch_size, num_capsule, input_num_capsule, dim_capsule]
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))

        # Routing logits, shape = [batch_size, num_capsule, input_num_capsule]
        b = K.zeros_like(u_hat_vecs[:, :, :, 0])
        # Dynamic routing
        for i in range(self.routings):
            # Softmax acts on the last axis, so move num_capsule there,
            # normalise, and move it back.
            c = K.softmax(K.permute_dimensions(b, (0, 2, 1)))
            c = K.permute_dimensions(c, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                # Refresh the logits from the current outputs; skipped on the
                # last pass because the logits are not used again.
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs

    def compute_output_shape(self, input_shape):
        """Return (None, num_capsule, dim_capsule)."""
        return (None, self.num_capsule, self.dim_capsule)

    def get_config(self, ):
        """Return the constructor arguments merged into the base layer config.

        ``activation`` is returned exactly as it was passed to ``__init__``.
        """
        config = {'num_capsule': self.num_capsule,
                  'dim_capsule': self.dim_capsule,
                  'routings': self.routings,
                  'kernel_size': self.kernel_size,
                  'share_weights': self.share_weights,
                  'activation': self._activation_identifier}
        base_config = super(Capsule, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
# AFM

"""
    Input shape
    - A list of [batch_size, 1, embedding_size]
    Output shape
    - [batch_size, 1]
"""

import itertools

class AFMLayer(Layer):
    """Attentional pairwise-interaction layer.

    Every pair of input embeddings is multiplied element-wise, an attention
    network scores each pair, and the attention-weighted sum of the pair
    products is projected to a single value.

    Input shape
        - A list of at least two tensors with identical shapes, each
          [batch_size, 1, embedding_size]
    Output shape
        - [batch_size, 1]

    Args:
        attention_factor: width of the hidden layer of the attention network.
    """

    def __init__(self, attention_factor=4, **kwargs):
        self.attention_factor = attention_factor
        super(AFMLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Validate the input shapes and create the attention / projection weights.

        Raises:
            ValueError: if fewer than two inputs are given or the inputs do
                not all share the same shape.
        """
        if not isinstance(input_shape, (list, tuple)) or len(input_shape) < 2:
            raise ValueError('`AFMLayer` must be called on a list of at least 2 inputs')

        # Pairs are multiplied element-wise in call(), so every field has to
        # have the same shape. TensorShape() also accepts plain tuples/lists.
        distinct_shapes = set(
            tuple(tf.TensorShape(shape).as_list()) for shape in input_shape)
        if len(distinct_shapes) > 1:
            raise ValueError('`AFMLayer` requires inputs with the same shape, '
                             'got: %s' % (sorted(distinct_shapes, key=str),))

        embedding_size = int(input_shape[0][-1])

        self.attention_W = self.add_weight(shape=(embedding_size, self.attention_factor),
                                           initializer=glorot_normal(),
                                           name="attention_W")
        self.attention_b = self.add_weight(shape=(self.attention_factor,),
                                           initializer=Zeros(),
                                           name="attention_b")
        self.projection_h = self.add_weight(shape=(self.attention_factor, 1),
                                            initializer=glorot_normal(),
                                            name="projection_h")
        self.projection_p = self.add_weight(shape=(embedding_size, 1),
                                            initializer=glorot_normal(),
                                            name="projection_p")
        self.tensordot = tf.keras.layers.Lambda(
            lambda x: tf.tensordot(x[0], x[1], axes=(-1, 0)))
        super(AFMLayer, self).build(input_shape)

    @staticmethod
    def _softmax(logits, axis):
        """Softmax along ``axis`` that works with both keyword spellings."""
        try:
            return tf.nn.softmax(logits, axis=axis)
        except TypeError:
            # Older TensorFlow 1.x releases only accept the legacy `dim` keyword.
            return tf.nn.softmax(logits, dim=axis)

    def call(self, inputs, training=None, **kwargs):
        """Compute the attention-weighted sum of all pairwise interactions.

        Args:
            inputs: list of embedding tensors, one per field.
            training: accepted for API compatibility; not used.

        Returns:
            Tensor of shape [batch_size, 1].
        """
        pairs = list(itertools.combinations(inputs, 2))
        # Left and right members of every pair, concatenated along axis 1 so
        # that position k of `p` and `q` together form the k-th pair.
        p = tf.concat([left for left, _ in pairs], axis=1)
        q = tf.concat([right for _, right in pairs], axis=1)
        bi_interaction = p * q

        attention_temp = tf.nn.relu(tf.nn.bias_add(tf.tensordot(
            bi_interaction, self.attention_W, axes=(-1, 0)), self.attention_b))

        # Normalise over axis 1 (the pair axis) so the weights of all pairs sum to 1.
        attention_logits = tf.tensordot(
            attention_temp, self.projection_h, axes=(-1, 0))
        self.normalized_att_score = self._softmax(attention_logits, axis=1)
        attention_output = tf.reduce_sum(
            self.normalized_att_score * bi_interaction, axis=1)

        afm_out = self.tensordot([attention_output, self.projection_p])
        return afm_out

    def compute_output_shape(self, input_shape):
        """Return (None, 1)."""
        return (None, 1)

    def get_config(self, ):
        """Return the base layer config extended with ``attention_factor``."""
        config = {'attention_factor': self.attention_factor}
        base_config = super(AFMLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
# MMoE

"""
    Input shape
    - [batch_size, feature_dims]
    Output shape
    - A list of task_nums tensors, each of shape [batch_size, hidden_units]

"""

class MMoE(Layer):
    def __init__(self, hidden_units=64, expert_nums=4, task_nums=2, **kwargs):
        """Store the layer's hyper-parameters.

        Args:
            hidden_units: output width of every expert ``Dense`` layer.
            expert_nums: number of expert layers; also the output width of
                every gate layer.
            task_nums: number of gate layers, i.e. number of returned outputs.
            **kwargs: forwarded to the base ``Layer`` constructor.
        """
        # Hyper-parameters are recorded before the base constructor runs.
        self.task_nums = task_nums
        self.expert_nums = expert_nums
        self.hidden_units = hidden_units
        super(MMoE, self).__init__(**kwargs)
        
    def build(self, input_shape):
        """Create the expert and gate sub-layers.

        ``expert_nums`` ReLU ``Dense`` layers of width ``hidden_units`` act as
        experts, and ``task_nums`` softmax ``Dense`` layers of width
        ``expert_nums`` act as per-task gates over those experts.
        """
        self.expert_layers = [Dense(self.hidden_units, activation='relu') for _ in range(self.expert_nums)]
        self.gate_layers = [Dense(self.expert_nums, activation='softmax') for _ in range(self.task_nums)]
        # Finish with the base-class build, as the other layers in this file do.
        super(MMoE, self).build(input_shape)
        
    def call(self, inputs):
        """Mix the expert outputs once per task using that task's gate.

        Args:
            inputs: tensor fed to every expert and every gate layer.

        Returns:
            A list with one tensor per gate layer: the expert outputs weighted
            by that gate's softmax output and summed over the expert axis.
        """
        # Each expert output gets a trailing axis of size one; concatenating on
        # that axis puts the experts side by side on axis 2.
        expert_stack = tf.concat(
            [tf.expand_dims(expert(inputs), axis=2) for expert in self.expert_layers],
            axis=2)

        gate_weights = [gate(inputs) for gate in self.gate_layers]

        # Inserting axis 1 into the gate output lines its expert axis up with
        # axis 2 of the stacked expert outputs before the weighted sum.
        return [
            tf.reduce_sum(expert_stack * tf.expand_dims(weights, axis=1), axis=2)
            for weights in gate_weights
        ]