diff --git a/deepctr/layers/interaction.py b/deepctr/layers/interaction.py
index f76eda32..74df8bdf 100644
--- a/deepctr/layers/interaction.py
+++ b/deepctr/layers/interaction.py
@@ -29,6 +29,52 @@
 from .utils import concat_func, reduce_sum, softmax, reduce_mean
 from .core import DNN
 
+class CrossNetLayer(Layer):
+    """The Cross Network part of DCN. Each layer applies the explicit
+    feature-crossing recurrence x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l,
+    so the output keeps the input dimension while adding one interaction order.
+    """
+
+    def __init__(self, layer_num=2, parameterization='vector', l2_reg=0, **kwargs):
+        self.layer_num = layer_num
+        self.parameterization = parameterization
+        self.l2_reg = l2_reg
+        super(CrossNetLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        dim = int(input_shape[-1])
+        if self.parameterization == 'vector':
+            kernel_shape = (dim, 1)
+        elif self.parameterization == 'matrix':
+            kernel_shape = (dim, dim)
+        else:
+            raise ValueError("parameterization should be 'vector' or 'matrix'")
+        self.kernels = [self.add_weight(name='kernel' + str(i), shape=kernel_shape,
+                                        initializer='glorot_uniform',
+                                        regularizer=l2(self.l2_reg),
+                                        trainable=True) for i in range(self.layer_num)]
+        self.bias = [self.add_weight(name='bias' + str(i), shape=(dim, 1),
+                                     initializer='zeros',
+                                     trainable=True) for i in range(self.layer_num)]
+        super(CrossNetLayer, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x_0 = tf.expand_dims(inputs, axis=2)  # (batch_size, dim, 1)
+        x_l = x_0
+        for i in range(self.layer_num):
+            if self.parameterization == 'vector':
+                xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0))  # (batch_size, 1, 1)
+                dot_ = tf.matmul(x_0, xl_w)  # (batch_size, dim, 1)
+                x_l = dot_ + self.bias[i] + x_l
+            else:
+                xl_w = tf.einsum('ij,bjk->bik', self.kernels[i], x_l)  # (batch_size, dim, 1)
+                x_l = x_0 * (xl_w + self.bias[i]) + x_l
+        x_l = tf.squeeze(x_l, axis=2)
+        return x_l
+
+    def compute_output_shape(self, input_shape):
+        return (None, input_shape[-1])
+
 
 class AFMLayer(Layer):
     """Attentonal Factorization Machine models pairwise (order-2) feature
diff --git a/deepctr/models/__init__.py b/deepctr/models/__init__.py
index 1d797e78..f6ec48c6 100644
--- a/deepctr/models/__init__.py
+++ b/deepctr/models/__init__.py
@@ -21,6 +21,7 @@
 from .wdl import WDL
 from .xdeepfm import xDeepFM
 from .edcn import EDCN
+from .afn import AFN
 
 __all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM",
            "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN", "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST", "DeepFEFM",
diff --git a/deepctr/models/afn.py b/deepctr/models/afn.py
new file mode 100644
index 00000000..f64b1b9c
--- /dev/null
+++ b/deepctr/models/afn.py
@@ -0,0 +1,68 @@
+from tensorflow.python.keras.models import Model
+from tensorflow.python.keras.layers import Dense, Dropout, Concatenate, Flatten
+from tensorflow.python.keras.initializers import TruncatedNormal
+from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
+from ..layers.core import PredictionLayer
+from ..layers.interaction import AFMLayer, CrossNetLayer
+from ..layers.utils import concat_func, add_func, combined_dnn_input
+
+
+def AFN(linear_feature_columns, dnn_feature_columns, cross_num=2, cross_parameterization='vector',
+        dnn_hidden_units=(128, 128, 128), l2_reg_linear=1e-5, l2_reg_embedding=1e-5,
+        l2_reg_cross=1e-5, afn_dropout=0, seed=1024, task='binary'):
+    """Instantiates the Attentional Factorization Network architecture.
+
+    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
+    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+    :param cross_num: int, the number of cross layers in the CrossNet.
+    :param cross_parameterization: str, one of ``"vector"`` or ``"matrix"``. Parameterization of the CrossNet.
+    :param dnn_hidden_units: tuple of positive integers, the layer number and units in each layer of the DNN.
+    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
+    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
+    :param l2_reg_cross: float. L2 regularizer strength applied to the CrossNet.
+    :param afn_dropout: float in [0,1), fraction of the CrossNet and DNN output units to drop out.
+    :param seed: integer, to use as random seed.
+    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
+    :return: A Keras model instance.
+    """
+
+    features = build_input_features(
+        linear_feature_columns + dnn_feature_columns)
+
+    inputs_list = list(features.values())
+
+    sparse_embedding_list, dense_value_list = input_from_feature_columns(
+        features, dnn_feature_columns, l2_reg_embedding, seed, support_dense=True)
+
+    # Linear part over both sparse and dense features
+    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
+                                    prefix='linear', l2_reg=l2_reg_linear)
+
+    # CrossNet over the flattened embedding vectors
+    cross_input = Flatten()(concat_func(sparse_embedding_list))
+    cross_out = CrossNetLayer(layer_num=cross_num, parameterization=cross_parameterization,
+                              l2_reg=l2_reg_cross)(cross_input)
+    if afn_dropout:
+        cross_out = Dropout(afn_dropout)(cross_out)
+
+    # AFM part: pairwise attentional interactions over the embedding list
+    afm_out = AFMLayer(seed=seed)(sparse_embedding_list)
+
+    # DNN part over the embeddings and dense features
+    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
+    deep_out = dnn_input
+    for units in dnn_hidden_units:
+        deep_out = Dense(units, activation='relu',
+                         kernel_initializer=TruncatedNormal(seed=seed))(deep_out)
+        if afn_dropout:
+            deep_out = Dropout(afn_dropout)(deep_out)
+
+    # Combine the linear, cross, AFM and DNN parts into the final logit
+    final_logit = Dense(1, use_bias=False)(
+        Concatenate(axis=1)([cross_out, afm_out, deep_out]))
+    final_logit = add_func([linear_logit, final_logit])
+
+    output = PredictionLayer(task)(final_logit)
+
+    model = Model(inputs=inputs_list, outputs=output)
+    return model
diff --git a/tests/models/afn_test.py b/tests/models/afn_test.py
new file mode 100644
index 00000000..fc060289
--- /dev/null
+++ b/tests/models/afn_test.py
@@ -0,0 +1,26 @@
+import pytest
+
+from deepctr.models import AFN
+from tests.utils import get_test_data, SAMPLE_SIZE, check_model
+
+
+# AFMLayer needs at least two sparse embeddings, so sparse_feature_num >= 2.
+@pytest.mark.parametrize(
+    'dnn_hidden_units, sparse_feature_num, dense_feature_num',
+    [((32, 16), 3, 0),
+     ((32, 16), 3, 3),
+     ((32, 16), 2, 3)]
+)
+def test_AFN(dnn_hidden_units, sparse_feature_num, dense_feature_num):
+    model_name = 'AFN'
+    sample_size = SAMPLE_SIZE
+    x, y, feature_columns = get_test_data(
+        sample_size, sparse_feature_num=sparse_feature_num, dense_feature_num=dense_feature_num)
+
+    model = AFN(feature_columns, feature_columns, dnn_hidden_units=dnn_hidden_units)
+
+    check_model(model, model_name, x, y)
+
+
+if __name__ == '__main__':
+    pass
diff --git a/tests/utils.py b/tests/utils.py
index f980a175..c7e82c1a 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -416,3 +416,26 @@
 def check_estimator(model, input_fn):
     model.train(input_fn)
     model.evaluate(input_fn)
+
+
+def get_device(device_num=None):
+    """
+    Get the PyTorch device to use for computation.
+
+    :param device_num: int, optional (default=None)
+        The index of the device to use. If None, the CUDA device with the most free memory is selected.
+
+    :return: torch.device
+        The PyTorch device to use for computation.
+    """
+    import torch  # local import: torch is only needed by this helper, not by the rest of the test suite
+
+    if device_num is not None:
+        return torch.device("cuda:{}".format(device_num) if torch.cuda.is_available() else "cpu")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if device.type == 'cuda' and torch.cuda.device_count() > 1:
+        # Pick the GPU with the least memory currently allocated,
+        # a rough proxy for the one with the most free memory.
+        mem_list = [torch.cuda.memory_allocated(i) for i in range(torch.cuda.device_count())]
+        device = torch.device("cuda:{}".format(mem_list.index(min(mem_list))))
+    return device
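Review note (not part of the patch): the cross layer can be sanity-checked in isolation. A minimal sketch, assuming CrossNetLayer is exported from deepctr.layers.interaction as in the hunk above; the batch size and dimension are arbitrary.

```python
# Standalone shape check for CrossNetLayer (batch size 2 and dim 8 are arbitrary).
import numpy as np
import tensorflow as tf

from deepctr.layers.interaction import CrossNetLayer

x = tf.constant(np.random.random((2, 8)), dtype=tf.float32)  # (batch_size, dim)
layer = CrossNetLayer(layer_num=3, parameterization='vector')
y = layer(x)
assert y.shape == (2, 8)  # each cross layer preserves the input dimension
```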
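And an end-to-end smoke run of the new model. Every field name, vocabulary size, and data point below is invented for illustration; only the AFN signature comes from the patch.

```python
# Hypothetical end-to-end smoke test for AFN; all field names and data are made up.
import numpy as np

from deepctr.feature_column import SparseFeat, DenseFeat
from deepctr.models import AFN

sparse_features = ['user_id', 'item_id', 'category']  # hypothetical fields
dense_features = ['price']

feature_columns = [SparseFeat(name, vocabulary_size=100, embedding_dim=4)
                   for name in sparse_features]
feature_columns += [DenseFeat(name, 1) for name in dense_features]

n = 256
model_input = {name: np.random.randint(0, 100, n) for name in sparse_features}
model_input.update({name: np.random.random(n) for name in dense_features})
y = np.random.randint(0, 2, n)

model = AFN(feature_columns, feature_columns, cross_num=2,
            dnn_hidden_units=(32, 16), afn_dropout=0.2, task='binary')
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
model.fit(model_input, y, batch_size=64, epochs=1, validation_split=0.2)
```

check_model in the test suite performs an equivalent fit/predict/save round trip, so this is only a convenience for reviewers running the branch locally.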