add afn model #520

Open · wants to merge 3 commits into base: master
28 changes: 28 additions & 0 deletions deepctr/layers/interaction.py
@@ -29,6 +29,34 @@
from .utils import concat_func, reduce_sum, softmax, reduce_mean
from .core import DNN

class CrossNetLayer(Layer):
    """A simplified CrossNet from Deep & Cross Network (DCN). Each cross step computes
    x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l, so the input dimension is preserved.

    :param layer_num: int, number of cross steps to stack.
    """

    def __init__(self, layer_num=2, **kwargs):
        self.layer_num = layer_num
        super(CrossNetLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        dim = input_shape[-1]
        # One weight vector and one bias vector per cross step.
        self.kernels = [self.add_weight(name='kernel' + str(i), shape=(dim, 1),
                                        initializer='glorot_uniform',
                                        trainable=True) for i in range(self.layer_num)]
        self.bias = [self.add_weight(name='bias' + str(i), shape=(dim, 1),
                                     initializer='zeros',
                                     trainable=True) for i in range(self.layer_num)]
        super(CrossNetLayer, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # inputs: (batch_size, dim) -> add a trailing unit axis: (batch_size, dim, 1)
        x_0 = tf.expand_dims(inputs, axis=2)
        x_l = x_0
        for i in range(self.layer_num):
            # Contract x_l with w_l over the feature axis: (batch_size, 1, 1)
            xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0))
            # Scale x_0 by the scalar x_l^T w_l: (batch_size, dim, 1)
            dot_ = tf.matmul(x_0, xl_w)
            x_l = dot_ + self.bias[i] + x_l  # residual connection
        x_l = tf.squeeze(x_l, axis=2)  # back to (batch_size, dim)
        return x_l

    def compute_output_shape(self, input_shape):
        return (None, input_shape[-1])


class AFMLayer(Layer):
"""Attentonal Factorization Machine models pairwise (order-2) feature
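As a quick sanity check of the new layer's contract, here is a minimal sketch (assumes this PR's branch is installed so CrossNetLayer is importable; batch size and dimension are illustrative):

import tensorflow as tf

from deepctr.layers.interaction import CrossNetLayer  # available on this PR's branch

# Each cross step maps (batch, dim) -> (batch, dim), so shapes are preserved.
layer = CrossNetLayer(layer_num=2)
x = tf.random.normal((4, 8))  # batch_size=4, dim=8, illustrative values
y = layer(x)
assert y.shape == (4, 8)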
1 change: 1 addition & 0 deletions deepctr/models/__init__.py
@@ -21,6 +21,7 @@
from .wdl import WDL
from .xdeepfm import xDeepFM
from .edcn import EDCN
from .afn import AFN

__all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN",
"WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST", "DeepFEFM",
62 changes: 62 additions & 0 deletions deepctr/models/afn.py
@@ -0,0 +1,62 @@
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense, Dropout, Concatenate, Flatten
from tensorflow.python.keras.initializers import TruncatedNormal
from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
from ..layers.core import PredictionLayer
from ..layers.interaction import AFMLayer, CrossNetLayer
from ..layers.utils import concat_func


def AFN(linear_feature_columns, dnn_feature_columns, cross_num=2, cross_parameterization='vector',
        l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_cross=1e-5, afn_dropout=0, seed=1024,
        task='binary'):
    """Instantiates the Adaptive Factorization Network (AFN) architecture.

    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param cross_num: int, the number of cross layers in the CrossNet.
    :param cross_parameterization: str, one of "vector" or "matrix". Reserved; the simplified
        CrossNetLayer currently implements only the "vector" form.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors.
    :param l2_reg_cross: float. L2 regularizer strength applied to the CrossNet. Reserved; the
        simplified CrossNetLayer currently takes no regularizer.
    :param afn_dropout: float in [0,1), fraction of the CrossNet and DNN units to drop out.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_dense=True)

    # Linear part over both sparse and dense features
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)

    # CrossNet operates on the flattened concatenation of the field embeddings
    flat_embeddings = Flatten()(concat_func(sparse_embedding_list, axis=1))
    cross_out = CrossNetLayer(layer_num=cross_num)(flat_embeddings)
    if afn_dropout:
        cross_out = Dropout(afn_dropout)(cross_out)

    # AFMLayer consumes the list of per-field embeddings
    afm_out = AFMLayer()(sparse_embedding_list)

    # DNN tower on the same flattened embeddings
    hidden_layers = [flat_embeddings]
    for i in range(3):
        fc = Dense(128, activation='relu', kernel_initializer=TruncatedNormal(seed=seed))(hidden_layers[-1])
        if afn_dropout:
            fc = Dropout(afn_dropout)(fc)
        hidden_layers.append(fc)

    # Combine all components into the final prediction
    stack_out = Concatenate(axis=1)([linear_logit, cross_out, afm_out, hidden_layers[-1]])
    final_logit = Dense(1, use_bias=False)(stack_out)
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
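For orientation, a minimal end-to-end sketch of driving the new model (assumes this PR's branch is installed; the feature names, vocabulary sizes, and sample count are all illustrative):

import numpy as np

from deepctr.feature_column import SparseFeat, DenseFeat
from deepctr.models import AFN  # available on this PR's branch

feature_columns = [SparseFeat('user_id', vocabulary_size=100, embedding_dim=4),
                   SparseFeat('item_id', vocabulary_size=200, embedding_dim=4),
                   DenseFeat('price', 1)]

x = {'user_id': np.random.randint(0, 100, 256),
     'item_id': np.random.randint(0, 200, 256),
     'price': np.random.random(256)}
y = np.random.randint(0, 2, 256)

model = AFN(feature_columns, feature_columns, cross_num=2, afn_dropout=0.5)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])
model.fit(x, y, batch_size=64, epochs=1, validation_split=0.2)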
24 changes: 24 additions & 0 deletions tests/models/afn_test.py
@@ -0,0 +1,24 @@
import pytest

from deepctr.models import AFN
from tests.utils import get_test_data, SAMPLE_SIZE, check_model


@pytest.mark.parametrize(
    'cross_num, sparse_feature_num, dense_feature_num',
    [(2, 3, 0),
     (2, 3, 3),
     (3, 3, 3)]
)
def test_AFN(cross_num, sparse_feature_num, dense_feature_num):
    model_name = 'AFN'
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(
        sample_size, sparse_feature_num=sparse_feature_num, dense_feature_num=dense_feature_num)

    # Exercise different CrossNet depths; TensorFlow handles device placement itself.
    model = AFN(feature_columns, feature_columns, cross_num=cross_num, afn_dropout=0.5)

    check_model(model, model_name, x, y)


if __name__ == '__main__':
    pass
21 changes: 21 additions & 0 deletions tests/utils.py
@@ -416,3 +416,24 @@ def get_test_data_estimator(sample_size=1000, embedding_size=4, sparse_feature_n
def check_estimator(model, input_fn):
model.train(input_fn)
model.evaluate(input_fn)


def get_device(device_num=None):
    """
    Get the PyTorch device to use for computation.

    :param device_num: int, optional (default=None)
        The index of the device to use. If None, the device with the most free memory is selected.

    :return: torch.device
        The PyTorch device to use for computation.
    """
    import torch  # local import so the TensorFlow test suite does not require torch

    if device_num is not None:
        device = torch.device("cuda:{}".format(device_num) if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if device.type == 'cuda':
            # Pick the GPU with the most free memory; mem_get_info returns (free, total) bytes.
            mem_list = [torch.cuda.mem_get_info(i)[0] for i in range(torch.cuda.device_count())]
            device = torch.device("cuda:{}".format(mem_list.index(max(mem_list))))
    return device
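A quick illustration of the helper's two modes (a sketch; assumes PyTorch is installed, and the printed result depends on the machine):

dev = get_device()    # auto-select: the GPU with the most free memory, else CPU
dev0 = get_device(0)  # pin to cuda:0 when CUDA is available, else CPU
print(dev, dev0)      # e.g. "cuda:1 cuda:0" on a multi-GPU box, or "cpu cpu"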