diff --git a/deepctr/layers/interaction.py b/deepctr/layers/interaction.py
index f76eda32..74df8bdf 100644
--- a/deepctr/layers/interaction.py
+++ b/deepctr/layers/interaction.py
@@ -29,6 +29,52 @@
 from .utils import concat_func, reduce_sum, softmax, reduce_mean
 from .core import DNN
 
+class CrossNetLayer(Layer):
+    """The Cross Network part of DCN. Each layer applies the explicit
+    feature-crossing recurrence x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l,
+    so the output keeps the input dimension while adding one interaction order.
+    """
+
+    def __init__(self, layer_num=2, parameterization='vector', l2_reg=0, **kwargs):
+        self.layer_num = layer_num
+        self.parameterization = parameterization
+        self.l2_reg = l2_reg
+        super(CrossNetLayer, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        dim = int(input_shape[-1])
+        if self.parameterization == 'vector':
+            kernel_shape = (dim, 1)
+        elif self.parameterization == 'matrix':
+            kernel_shape = (dim, dim)
+        else:
+            raise ValueError("parameterization should be 'vector' or 'matrix'")
+        self.kernels = [self.add_weight(name='kernel' + str(i), shape=kernel_shape,
+                                        initializer='glorot_uniform',
+                                        regularizer=l2(self.l2_reg),
+                                        trainable=True) for i in range(self.layer_num)]
+        self.bias = [self.add_weight(name='bias' + str(i), shape=(dim, 1),
+                                     initializer='zeros',
+                                     trainable=True) for i in range(self.layer_num)]
+        super(CrossNetLayer, self).build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x_0 = tf.expand_dims(inputs, axis=2)  # (batch_size, dim, 1)
+        x_l = x_0
+        for i in range(self.layer_num):
+            if self.parameterization == 'vector':
+                xl_w = tf.tensordot(x_l, self.kernels[i], axes=(1, 0))  # (batch_size, 1, 1)
+                dot_ = tf.matmul(x_0, xl_w)  # (batch_size, dim, 1)
+                x_l = dot_ + self.bias[i] + x_l
+            else:
+                xl_w = tf.einsum('ij,bjk->bik', self.kernels[i], x_l)  # (batch_size, dim, 1)
+                x_l = x_0 * (xl_w + self.bias[i]) + x_l
+        x_l = tf.squeeze(x_l, axis=2)
+        return x_l
+
+    def compute_output_shape(self, input_shape):
+        return (None, input_shape[-1])
+
 
 class AFMLayer(Layer):
     """Attentonal Factorization Machine models pairwise (order-2) feature
diff --git a/deepctr/models/__init__.py b/deepctr/models/__init__.py
index 1d797e78..f6ec48c6 100644
--- a/deepctr/models/__init__.py
+++ b/deepctr/models/__init__.py
@@ -21,6 +21,7 @@
 from .wdl import WDL
 from .xdeepfm import xDeepFM
 from .edcn import EDCN
+from .afn import AFN
 
 __all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM",
            "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN", "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST", "DeepFEFM",
diff --git a/deepctr/models/afn.py b/deepctr/models/afn.py
new file mode 100644
index 00000000..f64b1b9c
--- /dev/null
+++ b/deepctr/models/afn.py
@@ -0,0 +1,68 @@
+from tensorflow.python.keras.models import Model
+from tensorflow.python.keras.layers import Dense, Dropout, Concatenate, Flatten
+from tensorflow.python.keras.initializers import TruncatedNormal
+from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
+from ..layers.core import PredictionLayer
+from ..layers.interaction import AFMLayer, CrossNetLayer
+from ..layers.utils import concat_func, add_func, combined_dnn_input
+
+
+def AFN(linear_feature_columns, dnn_feature_columns, cross_num=2, cross_parameterization='vector',
+        dnn_hidden_units=(128, 128, 128), l2_reg_linear=1e-5, l2_reg_embedding=1e-5,
+        l2_reg_cross=1e-5, afn_dropout=0, seed=1024, task='binary'):
+    """Instantiates the Attentional Factorization Network architecture.
+
+    :param linear_feature_columns: An iterable containing all the features used by the linear part of the model.
+    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
+    :param cross_num: int, the number of cross layers in the CrossNet.
+    :param cross_parameterization: str, one of ``"vector"`` or ``"matrix"``. Parameterization of the CrossNet.
+    :param dnn_hidden_units: tuple of positive integers, the layer number and units in each layer of the DNN.
+    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part.
+    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
+    :param l2_reg_cross: float. L2 regularizer strength applied to the CrossNet.
+    :param afn_dropout: float in [0,1), fraction of the CrossNet and DNN output units to drop out.
+    :param seed: integer, to use as random seed.
+    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
+    :return: A Keras model instance.
+    """
+
+    features = build_input_features(
+        linear_feature_columns + dnn_feature_columns)
+
+    inputs_list = list(features.values())
+
+    sparse_embedding_list, dense_value_list = input_from_feature_columns(
+        features, dnn_feature_columns, l2_reg_embedding, seed, support_dense=True)
+
+    # Linear part over both sparse and dense features
+    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
+                                    prefix='linear', l2_reg=l2_reg_linear)
+
+    # CrossNet over the flattened embedding vectors
+    cross_input = Flatten()(concat_func(sparse_embedding_list))
+    cross_out = CrossNetLayer(layer_num=cross_num, parameterization=cross_parameterization,
+                              l2_reg=l2_reg_cross)(cross_input)
+    if afn_dropout:
+        cross_out = Dropout(afn_dropout)(cross_out)
+
+    # AFM part: pairwise attentional interactions over the embedding list
+    afm_out = AFMLayer(seed=seed)(sparse_embedding_list)
+
+    # DNN part over the embeddings and dense features
+    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
+    deep_out = dnn_input
+    for units in dnn_hidden_units:
+        deep_out = Dense(units, activation='relu',
+                         kernel_initializer=TruncatedNormal(seed=seed))(deep_out)
+        if afn_dropout:
+            deep_out = Dropout(afn_dropout)(deep_out)
+
+    # Combine the linear, cross, AFM and DNN parts into the final logit
+    final_logit = Dense(1, use_bias=False)(
+        Concatenate(axis=1)([cross_out, afm_out, deep_out]))
+    final_logit = add_func([linear_logit, final_logit])
+
+    output = PredictionLayer(task)(final_logit)
+
+    model = Model(inputs=inputs_list, outputs=output)
+    return model
diff --git a/tests/models/afn_test.py b/tests/models/afn_test.py
new file mode 100644
index 00000000..fc060289
--- /dev/null
+++ b/tests/models/afn_test.py
@@ -0,0 +1,26 @@
+import pytest
+
+from deepctr.models import AFN
+from tests.utils import get_test_data, SAMPLE_SIZE, check_model
+
+
+# AFMLayer needs at least two sparse embeddings, so sparse_feature_num >= 2.
+@pytest.mark.parametrize(
+    'dnn_hidden_units, sparse_feature_num, dense_feature_num',
+    [((32, 16), 3, 0),
+     ((32, 16), 3, 3),
+     ((32, 16), 2, 3)]
+)
+def test_AFN(dnn_hidden_units, sparse_feature_num, dense_feature_num):
+    model_name = 'AFN'
+    sample_size = SAMPLE_SIZE
+    x, y, feature_columns = get_test_data(
+        sample_size, sparse_feature_num=sparse_feature_num, dense_feature_num=dense_feature_num)
+
+    model = AFN(feature_columns, feature_columns, dnn_hidden_units=dnn_hidden_units)
+
+    check_model(model, model_name, x, y)
+
+
+if __name__ == '__main__':
+    pass
diff --git a/tests/utils.py b/tests/utils.py
index f980a175..c7e82c1a 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -416,3 +416,26 @@
 def check_estimator(model, input_fn):
     model.train(input_fn)
     model.evaluate(input_fn)
+
+
+def get_device(device_num=None):
+    """
+    Get the PyTorch device to use for computation.
+
+    :param device_num: int, optional (default=None)
+        The index of the device to use. If None, the CUDA device with the most free memory is selected.
+
+    :return: torch.device
+        The PyTorch device to use for computation.
+    """
+    import torch  # local import: torch is only needed by this helper, not by the rest of the test suite
+
+    if device_num is not None:
+        return torch.device("cuda:{}".format(device_num) if torch.cuda.is_available() else "cpu")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if device.type == 'cuda' and torch.cuda.device_count() > 1:
+        # Pick the GPU with the least memory currently allocated,
+        # a rough proxy for the one with the most free memory.
+        mem_list = [torch.cuda.memory_allocated(i) for i in range(torch.cuda.device_count())]
+        device = torch.device("cuda:{}".format(mem_list.index(min(mem_list))))
+    return device
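Review note (not part of the patch): the cross layer can be sanity-checked in isolation. A minimal sketch, assuming CrossNetLayer is exported from deepctr.layers.interaction as in the hunk above; the batch size and dimension are arbitrary.

```python
# Standalone shape check for CrossNetLayer (batch size 2 and dim 8 are arbitrary).
import numpy as np
import tensorflow as tf

from deepctr.layers.interaction import CrossNetLayer

x = tf.constant(np.random.random((2, 8)), dtype=tf.float32)  # (batch_size, dim)
layer = CrossNetLayer(layer_num=3, parameterization='vector')
y = layer(x)
assert y.shape == (2, 8)  # each cross layer preserves the input dimension
```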
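And an end-to-end smoke run of the new model. Every field name, vocabulary size, and data point below is invented for illustration; only the AFN signature comes from the patch.

```python
# Hypothetical end-to-end smoke test for AFN; all field names and data are made up.
import numpy as np

from deepctr.feature_column import SparseFeat, DenseFeat
from deepctr.models import AFN

sparse_features = ['user_id', 'item_id', 'category']  # hypothetical fields
dense_features = ['price']

feature_columns = [SparseFeat(name, vocabulary_size=100, embedding_dim=4)
                   for name in sparse_features]
feature_columns += [DenseFeat(name, 1) for name in dense_features]

n = 256
model_input = {name: np.random.randint(0, 100, n) for name in sparse_features}
model_input.update({name: np.random.random(n) for name in dense_features})
y = np.random.randint(0, 2, n)

model = AFN(feature_columns, feature_columns, cross_num=2,
            dnn_hidden_units=(32, 16), afn_dropout=0.2, task='binary')
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
model.fit(model_input, y, batch_size=64, epochs=1, validation_split=0.2)
```

check_model in the test suite performs an equivalent fit/predict/save round trip, so this is only a convenience for reviewers running the branch locally.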