In [None]:
# default_exp models.afn

# AFN
> A PyTorch implementation of the Adaptive Factorization Network (AFN).

In [None]:
#hide
from nbdev.showdoc import *
from fastcore.nb_imports import *
from fastcore.test import *

## v1

In [None]:
#export
import torch
from torch import nn

from recohut.models.layers.embedding import EmbeddingLayer
from recohut.models.layers.common import MLP_Layer, LR_Layer

from recohut.models.bases.ctr import CTRModel

In [None]:
#export
class AFN(CTRModel):
    """Adaptive Factorization Network (AFN) on top of the project's ``CTRModel`` base.

    Field embeddings go through a logarithmic transformation layer
    (abs -> clamp -> log -> linear mix over fields -> exp), and the flattened
    result feeds an MLP that emits one logit. With ``ensemble_dnn=True`` a
    second embedding table plus a plain MLP emit another logit and the two are
    merged by a ``Linear(2, 1)`` layer.

    Args:
        feature_map: object exposing ``num_fields``; also handed to
            ``EmbeddingLayer`` and to the base-class constructor.
        model_id: forwarded to the base-class constructor.
        task: passed to ``self.get_final_activation`` to pick the output
            activation.
        learning_rate: accepted for signature compatibility.
            NOTE(review): this class neither uses nor forwards it -- confirm
            how the base class is expected to receive the learning rate.
        embedding_initializer: passed to ``self.init_weights``.
        embedding_dim: embedding size per field.
        ensemble_dnn: if true, build and use the additional DNN branch.
        dnn_hidden_units, dnn_activations, dnn_dropout: configuration of the
            DNN branch MLP (only used when ``ensemble_dnn`` is true).
        afn_hidden_units, afn_activations, afn_dropout: configuration of the
            MLP applied to the logarithmic-layer output.
        logarithmic_neurons: number of logarithmic neurons, i.e. output
            features of the field-mixing linear layer.
        batch_norm: forwarded to both ``MLP_Layer`` instances. The two
            ``BatchNorm1d`` layers of the logarithmic net are always applied.
        **kwargs: forwarded unchanged to the base-class constructor.
    """

    def __init__(self, 
                 feature_map, 
                 model_id="AFN",
                 task="binary_classification",
                 learning_rate=1e-3, 
                 embedding_initializer="torch.nn.init.normal_(std=1e-4)",
                 embedding_dim=10, 
                 ensemble_dnn=True,
                 dnn_hidden_units=[64, 64, 64], 
                 dnn_activations="ReLU",
                 dnn_dropout=0,
                 afn_hidden_units=[64, 64, 64], 
                 afn_activations="ReLU",
                 afn_dropout=0,
                 logarithmic_neurons=5,
                 batch_norm=True,
                 **kwargs):
        super(AFN, self).__init__(feature_map, 
                                           model_id=model_id,
                                           **kwargs)
        self.num_fields = feature_map.num_fields
        self.embedding_layer = EmbeddingLayer(feature_map, embedding_dim)
        # Mixes the (log-transformed) fields into `logarithmic_neurons` outputs.
        self.coefficient_W = nn.Linear(self.num_fields, logarithmic_neurons, bias=False)
        self.dense_layer = MLP_Layer(input_dim=embedding_dim * logarithmic_neurons,
                                     output_dim=1, 
                                     hidden_units=afn_hidden_units,
                                     hidden_activations=afn_activations,
                                     output_activation=None, 
                                     dropout_rates=afn_dropout, 
                                     batch_norm=batch_norm, 
                                     use_bias=True)
        self.log_batch_norm = nn.BatchNorm1d(self.num_fields)
        self.exp_batch_norm = nn.BatchNorm1d(logarithmic_neurons)
        self.ensemble_dnn = ensemble_dnn
        if ensemble_dnn:
            # Separate embedding table for the DNN branch, built exactly like
            # the first one. (The previous code passed an undefined
            # `embedding_dropout` name here, which raised NameError.)
            self.embedding_layer2 = EmbeddingLayer(feature_map, embedding_dim)
            self.dnn = MLP_Layer(input_dim=embedding_dim * self.num_fields,
                                 output_dim=1, 
                                 hidden_units=dnn_hidden_units,
                                 hidden_activations=dnn_activations,
                                 output_activation=None, 
                                 dropout_rates=dnn_dropout, 
                                 batch_norm=batch_norm, 
                                 use_bias=True)
            # Combines the AFN logit and the DNN logit into one.
            self.fc = nn.Linear(2, 1)
        self.output_activation = self.get_final_activation(task)
        self.init_weights(embedding_initializer=embedding_initializer)

    def forward(self, inputs):
        """Compute predictions for a batch.

        Args:
            inputs: batch in whatever format ``EmbeddingLayer`` accepts.

        Returns:
            The (optionally activated) prediction tensor; presumably of shape
            ``(batch_size, 1)`` since both MLPs use ``output_dim=1``.
        """
        feature_emb = self.embedding_layer(inputs)
        dnn_input = self.logarithmic_net(feature_emb)
        afn_out = self.dense_layer(dnn_input)
        if self.ensemble_dnn:
            # Use the same `inputs` as the AFN branch and flatten the
            # (batch, num_fields, embedding_dim) embeddings to match the DNN's
            # input_dim of embedding_dim * num_fields.
            feature_emb2 = self.embedding_layer2(inputs)
            dnn_out = self.dnn(feature_emb2.flatten(start_dim=1))
            y_pred = self.fc(torch.cat([afn_out, dnn_out], dim=-1))
        else:
            y_pred = afn_out

        if self.output_activation is not None:
            y_pred = self.output_activation(y_pred)
        return y_pred

    def logarithmic_net(self, feature_emb):
        """Apply the logarithmic transformation layer.

        Args:
            feature_emb: embeddings of shape
                ``(batch_size, num_fields, embedding_dim)``.

        Returns:
            Tensor flattened from dim 1, i.e.
            ``(batch_size, logarithmic_neurons * embedding_dim)``.
        """
        feature_emb = torch.abs(feature_emb)
        feature_emb = torch.clamp(feature_emb, min=1e-5) # ReLU with min 1e-5 (better than 1e-7 suggested in paper)
        log_feature_emb = torch.log(feature_emb) # element-wise log 
        log_feature_emb = self.log_batch_norm(log_feature_emb) # batch_size * num_fields * embedding_dim 
        # Move fields to the last axis so the linear layer mixes across fields,
        # then move the resulting neuron axis back to position 1.
        logarithmic_out = self.coefficient_W(log_feature_emb.transpose(2, 1)).transpose(1, 2)
        cross_out = torch.exp(logarithmic_out) # element-wise exp
        cross_out = self.exp_batch_norm(cross_out)  # batch_size * logarithmic_neurons * embedding_dim
        concat_out = torch.flatten(cross_out, start_dim=1)
        return concat_out

Example

In [None]:
# Keyword arguments for the AFN example below (unpacked with **params).
params = dict(
    # identification and paths
    model_id='AFN',
    data_dir='/content/data',
    model_root='./checkpoints/',
    # optimisation and objective
    learning_rate=1e-3,
    batch_norm=False,
    optimizer='adamw',
    task='binary_classification',
    loss='binary_crossentropy',
    metrics=['logloss', 'AUC'],
    # network architecture
    embedding_dim=10,
    logarithmic_neurons=1200,
    afn_hidden_units=[400, 400, 400],
    afn_activations='relu',
    afn_dropout=0,
    ensemble_dnn=False,
    dnn_hidden_units=[400, 400, 400],
    dnn_activations='relu',
    dnn_dropout=0,
    # run settings
    batch_size=64,
    epochs=3,
    shuffle=True,
    seed=2019,
    use_hdf5=True,
    workers=1,
    verbose=0,
)

In [None]:
# Build the v1 AFN model from the dataset's feature map and the `params` above.
# NOTE(review): `ds` is not defined in any cell visible in this notebook; it has
# to be created beforehand and expose `.dataset.feature_map` -- confirm/add the
# cell that builds it so the notebook survives Restart & Run All.
model = AFN(ds.dataset.feature_map, **params)

In [None]:
# Run the training helper on the model and data for up to 5 epochs.
# NOTE(review): `pl_trainer` is not imported in any visible cell, and
# max_epochs=5 here differs from params['epochs'] = 3 -- confirm which is intended.
pl_trainer(model, ds, max_epochs=5)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name              | Type           | Params
-----------------------------------------------------
0 | embedding_layer   | EmbeddingLayer | 4.8 K 
1 | coefficient_W     | Linear         | 16.8 K
2 | dense_layer       | MLP_Layer      | 5.1 M 
3 | log_batch_norm    | BatchNorm1d    | 28    
4 | exp_batch_norm    | BatchNorm1d    | 2.4 K 
5 | output_activation | Sigmoid        | 0     
-----------------------------------------------------
5.1 M     Trainable params
0         Non-trainable params
5.1 M     Total params
20.582    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test Metrics': {'AUC': tensor(0.7091), 'logloss': tensor(0.3672)}}
--------------------------------------------------------------------------------


[{'Test Metrics': {'AUC': tensor(0.7091), 'logloss': tensor(0.3672)}}]

## v2

In [None]:
#export
import math
import torch
import torch.nn.functional as F

from recohut.models.layers.common import FeaturesEmbedding, FeaturesLinear, MultiLayerPerceptron

In [None]:
#exporti
class LNN(torch.nn.Module):
    """
    A pytorch implementation of LNN layer
    Input shape
        - A 3D tensor with shape: ``(batch_size,field_size,embedding_size)``.
    Output shape
        - 2D tensor with shape:``(batch_size,LNN_dim*embedding_size)``.
    Arguments
        - **num_fields**: int.The field size of feature.
        - **embed_dim**: int.The embedding size of each field.
        - **LNN_dim**: int.The number of Logarithmic neuron.
        - **bias**: bool.Whether or not use bias in LNN.
    """
    def __init__(self, num_fields, embed_dim, LNN_dim, bias=False):
        super(LNN, self).__init__()
        self.num_fields = num_fields
        self.embed_dim = embed_dim
        self.LNN_dim = LNN_dim
        self.lnn_output_dim = LNN_dim * embed_dim
        # Allocate uninitialised storage; values are set in reset_parameters().
        self.weight = torch.nn.Parameter(torch.empty(LNN_dim, num_fields))
        if bias:
            self.bias = torch.nn.Parameter(torch.empty(LNN_dim, embed_dim))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()
    
    def reset_parameters(self):
        """Initialise weight (and bias, if any) uniformly in ``[-1/sqrt(num_fields), 1/sqrt(num_fields)]``."""
        stdv = 1. / math.sqrt(self.weight.size(1))
        torch.nn.init.uniform_(self.weight, -stdv, stdv)
        if self.bias is not None:
            torch.nn.init.uniform_(self.bias, -stdv, stdv)

    def forward(self, x):
        """
        :param x: Float tensor of size ``(batch_size, num_fields, embedding_size)``
        :return: Float tensor of size ``(batch_size, LNN_dim * embedding_size)``
        """
        embed_x_abs = torch.abs(x) # Computes the element-wise absolute value of the given input tensor.
        embed_x_afn = torch.add(embed_x_abs, 1e-7)
        # Logarithmic Transformation
        embed_x_log = torch.log1p(embed_x_afn) # torch.log1p and torch.expm1
        # (LNN_dim, num_fields) @ (..., num_fields, embed) -> (..., LNN_dim, embed)
        lnn_out = torch.matmul(self.weight, embed_x_log)
        if self.bias is not None:
            lnn_out = lnn_out + self.bias
        lnn_exp = torch.expm1(lnn_out)
        activated = F.relu(lnn_exp)
        # Collapse every leading dimension into one explicit batch dimension.
        # Spelling the size out (instead of -1) keeps an empty batch working:
        # view(-1, D) is ambiguous for a tensor with zero elements and raises.
        batch_size = math.prod(activated.shape[:-2])
        output = activated.reshape(batch_size, self.lnn_output_dim)
        return output

In [None]:
#export
class AFN_v2(torch.nn.Module):
    """
    Adaptive Factorization Network: a linear term added to an MLP over LNN
    features, followed by a sigmoid.

    Reference:
        Cheng W, et al. Adaptive Factorization Network: Learning Adaptive-Order Feature Interactions, 2019.

    Arguments
        - **field_dims**: sized collection; its length is the number of fields.
        - **embed_dim**: int. Embedding size per field.
        - **LNN_dim**: int. Number of logarithmic neurons.
        - **mlp_dims**: passed to ``MultiLayerPerceptron``.
        - **dropouts**: indexable; only ``dropouts[0]`` is used (for the MLP).
    """
    def __init__(self, field_dims, embed_dim, LNN_dim, mlp_dims, dropouts):
        super(AFN_v2, self).__init__()
        field_count = len(field_dims)
        self.num_fields = field_count
        # Sub-modules are created in a fixed order: linear, embedding, LNN, mlp.
        self.linear = FeaturesLinear(field_dims)
        self.embedding = FeaturesEmbedding(field_dims, embed_dim)
        lnn_width = LNN_dim * embed_dim
        self.LNN_dim = LNN_dim
        self.LNN_output_dim = lnn_width
        self.LNN = LNN(field_count, embed_dim, LNN_dim)
        self.mlp = MultiLayerPerceptron(lnn_width, mlp_dims, dropouts[0])

    def forward(self, x):
        """
        :param x: Long tensor of size ``(batch_size, num_fields)``
        """
        lnn_features = self.LNN(self.embedding(x))
        logits = self.linear(x) + self.mlp(lnn_features)
        return torch.sigmoid(logits.squeeze(1))

> **References**
> - Cheng W, et al. Adaptive Factorization Network: Learning Adaptive-Order Feature Interactions, 2019.
> - https://github.com/rixwew/pytorch-fm/blob/master/torchfm/model/afn.py

In [None]:
#hide
# Reload the `watermark` IPython extension and print its environment stamp
# (author passed via -a, the `recohut` package via -p; the remaining flags
# select further fields -- see the watermark documentation).
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d -p recohut