In [1]:
!pip install deepctr

Collecting deepctr
  Downloading deepctr-0.7.3-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 2.1 MB/s 
Installing collected packages: deepctr
Successfully installed deepctr-0.7.3


In [2]:
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from deepctr.inputs import  SparseFeat, DenseFeat, get_feature_names
from tensorflow.keras.models import Model, load_model

from deepctr.models import CCPM
from deepctr.models import FNN, PNN, NFM, AFM
from deepctr.models import MLR
from deepctr.models import WDL, DeepFM, DCN, xDeepFM
from deepctr.models import DIN, DIEN, DSIN
from deepctr.models import AutoInt
from deepctr.models import ONN
from deepctr.models import FGCNN
from deepctr.models import FiBiNET

from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, Callback
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras.layers import Activation
from tensorflow.keras import backend as K
from tensorflow.keras import callbacks
from tensorflow.keras import utils
import tensorflow.keras as keras
import tensorflow as tf
import pandas as pd
import numpy as np
import time
import warnings
warnings.simplefilter('ignore')

In [3]:
train = pd.read_csv('/kaggle/input/rs6-attrition-predict/train.csv')
test = pd.read_csv('/kaggle/input/rs6-attrition-predict/test.csv')

In [4]:
train.Attrition = train.Attrition.map({'No':0, 'Yes':1})

In [5]:
test['Attrition'] = -1

In [6]:
data = pd.concat([train, test]).reset_index(drop=True)
data.drop(['Over18', 'StandardHours'], axis=1, inplace=True)

In [7]:
data.head(3)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,user_id
0,58,0,Travel_Rarely,605,Sales,21,3,Life Sciences,1,1938,...,3,1,29,2,2,1,0,0,0,1374
1,45,0,Travel_Rarely,950,Research & Development,28,3,Technical Degree,1,1546,...,4,1,8,3,3,5,4,0,3,1092
2,40,0,Travel_Rarely,300,Sales,26,3,Marketing,1,1066,...,2,1,8,3,2,7,7,7,5,768


In [8]:
dense_features = ['Age', 'DailyRate', 'MonthlyIncome', 'NumCompaniesWorked', 'PercentSalaryHike', 'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 'YearsWithCurrManager']
sparse_features = [feature for feature in data.columns if feature not in dense_features + ['user_id', 'Attrition']]

In [9]:
for idx, feat in enumerate(sparse_features):
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat].fillna('-1').astype(str).values)
    if idx % 5 == 0:
        print(data[feat])

0       2
1       2
2       2
3       0
4       1
       ..
1465    2
1466    2
1467    2
1468    2
1469    2
Name: BusinessTravel, Length: 1470, dtype: int64
0       0
1       0
2       0
3       0
4       0
       ..
1465    0
1466    0
1467    0
1468    0
1469    0
Name: EmployeeCount, Length: 1470, dtype: int64
0       2
1       2
2       2
3       1
4       0
       ..
1465    1
1466    1
1467    0
1468    2
1469    2
Name: JobInvolvement, Length: 1470, dtype: int64
0         95
1       1100
2        709
3        239
4       1312
        ... 
1465     149
1466     748
1467     613
1468    1267
1469    1126
Name: MonthlyRate, Length: 1470, dtype: int64
0       22
1       38
2       38
3        2
4        1
        ..
1465     2
1466    36
1467     4
1468     6
1469    10
Name: TotalWorkingYears, Length: 1470, dtype: int64


In [10]:
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])

In [11]:
train = data[data.Attrition != -1].reset_index(drop=True)
test = data[data.Attrition == -1].reset_index(drop=True)

In [12]:
train.head(3)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,user_id
0,0.952381,0,2,0.360057,2,13,2,1,0,734,...,2,1,22,2,1,0.025,0.0,0.0,0.0,1374
1,0.642857,0,2,0.607015,1,20,2,5,0,424,...,3,1,38,3,2,0.125,0.222222,0.0,0.176471,1092
2,0.52381,0,2,0.141732,2,18,2,2,0,60,...,1,1,38,3,1,0.175,0.388889,0.466667,0.294118,768


In [13]:
fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique()) for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features] 

dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns
ccpm_dnn_feature_columns = [SparseFeat(feat, data[feat].nunique()) for feat in sparse_features] 
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
feature_names

['BusinessTravel',
 'Department',
 'DistanceFromHome',
 'Education',
 'EducationField',
 'EmployeeCount',
 'EmployeeNumber',
 'EnvironmentSatisfaction',
 'Gender',
 'HourlyRate',
 'JobInvolvement',
 'JobLevel',
 'JobRole',
 'JobSatisfaction',
 'MaritalStatus',
 'MonthlyRate',
 'OverTime',
 'PerformanceRating',
 'RelationshipSatisfaction',
 'StockOptionLevel',
 'TotalWorkingYears',
 'TrainingTimesLastYear',
 'WorkLifeBalance',
 'Age',
 'DailyRate',
 'MonthlyIncome',
 'NumCompaniesWorked',
 'PercentSalaryHike',
 'YearsAtCompany',
 'YearsInCurrentRole',
 'YearsSinceLastPromotion',
 'YearsWithCurrManager']

In [14]:
def auc(y_true, y_pred):
    def fallback_auc(y_true, y_pred):
        try:
            return roc_auc_score(y_true, y_pred)
        except:
            return 0.5
    return tf.py_function(fallback_auc, (y_true, y_pred), tf.double)

**CE**
$$CE(p_t)=-log(p_t)$$
**FL**
$$FL(p_t) = -(1-p_t)^\gamma log(p_t)$$
**$\alpha$-balanced**
$$FL(p_t)=-\alpha_t(1-p_t)^\gamma log(p_t)$$
上式可以减少易分样本的权重，使得模型更加关注难分的样本。具体的来讲，当是正样本时，若样本越易分类，其$p_t$很大，此时的$1-p_t$相当于是惩罚项，将会变得很小，由于$\gamma$是一个大于1的数。同理当样本是负样本的时候。

当是多分类情况时，和上边的讨论一致，其中$p_t$仅是预测为某一类的概率值。

In [15]:
K.epsilon()

1e-07

In [16]:
# https://arxiv.org/abs/1708.02002 
def focal_loss(gamma=2., alpha=.25):
    def focal_loss_fixed(y_true, y_pred):
        # tf.where(input, a, b): 其中a和b均为size一样的tensor,作用是将a中的input中的true的位置的元素不变，其余元素进行替换为b中对应的元素
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        return -K.mean(alpha * K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1))-K.mean((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))
    return focal_loss_fixed

get_custom_objects().update({'focal_loss_fn': focal_loss()})

**Gaussian Error Linerar Units(GELUS)**:a activation function using in Bert/Tranfromer

假设为正太分布的情况下，其公式如下
$$GELU(x)=0.5x(1+tanh(\sqrt{(2/\pi)}(x+0.044715x^3)))$$

In [17]:
# https://arxiv.org/pdf/1606.08415.pdf
def custom_gelu(x):
    return 0.5 * x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))

get_custom_objects().update({'custom_gelu': Activation(custom_gelu)})

**Mish**: Activation Function
$$mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))$$

In [18]:
# https://arxiv.org/ftp/arxiv/papers/1908/1908.08681.pdf
class Mish(Activation):
    '''
    Mish Activation Function.
    .. math::
        mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))
    Shape:
        - Input: Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
        - Output: Same shape as the input.
    Examples:
        >>> X = Activation('Mish', name="conv1_act")(X_input)
    '''

    def __init__(self, activation, **kwargs):
        super(Mish, self).__init__(activation, **kwargs)
        self.__name__ = 'Mish'


def mish(inputs):
    return inputs * tf.math.tanh(tf.math.softplus(inputs))


get_custom_objects().update({'Mish': Mish(mish)})

In [19]:
class WarmUpLearningRateScheduler(tf.keras.callbacks.Callback):
    def __init__(self, warmup_batches, init_lr, verbose=0):
        super(WarmUpLearningRateScheduler, self).__init__()
        self.warmup_batches = warmup_batches
        self.init_lr = init_lr
        self.verbose = verbose
        self.batch_count = 0
        self.learning_rates = []
        
    def on_batch_end(self, batch, logs=None):
        self.batch_count = self.batch_count + 1
        lr = K.get_value(self.model.optimizer.lr)
        self.learning_rates.append(lr)
    
    def on_batch_begin(self, batch, logs=None):
        if self.batch_count <= self.warmup_batches:
            lr = self.batch_count * self.init_lr / self.warmup_batches
            K.set_value(self.model.optimizer.lr, lr)
            if self.verbose > 0:
                print('\nBatch %05d: WarmUpLearningRateScheduler setting learning to %s.'% (self.batch_count + 1, lr))

**Learning Rate**

 Cyclical Learning Rates for Training Neural Networks:https://arxiv.org/pdf/1506.01186.pdf

In [20]:
class CyclicLR(keras.callbacks.Callback):

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn == None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1 / (2. ** (x - 1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma ** (x)
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr != None:
            self.base_lr = new_base_lr
        if new_max_lr != None:
            self.max_lr = new_max_lr
        if new_step_size != None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) * self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) * self.scale_fn(
                self.clr_iterations)

    def on_train_begin(self, logs={}):
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):

        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        K.set_value(self.model.optimizer.lr, self.clr())

In [21]:
target = ['Attrition']
N_Splits = 5
Verbose = 0
Epochs = 10
SEED = 2020
Batch_S_T = 8
Batch_S_P = 64


**DeepFM**
github: https://github.com/shenweichen/DeepCTR

https://arxiv.org/pdf/1703.04247.pdf

**FM**基本公式为 $y_{FM} = sigmoid(w_0 + \sum_{i=1}^N w_ix_i + \sum_{i=1}^N \sum_{j=i+1}^N<v_i, v_j> x_ix_j)$，前两项（$w_0+\sum_{i=1}^N w_i x_i$）其实是Logisitic Regression，最后一项利用项的合并可以进一步推导为$\frac{1}{2} \sum_{f=1}^k((\sum_{i=1}^n v_{i,f}x_i)^2 - \sum_{i=1}^n v_{i,f}^2 x_i^2)$，此时的计算复杂度为$O(kn^2)$，参数为$kn+n+1$。

**CCPM** 使用CNN来获取特征


***串联模型***

**FNN**是对FM模型得到的embedding向量，做了一次特征工程，然后使其送入分类器，其中每个field $i$中的每个feature之间共用一个权重$W_0^i$，然后把权重和值的乘积以及$w_0$（Logisitic Regression的权重）再次喂入上一层（每一层的基本单元包括权重和偏置）。

**NFM**通过逐元素乘法实现了*FM*，其主要贡献在于在神经网络连接层中添入了Bi-Interaction Layer，此层的主要操作为$f_{BI}(V_x) = \frac{1}{2} [(\sum_{i=1}^n x_i v_i)^2 - \sum_{i=1}^n(x_iv_i)^2]$（这个对应于FM的第三项）。在这个过程中，FM可以看成是不含隐藏层的NFM。
- 简化版NFM为
$$\bar{y}_{NFM-0}=w_0+\sum_{i=1}^n w_i x_i + h^T \sum_{i=1}^n \sum_{j=i+1}^n x_iv_i \odot x_iv_j$$

**AFM**对简化版NFM进行加权求和，其中权重公式是通过attention网络计算出来的，具体公式为
$$\bar{a}_{ij} = h^T RELU(W(v_i \odot v_j)x_ix+j + b)$$
$$a_{ij} = \frac{exp(\bar{a}_{ij})}{\sum_{(i,j)\in R_m }exp(\bar{a}_{ij})}$$
最终的计算公式为$\bar{y}_{AFM}(x)=w_0+\sum_{i=1}^n w_i x_i + p^T \sum_{i=1}^n \sum_{j=i+1}^n a_{ij} (v_i \odot v_j) x_i x_j$

**PNN**通过改进向量乘法运算延迟FM的实现过程。其中比*FNN*多出quadratic signals $l_p$的计算，这个向量可以通过内积或者外积的方式计算得到。
- 内积，实现两个向量的点积，最终得到的是一个数，基本思路就是将所有的field的embedding表示两两相乘，可以得到长度为$field\_size * (field\_size - 1)/2$长度的向量$l_p$。
- 外积，实现两个向量的外积，得到一个$embedding\_size * embedding\_size$的矩阵，和内积的方式一样，矩阵个数就是内积的长度。

***并联模型***

**DeepFM**是FM和MLP并联结合

**DCN**高阶FM的降维实现，相当于Cross Network的输出为$x_0$和一个数相乘，其特征交叉还是以bit-wise的方式构建的，$x_{l+1} = x_0x_l^Tw_l + b_l + x_l = f(x_l, w_l, b_l) + x_l$，其中$x_0=[x_{embed, 1}^T, \cdots, x_{embed,k}^T, x_{dense}^T]$

**Wide&Deep**是DeepFM和DCN的基础框架

**xDeepFM**

**FGCNN**

***Multi-head self-attention***

**AUTOINT**

**DIN**

**DIEN**

**DSIN**

**FIBINET**

**WRL**

**ONN**

**总结**
- 特征交叉方式分为两种，bit-wise和vector-wise，其中bit表示的是向量中的某个元素，所以同一个field的元素也是可以进行交叉的，但是解释性不强，如FNN、PNN和DeepFM，其缺点是模型学习出来的是隐式的交互特征，形式未知；vector-wise是向量级的交叉，在引入高阶的vector-wise的交叉特征，同时又能控制模型的复杂度，避免产生过多的无效交叉特征，因此可以通过一定的压缩机制把高阶向量组合压缩到一个合理的范围内，同时又尽可能的保留有效的交叉特征，其中DCN引入cross层，对向量$x_0$做乘数变化，同时也是bit-wise的交叉方式，xDeepFM模型是自动构建交叉特征且能够端到端学习的集大成者，引入CIN网络，将二阶交叉、三阶交叉，一直到$k+1$阶交叉特征都放入到输入中去。
- 网络架构，

In [22]:
def model_deepctr(model, model_name):
    oof_pred_deepfm = np.zeros((len(train), ))
    y_pred_deepfm = np.zeros((len(test), ))


    skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)
    start = time.time()
    for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):
        X_train, X_val = train[feature_names].iloc[tr_ind], train[feature_names].iloc[val_ind]
        y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
        train_model_input = {name:X_train[name] for name in feature_names}
        val_model_input = {name:X_val[name] for name in feature_names}
        test_model_input = {name:test[name] for name in feature_names}
        model.compile('adam', loss = 'focal_loss_fn', metrics=[auc], )# binary_crossentropy
        es = callbacks.EarlyStopping(monitor='val_auc', min_delta=0.001, patience=3, verbose=Verbose, mode='max', baseline=None, restore_best_weights=True)
        sb = callbacks.ModelCheckpoint('./nn_model.w8', save_weights_only=True, save_best_only=True, verbose=Verbose)
        clr = CyclicLR(base_lr=1e-3, max_lr = 3.5e-3, step_size= int(1.0*(test.shape[0])/(Batch_S_T*4)) , mode='exp_range', gamma=1.0, scale_fn=None, scale_mode='cycle')
        history = model.fit(train_model_input, y_train,
                            validation_data=(val_model_input, y_val),
                            batch_size=Batch_S_T, epochs=Epochs, verbose=Verbose,
                            callbacks=[es, sb, clr],)
        model.load_weights('./nn_model.w8')
        val_pred = model.predict(val_model_input, batch_size=Batch_S_P)
        print(f'validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}')
        oof_pred_deepfm[val_ind] = val_pred.ravel()
        y_pred_deepfm += model.predict(test_model_input, batch_size=Batch_S_P).ravel() / (N_Splits)
        K.clear_session()
    cost_time = time.time() - start 
    auc_score = round(roc_auc_score(train.Attrition.values, oof_pred_deepfm), 5)
    print(f'OOF AUC : {auc_score}')
    test_idx = test.user_id.values
    submission = pd.DataFrame.from_dict({
        'user_id': test_idx,
        'Attrition': y_pred_deepfm
    })
    submission.to_csv(f'{model_name}_submission.csv', index=False)
    return cost_time, auc_score

In [23]:
model = DeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
deepfm_time, auc_score = model_deepctr(model, 'deepfm')

validation AUC fold 1 : 0.84304
validation AUC fold 2 : 0.90841
validation AUC fold 3 : 0.83661
validation AUC fold 4 : 0.94136
validation AUC fold 5 : 0.82567
OOF AUC : 0.85179


In [24]:
model = CCPM(linear_feature_columns, ccpm_dnn_feature_columns, dnn_hidden_units=(256, 256, 256), dnn_dropout=0.0, task='binary', )
ccpm_time, ccpm_auc_score = model_deepctr(model, 'ccpm')

validation AUC fold 1 : 0.7694
validation AUC fold 2 : 0.91974
validation AUC fold 3 : 0.82501
validation AUC fold 4 : 0.94243
validation AUC fold 5 : 0.84504
OOF AUC : 0.86347


In [25]:
model = FNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 256), dnn_dropout=0.0, dnn_activation='tanh', task='binary', )
fnn_time, fnn_auc_score = model_deepctr(model, 'fnn')

validation AUC fold 1 : 0.87294
validation AUC fold 2 : 0.92629
validation AUC fold 3 : 0.84794
validation AUC fold 4 : 0.94029
validation AUC fold 5 : 0.81419
OOF AUC : 0.84342


In [26]:
model = PNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128), dnn_dropout=0.0, dnn_activation='tanh', task='binary', )
pnn_time, pnn_auc_score = model_deepctr(model, 'pnn')

TypeError: Expected int32, got 'BusinessTravel' of type 'str' instead.

In [27]:
model = WDL(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', task='binary', )
wdl_time, wdl_auc_score = model_deepctr(model, 'wdl')

validation AUC fold 1 : 0.75332
validation AUC fold 2 : 0.92151
validation AUC fold 3 : 0.86364
validation AUC fold 4 : 0.92907
validation AUC fold 5 : 0.8206
OOF AUC : 0.83607


In [28]:
model = MLR(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
mlr_time, mlr_auc_score = model_deepctr(model, 'mlr')

TypeError: MLR() got an unexpected keyword argument 'dnn_hidden_units'

In [29]:
model = NFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
nfm_time, nfm_auc_score = model_deepctr(model, 'nfm')

TypeError: NFM() got an unexpected keyword argument 'dnn_use_bn'

In [30]:
model = AFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
afm_time, afm_auc_score = model_deepctr(model, 'afm')

TypeError: AFM() got an unexpected keyword argument 'dnn_hidden_units'

In [31]:
model = DCN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
dcn_time, dcn_auc_score = model_deepctr(model, 'dcn')

validation AUC fold 1 : 0.811
validation AUC fold 2 : 0.9237
validation AUC fold 3 : 0.8392
validation AUC fold 4 : 0.9451
validation AUC fold 5 : 0.80083
OOF AUC : 0.85035


In [32]:
model = din(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
din_time, din_auc_score = model_deepctr(model, 'din')

NameError: name 'din' is not defined

In [33]:
model = dien(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
dien_time, dien_auc_score = model_deepctr(model, 'dien')

NameError: name 'dien' is not defined

In [34]:
model = DSIN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
dsin_time, dsin_auc_score = model_deepctr(model, 'dsin')

ValueError: hist_emb_size must equal to att_embedding_size * att_head_num ,got 0 != 1 *8

In [35]:
model = xDeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
xdeepfm_time, xdeepfm_auc_score = model_deepctr(model, 'xdeepfm')

validation AUC fold 1 : 0.82709
validation AUC fold 2 : 0.89244
validation AUC fold 3 : 0.85435
validation AUC fold 4 : 0.9272
validation AUC fold 5 : 0.80283
OOF AUC : 0.85453


In [36]:
model = AutoInt(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
autoint_time, autoint_auc_score = model_deepctr(model, 'autoint')

validation AUC fold 1 : 0.8328
validation AUC fold 2 : 0.93107
validation AUC fold 3 : 0.85026
validation AUC fold 4 : 0.94349
validation AUC fold 5 : 0.82394
OOF AUC : 0.86168


In [37]:
model = ONN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128, 128, 128, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
onn_time, onn_auc_score = model_deepctr(model, 'onn')

TypeError: ONN() got an unexpected keyword argument 'dnn_activation'

In [38]:
model = FGCNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 256, 128), dnn_dropout=0.0, dnn_activation='tanh', dnn_use_bn=False, task='binary', )
fgcnn_time, fgcnn_auc_score = model_deepctr(model, 'fgcnn')

TypeError: FGCNN() got an unexpected keyword argument 'dnn_activation'

In [39]:
model = FiBiNET(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 256), dnn_dropout=0.0, dnn_activation='tanh', task='binary', )
fibinet_time, fibinet_auc_score = model_deepctr(model, 'fibinet')

validation AUC fold 1 : 0.87068
validation AUC fold 2 : 0.9237
validation AUC fold 3 : 0.8893
validation AUC fold 4 : 0.91558
validation AUC fold 5 : 0.84157
OOF AUC : 0.86381


In [40]:
label = ['CCPM', 'FNN', 'PNN', 'WDL', 'DeepFM', 'MLR', 'NFM', 'AFM', 'DCN', 'DIN', 'DIEN', 'DSIN', 'xDeepFM', 'AutoInt', 'ONN', 'FGCNN', 'FiBiNET']

In [41]:
auc_value = [ccpm_auc_score, fnn_auc_score, pnn_auc_score, wdl_auc_score, 
             deepfm_auc_score, mlr_auc_score, nfm_auc_score, afm_auc_score,
             dcn_auc_score, din_auc_score, dien_auc_score, dsin_auc_score,
             xdeepfm_auc_score, autoint_auc_score, onn_auc_score, fgcnn_auc_score,
             fibinet_auc_score
            ]

figure, ax = plt.subplots(figsize=(16,4))

plt.bar(range(len(auc_value)), auc_value, tick_label=label)
for tick in ax.get_xticklabels():
    tick.set_rotation(90)
plt.title('Different AUC in Dataset by deepctr')
plt.show()

NameError: name 'pnn_auc_score' is not defined

In [42]:
time_value = [ccpm_time, fnn_time, pnn_time, wdl_time, 
             deepfm_time, mlr_time, nfm_time, afm_time,
             dcn_time, din_time, dien_time, dsin_time,
             xdeepfm_time, autoint_time, onn_time, fgcnn_time,
             fibinet_time
            ]
figure, ax = plt.subplots(figsize=(16,4))

plt.bar(range(len(time_value)), time_value, tick_label=label)
for tick in ax.get_xticklabels():
    tick.set_rotation(90)
plt.title('Different Time in Dataset by ML')
plt.show()

NameError: name 'pnn_time' is not defined