In [1]:
!pip install deepctr

Collecting deepctr
  Downloading deepctr-0.7.2-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 2.0 MB/s 
Installing collected packages: deepctr
Successfully installed deepctr-0.7.2


In [41]:
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from deepctr.inputs import  SparseFeat, DenseFeat, get_feature_names
from tensorflow.keras.models import Model, load_model
from deepctr.models import DeepFM
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, Callback
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras.layers import Activation
from tensorflow.keras import backend as K
from tensorflow.keras import callbacks
from tensorflow.keras import utils
import tensorflow.keras as keras
import tensorflow as tf
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter('ignore')

In [42]:
# Read the train and test CSV files into two DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('dataset/attrition/train.csv', 'dataset/attrition/test.csv')
)

In [43]:
# Encode the target as integers: 'No' -> 0, 'Yes' -> 1.
attrition_codes = {'No': 0, 'Yes': 1}
train['Attrition'] = train['Attrition'].map(attrition_codes)

In [44]:
# Give test rows a sentinel target of -1 so they can be told apart from
# training rows after the two frames are concatenated.
test = test.assign(Attrition=-1)

In [45]:
# Stack train and test into one frame with a fresh 0..n-1 index, then
# remove the 'Over18' and 'StandardHours' columns.
data = (
    pd.concat([train, test])
    .reset_index(drop=True)
    .drop(columns=['Over18', 'StandardHours'])
)

In [46]:
# Preview the first three rows of the combined train+test frame.
data.head(3)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,user_id
0,58,0,Travel_Rarely,605,Sales,21,3,Life Sciences,1,1938,...,3,1,29,2,2,1,0,0,0,1374
1,45,0,Travel_Rarely,950,Research & Development,28,3,Technical Degree,1,1546,...,4,1,8,3,3,5,4,0,3,1092
2,40,0,Travel_Rarely,300,Sales,26,3,Marketing,1,1066,...,2,1,8,3,2,7,7,7,5,768


In [47]:
# Columns fed to the model as continuous values.
dense_features = [
    'Age',
    'DailyRate',
    'MonthlyIncome',
    'NumCompaniesWorked',
    'PercentSalaryHike',
    'YearsAtCompany',
    'YearsInCurrentRole',
    'YearsSinceLastPromotion',
    'YearsWithCurrManager',
]
# Every other column, except the id and the target, is treated as categorical.
non_sparse = set(dense_features) | {'user_id', 'Attrition'}
sparse_features = [col for col in data.columns if col not in non_sparse]

In [48]:
# List the column names of the training frame.
train.columns.to_list()

['user_id',
 'Age',
 'Attrition',
 'BusinessTravel',
 'DailyRate',
 'Department',
 'DistanceFromHome',
 'Education',
 'EducationField',
 'EmployeeCount',
 'EmployeeNumber',
 'EnvironmentSatisfaction',
 'Gender',
 'HourlyRate',
 'JobInvolvement',
 'JobLevel',
 'JobRole',
 'JobSatisfaction',
 'MaritalStatus',
 'MonthlyIncome',
 'MonthlyRate',
 'NumCompaniesWorked',
 'Over18',
 'OverTime',
 'PercentSalaryHike',
 'PerformanceRating',
 'RelationshipSatisfaction',
 'StandardHours',
 'StockOptionLevel',
 'TotalWorkingYears',
 'TrainingTimesLastYear',
 'WorkLifeBalance',
 'YearsAtCompany',
 'YearsInCurrentRole',
 'YearsSinceLastPromotion',
 'YearsWithCurrManager']

In [49]:
# Cast the dense columns of the combined frame to float64. The encoding and
# scaling cells that follow operate on `data`, and `train`/`test` are
# re-derived from `data` afterwards, so casting `train`/`test` here (as this
# cell did before) had no effect on the model inputs.
data[dense_features] = data[dense_features].astype(np.float64)

In [50]:
# Replace each categorical column with integer codes. Missing values are
# filled with the string '-1' and all values are converted to text before a
# fresh LabelEncoder is fitted on the column.
for col in sparse_features:
    encoder = LabelEncoder()
    as_text = data[col].fillna('-1').astype(str).values
    data[col] = encoder.fit_transform(as_text)

In [51]:
# Rescale the dense columns into the [0, 1] range; the scaler is fitted on
# the combined train+test frame.
mms = MinMaxScaler(feature_range=(0, 1))
scaled_dense = mms.fit_transform(data[dense_features])
data[dense_features] = scaled_dense

In [52]:
# Split the preprocessed frame back apart using the -1 sentinel target.
is_test_row = data.Attrition == -1
train = data[~is_test_row].reset_index(drop=True)
test = data[is_test_row].reset_index(drop=True)

In [53]:
# Preview the first three rows of the preprocessed training frame.
train.head(3)

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,user_id
0,0.952381,0,2,0.360057,2,13,2,1,0,734,...,2,1,22,2,1,0.025,0.0,0.0,0.0,1374
1,0.642857,0,2,0.607015,1,20,2,5,0,424,...,3,1,38,3,2,0.125,0.222222,0.0,0.176471,1092
2,0.52381,0,2,0.141732,2,18,2,2,0,60,...,1,1,38,3,1,0.175,0.388889,0.466667,0.294118,768


In [54]:
# One SparseFeat per categorical column, sized by its number of distinct
# encoded values, followed by one 1-dimensional DenseFeat per dense column.
sparse_columns = [SparseFeat(name, data[name].nunique()) for name in sparse_features]
dense_columns = [DenseFeat(name, 1) for name in dense_features]
fixlen_feature_columns = sparse_columns + dense_columns

# The linear part and the DNN part receive the same columns.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
feature_names

['BusinessTravel',
 'Department',
 'DistanceFromHome',
 'Education',
 'EducationField',
 'EmployeeCount',
 'EmployeeNumber',
 'EnvironmentSatisfaction',
 'Gender',
 'HourlyRate',
 'JobInvolvement',
 'JobLevel',
 'JobRole',
 'JobSatisfaction',
 'MaritalStatus',
 'MonthlyRate',
 'OverTime',
 'PerformanceRating',
 'RelationshipSatisfaction',
 'StockOptionLevel',
 'TotalWorkingYears',
 'TrainingTimesLastYear',
 'WorkLifeBalance',
 'Age',
 'DailyRate',
 'MonthlyIncome',
 'NumCompaniesWorked',
 'PercentSalaryHike',
 'YearsAtCompany',
 'YearsInCurrentRole',
 'YearsSinceLastPromotion',
 'YearsWithCurrManager']

In [55]:
# Show dtypes and non-null counts of the preprocessed combined frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 34 columns):
Age                         1470 non-null float64
Attrition                   1470 non-null int64
BusinessTravel              1470 non-null int32
DailyRate                   1470 non-null float64
Department                  1470 non-null int32
DistanceFromHome            1470 non-null int32
Education                   1470 non-null int32
EducationField              1470 non-null int32
EmployeeCount               1470 non-null int32
EmployeeNumber              1470 non-null int32
EnvironmentSatisfaction     1470 non-null int32
Gender                      1470 non-null int32
HourlyRate                  1470 non-null int32
JobInvolvement              1470 non-null int32
JobLevel                    1470 non-null int32
JobRole                     1470 non-null int32
JobSatisfaction             1470 non-null int32
MaritalStatus               1470 non-null int32
MonthlyIncome            

In [56]:
def auc(y_true, y_pred):
    """ROC-AUC metric for Keras, computed with scikit-learn's `roc_auc_score`.

    The scoring function is wrapped in `tf.py_function` so that it can be
    passed in the `metrics` list of `model.compile`.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted scores.

    Returns:
        A `tf.double` tensor holding the AUC, or 0.5 when the AUC cannot be
        computed for the given values.
    """
    def fallback_auc(y_true, y_pred):
        try:
            return roc_auc_score(y_true, y_pred)
        except ValueError:
            # roc_auc_score raises ValueError for input it cannot score (for
            # example labels that contain a single class); report chance
            # level instead. Any other exception is a real error and is
            # allowed to propagate rather than being silently hidden.
            return 0.5
    return tf.py_function(fallback_auc, (y_true, y_pred), tf.double)

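**CE** (cross-entropy, where $p_t$ is the predicted probability of the true class)
$$CE(p_t) = -\log(p_t)$$
**FL** (focal loss)
$$FL(p_t) = -(1-p_t)^\gamma \log(p_t)$$
**$\alpha$-balanced FL**
$$FL(p_t) = -\alpha_t (1-p_t)^\gamma \log(p_t)$$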

In [57]:
# Display the Keras backend epsilon; the focal loss defined below adds it
# inside its logarithms.
K.epsilon()

1e-07

In [58]:
# https://arxiv.org/abs/1708.02002
def focal_loss(gamma=2., alpha=.25):
    """Build an alpha-balanced focal loss for 0/1 targets.

    Args:
        gamma: exponent of the modulating factor.
        alpha: weight of the positive term; the negative term is weighted
            by `1 - alpha`.

    Returns:
        A function `(y_true, y_pred) -> scalar loss tensor`.
    """
    def focal_loss_fixed(y_true, y_pred):
        is_positive = tf.equal(y_true, 1)
        is_negative = tf.equal(y_true, 0)
        # tf.where(cond, a, b) keeps the elements of `a` where `cond` is True
        # and takes the elements of `b` everywhere else. Non-positive slots
        # are filled with 1 and non-negative slots with 0, which makes their
        # modulating factor (and hence their term) zero.
        pt_1 = tf.where(is_positive, y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(is_negative, y_pred, tf.zeros_like(y_pred))
        positive_term = alpha * K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)
        negative_term = (1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon())
        return -K.mean(positive_term) - K.mean(negative_term)
    return focal_loss_fixed

# Register the loss with default parameters so it can be referred to by name.
get_custom_objects().update({'focal_loss_fn': focal_loss()})

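**Gaussian Error Linear Units (GELU)**: an activation function used in BERT/Transformer models.

Under the assumption of a normal distribution, it is computed with the following approximation:
$$GELU(x)=0.5x\left(1+\tanh\left(\sqrt{2/\pi}\,(x+0.044715x^3)\right)\right)$$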

In [59]:
# https://arxiv.org/pdf/1606.08415.pdf
def custom_gelu(x):
    """GELU via 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))."""
    cubic = x + 0.044715 * tf.pow(x, 3)
    tanh_arg = tf.sqrt(2 / np.pi) * cubic
    return 0.5 * x * (1 + tf.tanh(tanh_arg))

# Register under the name 'custom_gelu', wrapped in an Activation layer.
get_custom_objects().update({'custom_gelu': Activation(custom_gelu)})

**Mish**: activation function
$$mish(x) = x \cdot \tanh(\mathrm{softplus}(x)) = x \cdot \tanh(\ln(1 + e^{x}))$$

In [60]:
# https://arxiv.org/ftp/arxiv/papers/1908/1908.08681.pdf
class Mish(Activation):
    """Activation layer that carries the name 'Mish'.

    The wrapped function is expected to compute
    ``mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^x))``.

    Shape:
        - Input: arbitrary. When this is the first layer of a model, pass the
          keyword argument `input_shape` (tuple of integers, without the
          samples axis).
        - Output: same shape as the input.

    Example:
        >>> X = Activation('Mish', name="conv1_act")(X_input)
    """

    def __init__(self, activation, **kwargs):
        super().__init__(activation, **kwargs)
        # Expose a function-like name on the layer instance.
        self.__name__ = 'Mish'


def mish(inputs):
    """Mish activation: inputs * tanh(softplus(inputs))."""
    softplus_out = tf.math.softplus(inputs)
    return inputs * tf.math.tanh(softplus_out)


# Register a Mish layer wrapping `mish` under the name 'Mish'.
get_custom_objects().update({'Mish': Mish(mish)})

In [61]:
class WarmUpLearningRateScheduler(tf.keras.callbacks.Callback):
    """Linearly ramp the optimizer learning rate up to `init_lr`.

    Before each of the first `warmup_batches` batches the learning rate is set
    to `batch_count * init_lr / warmup_batches`, where `batch_count` is the
    number of batches finished so far (so the first batch runs at 0).
    After every batch the current learning rate is appended to
    `learning_rates`.

    Args:
        warmup_batches: number of batches over which to ramp up. A value
            <= 0 disables the warm-up (the learning rate is left untouched).
        init_lr: learning rate reached at the end of the warm-up.
        verbose: if > 0, print a message each time the rate is changed.
    """

    def __init__(self, warmup_batches, init_lr, verbose=0):
        super(WarmUpLearningRateScheduler, self).__init__()
        self.warmup_batches = warmup_batches
        self.init_lr = init_lr
        self.verbose = verbose
        self.batch_count = 0       # batches completed so far
        self.learning_rates = []   # learning rate observed after each batch

    def on_batch_end(self, batch, logs=None):
        """Count the finished batch and record the learning rate in effect."""
        self.batch_count = self.batch_count + 1
        lr = K.get_value(self.model.optimizer.lr)
        self.learning_rates.append(lr)

    def on_batch_begin(self, batch, logs=None):
        """Set the warm-up learning rate while still inside the warm-up window."""
        # warmup_batches <= 0 means "no warm-up"; returning early also avoids
        # the division by zero that warmup_batches == 0 used to trigger.
        if self.warmup_batches <= 0 or self.batch_count > self.warmup_batches:
            return
        lr = self.batch_count * self.init_lr / self.warmup_batches
        K.set_value(self.model.optimizer.lr, lr)
        if self.verbose > 0:
            print('\nBatch %05d: WarmUpLearningRateScheduler setting learning rate to %s.' % (self.batch_count + 1, lr))

**Learning Rate**

 Cyclical Learning Rates for Training Neural Networks:https://arxiv.org/pdf/1506.01186.pdf

In [62]:
class CyclicLR(keras.callbacks.Callback):
    """Cyclical learning-rate schedule, updated after every training batch.

    The learning rate follows a triangular wave between `base_lr` and
    `max_lr` whose half-period is `step_size` batches. The height of the wave
    above `base_lr` is multiplied by `scale_fn`.

    Args:
        base_lr: lower bound of the cycle.
        max_lr: upper bound of the cycle (before scaling).
        step_size: number of batches in half a cycle.
        mode: 'triangular', 'triangular2' or 'exp_range'. Only used when
            `scale_fn` is None.
        gamma: base of the per-iteration factor ``gamma ** iterations`` used
            by 'exp_range'.
        scale_fn: optional custom scaling function of one argument.
        scale_mode: 'cycle' to call `scale_fn` with the cycle number, any
            other value to call it with the iteration count. Only honoured
            when `scale_fn` is given; the built-in modes set their own value.

    Raises:
        ValueError: if `scale_fn` is None and `mode` is not one of the
            built-in modes.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            # Built-in policies: each fixes both the scaling function and the
            # quantity it is evaluated on.
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1 / (2. ** (x - 1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma ** (x)
                self.scale_mode = 'iterations'
            else:
                # Fail at construction instead of with an AttributeError on
                # the first call to clr().
                raise ValueError(
                    "mode must be 'triangular', 'triangular2' or 'exp_range' "
                    "when scale_fn is None, got %r" % (self.mode,))
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.   # batches since the last reset
        self.trn_iterations = 0.   # batches since the callback was created
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.

    def clr(self):
        """Return the learning rate for the current `clr_iterations`."""
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        # x is the distance from the peak of the current cycle, measured in
        # half-cycles: 1 at the cycle boundaries, 0 at the peak.
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) * self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr - self.base_lr) * np.maximum(0, (1 - x)) * self.scale_fn(
                self.clr_iterations)

    def on_train_begin(self, logs=None):
        """Initialise the optimizer learning rate when training starts."""
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):
        """Advance the iteration counters and apply the next learning rate.

        The first positional argument is named `epoch` for backward
        compatibility; as this is a batch-level hook it receives the batch
        index. It is not used.
        """
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1

        K.set_value(self.model.optimizer.lr, self.clr())

In [63]:
# --- Cross-validation -------------------------------------------------------
SEED = 2020               # random_state of the fold splitter
N_Splits = 5              # number of folds
target = ['Attrition']    # label column(s)

# --- Training ---------------------------------------------------------------
Epochs = 10
Batch_S_T = 8             # batch size used for model.fit
Batch_S_P = 64            # batch size used for model.predict
Verbose = 0               # verbosity passed to fit and the callbacks


**DeepFM**
github: https://github.com/shenweichen/DeepCTR

https://arxiv.org/pdf/1703.04247.pdf

In [None]:
# Out-of-fold predictions for the training rows and fold-averaged predictions
# for the test rows.
oof_pred_deepfm = np.zeros((len(train), ))
y_pred_deepfm = np.zeros((len(test), ))

# The test inputs are the same for every fold, so build them once instead of
# once per fold.
test_model_input = {name: test[name] for name in feature_names}

skf = StratifiedKFold(n_splits=N_Splits, shuffle=True, random_state=SEED)
for fold, (tr_ind, val_ind) in enumerate(skf.split(train, train[target])):
    X_train, X_val = train[feature_names].iloc[tr_ind], train[feature_names].iloc[val_ind]
    y_train, y_val = train[target].iloc[tr_ind], train[target].iloc[val_ind]
    # DeepCTR models take a dict mapping feature name -> column.
    train_model_input = {name: X_train[name] for name in feature_names}
    val_model_input = {name: X_val[name] for name in feature_names}
    # 'Mish' and 'focal_loss_fn' are resolved through the Keras custom objects
    # registered earlier in the notebook.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 256, 256), dnn_dropout=0.0, dnn_activation='Mish', dnn_use_bn=False, task='binary', ) # device='GPU'
    model.compile('adam', loss = 'focal_loss_fn', metrics=[auc], )# binary_crossentropy
    es = callbacks.EarlyStopping(monitor='val_auc', min_delta=0.001, patience=3, verbose=Verbose, mode='max', baseline=None, restore_best_weights=True)
    # NOTE(review): no `monitor` is passed here while EarlyStopping watches
    # 'val_auc'; confirm the checkpoint reloaded after fit() is selected on
    # the intended metric.
    sb = callbacks.ModelCheckpoint('dataset/attrition/model/nn_model.w8', save_weights_only=True, save_best_only=True, verbose=Verbose)
    # NOTE(review): step_size is derived from the size of `test`, not from the
    # training fold -- confirm this is intended. With scale_fn=None, CyclicLR
    # takes its scale mode from `mode`, so scale_mode='cycle' is not used, and
    # gamma=1.0 makes the 'exp_range' scaling factor constant.
    clr = CyclicLR(base_lr=1e-3, max_lr = 3.5e-3, step_size= int(1.0*(test.shape[0])/(Batch_S_T*4)) , mode='exp_range', gamma=1.0, scale_fn=None, scale_mode='cycle')
    history = model.fit(train_model_input, y_train,
                        validation_data=(val_model_input, y_val),
                        batch_size=Batch_S_T, epochs=Epochs, verbose=Verbose,
                        callbacks=[es, sb, clr],)
    # Predict with the weights saved by the checkpoint callback.
    model.load_weights('dataset/attrition/model/nn_model.w8')
    val_pred = model.predict(val_model_input, batch_size=Batch_S_P)
    print(f'validation AUC fold {fold+1} : {round(roc_auc_score(y_val, val_pred), 5)}')
    oof_pred_deepfm[val_ind] = val_pred.ravel()
    # Each fold contributes 1/N_Splits of the final test prediction.
    y_pred_deepfm += model.predict(test_model_input, batch_size=Batch_S_P).ravel() / (N_Splits)
    K.clear_session()

validation AUC fold 1 : 0.82855
validation AUC fold 2 : 0.85954
validation AUC fold 3 : 0.75403
validation AUC fold 4 : 0.84999


In [None]:
# AUC of the out-of-fold predictions over the whole training set.
oof_auc = roc_auc_score(train.Attrition.values, oof_pred_deepfm)
print(f'OOF AUC : {round(oof_auc, 5)}')

In [24]:
# Pair each test user id with its fold-averaged prediction and write the
# result to submission.csv without the index column.
test_idx = test.user_id.values
submission = pd.DataFrame({'user_id': test_idx, 'Attrition': y_pred_deepfm})
submission.to_csv('submission.csv', index=False)