In [1]:
import numpy as np
import pandas as pd
from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings('ignore')

### Task 1 predicts whether a person's income exceeds 50K;
### task 2 predicts whether that person's marital status is "never married".

In [2]:
# Column names for the census CSV files, which are read without a header row.
CENSUS_COLUMNS = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship', 'race', 'gender',
    'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
    'income_bracket',
]

df_train = pd.read_csv('./data/adult.data', header=None, names=CENSUS_COLUMNS)
# The stock UCI adult.test starts with a '|1x3 Cross validator' banner line.
# comment='|' makes pandas skip a line that begins with '|', and is a no-op
# when the banner has already been removed from the file.
# NOTE(review): assumes no real field in adult.test contains '|' -- confirm.
df_test = pd.read_csv('./data/adult.test', header=None, names=CENSUS_COLUMNS, comment='|')

# Stack train first, then test. ignore_index=True gives every row a unique
# label; without it the test rows reuse the labels 0..len(df_test)-1 of the
# training rows, which makes any label-based alignment ambiguous.
data = pd.concat([df_train, df_test], axis=0, ignore_index=True)

# Take task 1 (income) as the ctr task and task 2 (marital status) as the ctcvr task.
# The test file spells the income labels with a trailing '.', hence both variants.
# Values outside this mapping become NaN.
data['ctr_label'] = data['income_bracket'].map({' >50K.': 1, ' >50K': 1, ' <=50K.': 0, ' <=50K': 0})
# 1 when the raw value is exactly ' Never-married' (leading space included), else 0.
data['ctcvr_label'] = (data['marital_status'] == ' Never-married').astype('int64')

# The two source columns are now encoded as labels, so remove them from the features.
data = data.drop(columns=['marital_status', 'income_bracket'])


In [3]:
# Define dense and sparse features.
# Every column that is neither a listed dense feature nor a label is treated
# as a sparse (categorical) feature.
columns = data.columns.tolist()
dense_features = ['fnlwgt', 'education_num', 'capital_gain', 'capital_loss', 'hours_per_week']
sparse_features = [
    col for col in columns
    if col not in dense_features and col not in ['ctr_label', 'ctcvr_label']
]

# Fill missing values: a '-1' placeholder category for sparse columns, 0 for dense ones.
data[sparse_features] = data[sparse_features].fillna('-1')
data[dense_features] = data[dense_features].fillna(0)

# Scale dense features to [0, 1]. The scaler is fitted on the training rows
# only (the first len(df_train) rows of `data`, matching the positional split
# used for train/test) so that test-set minima/maxima do not leak into the
# preprocessing. Test values outside the training range may therefore fall
# slightly outside [0, 1].
mms = MinMaxScaler(feature_range=(0, 1))
mms.fit(data[dense_features].iloc[:len(df_train)])
data[dense_features] = mms.transform(data[dense_features])

# Integer-encode each sparse column. The encoder sees all rows so that
# categories occurring only in the test rows still receive an id.
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

# One 16-dim embedding per sparse feature (vocabulary size = largest id + 1)
# and one scalar input per dense feature.
sparse_feature_columns = [
    SparseFeat(feat, data[feat].max() + 1, embedding_dim=16)
    for feat in sparse_features
]
dense_feature_columns = [DenseFeat(feat, 1) for feat in dense_features]
fixlen_feature_columns = sparse_feature_columns + dense_feature_columns

dnn_feature_columns = fixlen_feature_columns

feature_names = get_feature_names(dnn_feature_columns)

In [4]:
# Train/test split.
# `data` holds the training rows first, so cut it positionally at the number
# of rows in the original training frame.
n_train = df_train.shape[0]
train, test = data.iloc[:n_train], data.iloc[n_train:]

# Model inputs: one entry per feature name, mapping the name to that column.
train_model_input = {feat_name: train[feat_name] for feat_name in feature_names}
test_model_input = {feat_name: test[feat_name] for feat_name in feature_names}

In [5]:
# Test ESSM Model
from essm import ESSM

# Two binary tasks ('ctr', 'ctcvr'), each with a [64, 64] tower.
model = ESSM(
    dnn_feature_columns,
    task_type='binary',
    task_names=['ctr', 'ctcvr'],
    tower_dnn_units_lists=[[64, 64], [64, 64]],
)
model.compile(
    "adam",
    loss=["binary_crossentropy", "binary_crossentropy"],
    metrics=['AUC'],
)

# Targets are supplied as [ctr labels, ctcvr labels], in the same order as task_names.
history = model.fit(
    train_model_input,
    [train['ctr_label'].values, train['ctcvr_label'].values],
    batch_size=256,
    epochs=5,
    verbose=2,
    validation_split=0.0,
)

pred_ans = model.predict(test_model_input, batch_size=256)

# Report the test AUC of each task; pred_ans is indexed per task in task order.
for task_idx, (title, label_col) in enumerate(
    [("test CTR AUC", 'ctr_label'), ("test CTCVR AUC", 'ctcvr_label')]
):
    print(title, round(roc_auc_score(test[label_col], pred_ans[task_idx]), 4))

Epoch 1/5
128/128 - 0s - loss: 1.0419 - ctr_loss: 0.5610 - ctcvr_loss: 0.4810 - ctr_auc: 0.6276 - ctcvr_auc_1: 0.8919
Epoch 2/5
128/128 - 0s - loss: 0.8686 - ctr_loss: 0.5013 - ctcvr_loss: 0.3672 - ctr_auc: 0.7799 - ctcvr_auc_1: 0.9552
Epoch 3/5
128/128 - 0s - loss: 0.8580 - ctr_loss: 0.4925 - ctcvr_loss: 0.3655 - ctr_auc: 0.7902 - ctcvr_auc_1: 0.9566
Epoch 4/5
128/128 - 0s - loss: 0.8529 - ctr_loss: 0.4879 - ctcvr_loss: 0.3650 - ctr_auc: 0.7945 - ctcvr_auc_1: 0.9564
Epoch 5/5
128/128 - 0s - loss: 0.8496 - ctr_loss: 0.4852 - ctcvr_loss: 0.3643 - ctr_auc: 0.7993 - ctcvr_auc_1: 0.9569
test CTR AUC 0.7838
test CTCVR AUC 0.9559


In [6]:
# Test Shared_Bottom Model
from shared_bottom import Shared_Bottom

# Task names for the income / marital-status tasks; they prefix the per-task
# loss and AUC entries in the training log.
task_names = ['income', 'marital']

# A [128, 128] bottom network feeding one [64, 32] tower per task.
model = Shared_Bottom(
    dnn_feature_columns,
    num_tasks=2,
    task_types=['binary', 'binary'],
    task_names=task_names,
    bottom_dnn_units=[128, 128],
    tower_dnn_units_lists=[[64, 32], [64, 32]],
)
model.compile(
    "adam",
    loss=["binary_crossentropy", "binary_crossentropy"],
    metrics=['AUC'],
)

# Targets are supplied as [income labels, marital labels], in the same order as task_names.
history = model.fit(
    train_model_input,
    [train['ctr_label'].values, train['ctcvr_label'].values],
    batch_size=256,
    epochs=5,
    verbose=2,
    validation_split=0.0,
)

pred_ans = model.predict(test_model_input, batch_size=256)

# Report the test AUC of each task; pred_ans is indexed per task in task order.
for task_idx, (title, label_col) in enumerate(
    [("test income AUC", 'ctr_label'), ("test marital AUC", 'ctcvr_label')]
):
    print(title, round(roc_auc_score(test[label_col], pred_ans[task_idx]), 4))

Epoch 1/5
128/128 - 0s - loss: 0.7820 - income_loss: 0.4126 - marital_loss: 0.3693 - income_auc: 0.8222 - marital_auc_1: 0.8928
Epoch 2/5
128/128 - 0s - loss: 0.5554 - income_loss: 0.3248 - marital_loss: 0.2305 - income_auc: 0.9031 - marital_auc_1: 0.9619
Epoch 3/5
128/128 - 0s - loss: 0.5454 - income_loss: 0.3156 - marital_loss: 0.2298 - income_auc: 0.9088 - marital_auc_1: 0.9622
Epoch 4/5
128/128 - 0s - loss: 0.5408 - income_loss: 0.3133 - marital_loss: 0.2274 - income_auc: 0.9098 - marital_auc_1: 0.9628
Epoch 5/5
128/128 - 0s - loss: 0.5341 - income_loss: 0.3086 - marital_loss: 0.2254 - income_auc: 0.9130 - marital_auc_1: 0.9635
test income AUC 0.9098
test marital AUC 0.9635


In [7]:
# Test MMOE Model
# Relies on `task_names` defined in the earlier Shared_Bottom cell.
from mmoe import MMOE

# 8 experts of [64, 64], gate networks of [32, 32], and one [32, 32] tower per task.
model = MMOE(
    dnn_feature_columns,
    num_tasks=2,
    task_types=['binary', 'binary'],
    task_names=task_names,
    num_experts=8,
    expert_dnn_units=[64, 64],
    gate_dnn_units=[32, 32],
    tower_dnn_units_lists=[[32, 32], [32, 32]],
)
model.compile(
    "adam",
    loss=["binary_crossentropy", "binary_crossentropy"],
    metrics=['AUC'],
)

# Targets are supplied as [income labels, marital labels].
history = model.fit(
    train_model_input,
    [train['ctr_label'].values, train['ctcvr_label'].values],
    batch_size=256,
    epochs=5,
    verbose=2,
    validation_split=0.0,
)

pred_ans = model.predict(test_model_input, batch_size=256)

# Report the test AUC of each task; pred_ans is indexed per task in task order.
for task_idx, (title, label_col) in enumerate(
    [("test income AUC", 'ctr_label'), ("test marital AUC", 'ctcvr_label')]
):
    print(title, round(roc_auc_score(test[label_col], pred_ans[task_idx]), 4))

Epoch 1/5
128/128 - 1s - loss: 0.8637 - income_loss: 0.4493 - marital_loss: 0.4144 - income_auc: 0.7824 - marital_auc_1: 0.8558
Epoch 2/5
128/128 - 1s - loss: 0.5618 - income_loss: 0.3302 - marital_loss: 0.2316 - income_auc: 0.9004 - marital_auc_1: 0.9614
Epoch 3/5
128/128 - 1s - loss: 0.5508 - income_loss: 0.3216 - marital_loss: 0.2292 - income_auc: 0.9054 - marital_auc_1: 0.9622
Epoch 4/5
128/128 - 1s - loss: 0.5384 - income_loss: 0.3135 - marital_loss: 0.2248 - income_auc: 0.9102 - marital_auc_1: 0.9636
Epoch 5/5
128/128 - 1s - loss: 0.5359 - income_loss: 0.3118 - marital_loss: 0.2240 - income_auc: 0.9108 - marital_auc_1: 0.9639
test income AUC 0.9091
test marital AUC 0.9638


In [9]:
# Test PLE_CGC Model
# Relies on `task_names` defined in the earlier Shared_Bottom cell.
from ple_cgc import PLE_CGC

# 8 task-specific and 4 shared experts of [64, 64], gate networks of [16, 16],
# and one [32, 32] tower per task.
model = PLE_CGC(
    dnn_feature_columns,
    num_tasks=2,
    task_types=['binary', 'binary'],
    task_names=task_names,
    num_experts_specific=8,
    num_experts_shared=4,
    expert_dnn_units=[64, 64],
    gate_dnn_units=[16, 16],
    tower_dnn_units_lists=[[32, 32], [32, 32]],
)
model.compile(
    "adam",
    loss=["binary_crossentropy", "binary_crossentropy"],
    metrics=['AUC'],
)

# Targets are supplied as [income labels, marital labels].
history = model.fit(
    train_model_input,
    [train['ctr_label'].values, train['ctcvr_label'].values],
    batch_size=256,
    epochs=5,
    verbose=2,
    validation_split=0.0,
)

pred_ans = model.predict(test_model_input, batch_size=256)

# Report the test AUC of each task; pred_ans is indexed per task in task order.
for task_idx, (title, label_col) in enumerate(
    [("test income AUC", 'ctr_label'), ("test marital AUC", 'ctcvr_label')]
):
    print(title, round(roc_auc_score(test[label_col], pred_ans[task_idx]), 4))

Epoch 1/5
128/128 - 2s - loss: 0.8721 - income_loss: 0.4423 - marital_loss: 0.4297 - income_auc: 0.7902 - marital_auc_1: 0.8538
Epoch 2/5
128/128 - 2s - loss: 0.5631 - income_loss: 0.3310 - marital_loss: 0.2321 - income_auc: 0.8995 - marital_auc_1: 0.9613
Epoch 3/5
128/128 - 2s - loss: 0.5461 - income_loss: 0.3194 - marital_loss: 0.2266 - income_auc: 0.9066 - marital_auc_1: 0.9631
Epoch 4/5
128/128 - 2s - loss: 0.5412 - income_loss: 0.3151 - marital_loss: 0.2260 - income_auc: 0.9091 - marital_auc_1: 0.9632
Epoch 5/5
128/128 - 2s - loss: 0.5386 - income_loss: 0.3131 - marital_loss: 0.2254 - income_auc: 0.9102 - marital_auc_1: 0.9635
test income AUC 0.9094
test marital AUC 0.9636


In [10]:
# Test PLE Model
# Relies on `task_names` defined in the earlier Shared_Bottom cell.
from ple import PLE

# 2 levels, each configured with 8 task-specific and 4 shared experts of
# [64, 64], gate networks of [16, 16], and one [32, 32] tower per task.
model = PLE(
    dnn_feature_columns,
    num_tasks=2,
    task_types=['binary', 'binary'],
    task_names=task_names,
    num_levels=2,
    num_experts_specific=8,
    num_experts_shared=4,
    expert_dnn_units=[64, 64],
    gate_dnn_units=[16, 16],
    tower_dnn_units_lists=[[32, 32], [32, 32]],
)
model.compile(
    "adam",
    loss=["binary_crossentropy", "binary_crossentropy"],
    metrics=['AUC'],
)

# Targets are supplied as [income labels, marital labels].
history = model.fit(
    train_model_input,
    [train['ctr_label'].values, train['ctcvr_label'].values],
    batch_size=256,
    epochs=5,
    verbose=2,
    validation_split=0.0,
)

pred_ans = model.predict(test_model_input, batch_size=256)

# Report the test AUC of each task; pred_ans is indexed per task in task order.
for task_idx, (title, label_col) in enumerate(
    [("test income AUC", 'ctr_label'), ("test marital AUC", 'ctcvr_label')]
):
    print(title, round(roc_auc_score(test[label_col], pred_ans[task_idx]), 4))

Epoch 1/5
128/128 - 3s - loss: 0.9043 - income_loss: 0.4642 - marital_loss: 0.4401 - income_auc: 0.7619 - marital_auc_1: 0.8424
Epoch 2/5
128/128 - 3s - loss: 0.5665 - income_loss: 0.3339 - marital_loss: 0.2325 - income_auc: 0.8979 - marital_auc_1: 0.9610
Epoch 3/5
128/128 - 4s - loss: 0.5454 - income_loss: 0.3203 - marital_loss: 0.2250 - income_auc: 0.9061 - marital_auc_1: 0.9636
Epoch 4/5
128/128 - 5s - loss: 0.5354 - income_loss: 0.3111 - marital_loss: 0.2242 - income_auc: 0.9116 - marital_auc_1: 0.9638
Epoch 5/5
128/128 - 4s - loss: 0.5319 - income_loss: 0.3090 - marital_loss: 0.2228 - income_auc: 0.9124 - marital_auc_1: 0.9643
test income AUC 0.9103
test marital AUC 0.9635
