In [1]:
import os
import numpy as np
import pandas as pd

from scipy.sparse import csr_matrix

from tqdm import tqdm

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error, ndcg_score, recall_score
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

In [2]:
import torch
import torch.nn as nn
import torch.functional as F

from deepctr_torch.inputs import SparseFeat, VarLenSparseFeat, get_feature_names

In [3]:
from deepctr_torch.models import DIN
from deepctr_torch.models import AFM
from deepctr_torch.models import WDL
from deepctr_torch.models import xDeepFM
from deepctr_torch.models import DeepFM
from deepctr_torch.models import PNN
from deepctr_torch.models import DCN
from deepctr_torch.models import CCPM

In [4]:
import utils

In [5]:
device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

root = os.path.join(os.getcwd(), "Yelp")
rel_p = os.path.join(root, "user_business.dat")

user_cnt = 16239

cuda ready...


### User

In [6]:
compliment, compliment_i, compliment_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Yelp", "user_compliment.dat"), user_cnt=user_cnt)
user, user_i, user_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Yelp", "user_user.dat"), user_cnt=user_cnt)
# user, user_i, user_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Movielens", "user_user.dat"), user_cnt=user_cnt)

### Book

In [7]:
city, city_i, city_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Yelp", "business_city.dat"), user_cnt=14284)
category, category_i, category_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Yelp", "business_category.dat"), user_cnt=14284)
# author, author_i, author_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Movielens", "book_author.dat"), user_cnt=user_cnt)

### Interact

In [8]:
sparse_features = ["cols_0", "cols_1"] # user_id, movie_id
rating = "cols_2"

In [9]:
rel = utils.read_file(rel_p)
rel.head()

Unnamed: 0,cols_0,cols_1,cols_2
0,1,8391,5
1,1,8971,5
2,2,186,5
3,2,205,5
4,2,209,4


In [10]:
_compliment = compliment[rel.cols_0 -1]
_user = user[rel.cols_0 -1]
# _user = user[rel.cols_0 -1]

_category = category[rel.cols_1 -1]
_city = city[rel.cols_1 -1]
# _author = author[rel.cols_1 -1]

In [11]:
# 1.Label Encoding for sparse features,and process sequence features
for feat in sparse_features:
    lbe = LabelEncoder()
    rel[feat] = lbe.fit_transform(rel[feat])

In [12]:
fixlen_feature_columns = [SparseFeat(feat, rel[feat].nunique(), embedding_dim=4) for feat in sparse_features]

varlen_feature_columns = [
    VarLenSparseFeat(SparseFeat('compliment', vocabulary_size=compliment_i + 1, embedding_dim=4), maxlen=compliment_m, combiner='mean'),
    VarLenSparseFeat(SparseFeat('user', vocabulary_size=user_cnt + 1, embedding_dim=4), maxlen=user_m, combiner='mean'),
#     VarLenSparseFeat(SparseFeat('user', vocabulary_size=13024 + 1, embedding_dim=4), maxlen=user_m, combiner='mean'),
    
    VarLenSparseFeat(SparseFeat('category', vocabulary_size=category_i + 1, embedding_dim=4), maxlen=category_m, combiner='mean'),
    VarLenSparseFeat(SparseFeat('city', vocabulary_size=city_i + 1, embedding_dim=4), maxlen=city_m, combiner='mean'),
#     VarLenSparseFeat(SparseFeat('author', vocabulary_size=author_i + 1, embedding_dim=4), maxlen=author_m, combiner='mean')
]

In [13]:
linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [14]:
k = 5
kf = KFold(n_splits=5)

In [15]:
deepfm_mse = 0
ccpm_mse = 0
ipnn_mse = 0
opnn_mse = 0
wdl_mse = 0
dcn_mse = 0
xdeepfm_mse = 0
afm_mse = 0
din_mse = 0

In [16]:
deepfm_recall = 0
ccpm_recall = 0
ipnn_recall = 0
opnn_recall = 0
wdl_recall = 0
dcn_recall = 0
xdeepfm_recall = 0
afm_recall = 0
din_recall = 0

In [17]:
deepfm_ndcg = 0
ccpm_ndcg = 0
ipnn_ndcg = 0
opnn_ndcg = 0
wdl_ndcg = 0
dcn_ndcg = 0
xdeepfm_ndcg = 0
afm_ndcg = 0
din_ndcg = 0

In [18]:
fold_cnt = 0

for train_index, test_index in kf.split(rel):
    
    fold_cnt += 1
    print("========================== {} Fold ==============================\n\n".format(fold_cnt))
    
    ### train
    train_input = {name: rel[name][train_index] for name in sparse_features}
    train_input["compliment"] = _compliment[train_index]
    train_input["user"] = _user[train_index]
    train_input["city"] = _city[train_index]
    train_input["category"] = _category[train_index]
    train_target = np.array(rel[rating][train_index])
    
    ### test
    test_input = {name: rel[name][test_index] for name in sparse_features}
    test_input["compliment"] = _compliment[test_index]
    test_input["user"] = _user[test_index]
    test_input["city"] = _city[test_index]
    test_input["category"] = _category[test_index]
    test_target = np.array(rel[rating][test_index])
    binary_target = np.where(test_target > 3, 1, 0).reshape(1, -1)
    
    print("\n\n Training DeepFM \n")
    ### DeepFM
    deepfm = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    deepfm.compile("adam", "mse", metrics=['mse'], )
    deepfm_hist = deepfm.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = deepfm.predict(test_input, batch_size=256)
    deepfm_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    deepfm_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    deepfm_ndcg += ndcg_score(binary_target, pred_ans)
#     print("DeepFM MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    print("\n\n Training CCPM \n")
    ### CCPM
    ccpm = CCPM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    ccpm.compile("adam", "mse", metrics=['mse'], )
    ccpm_hist = ccpm.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = ccpm.predict(test_input, batch_size=256)
    ccpm_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    ccpm_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    ccpm_ndcg += ndcg_score(binary_target, pred_ans)
#     print("CCPM MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    print("\n\n Training IPMM \n")
    ### IPNN
    ipnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=True, use_outter=False)
    ipnn.compile("adam", "mse", metrics=['mse'], )
    ipnn_hist = ipnn.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = ipnn.predict(test_input, batch_size=256)
    ipnn_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    ipnn_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    ipnn_ndcg += ndcg_score(binary_target, pred_ans)
#     print("IPNN MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    print("\n\n Training OPNN \n")
    ### OPNN
    opnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=False, use_outter=True)
    opnn.compile("adam", "mse", metrics=['mse'], )
    opnn_hist = opnn.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = opnn.predict(test_input, batch_size=256)
    opnn_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    opnn_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    opnn_ndcg += ndcg_score(binary_target, pred_ans)
#     print("OPNN MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    print("\n\n Training Wide&Deep \n")
    ### Wide & Deep
    wdl = WDL(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    wdl.compile("adam", "mse", metrics=['mse'], )
    wdl_hist = wdl.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = wdl.predict(test_input, batch_size=256)
    wdl_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    wdl_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    wdl_ndcg += ndcg_score(binary_target, pred_ans)
#     print("WDL MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    print("\n\n Training DCN \n")
    ### DCN
    dcn = DCN(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    dcn.compile("adam", "mse", metrics=['mse'], )
    dcn_hist = dcn.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = dcn.predict(test_input, batch_size=256)
    dcn_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    dcn_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    dcn_ndcg += ndcg_score(binary_target, pred_ans)
#     print("DCN MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    
    print("\n\n Training xDeepFM \n")
    ### xDeepFM
    xdeepfm = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    xdeepfm.compile("adam", "mse", metrics=['mse'], )
    xdeepfm_hist = xdeepfm.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = xdeepfm.predict(test_input, batch_size=256)
    xdeepfm_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    xdeepfm_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    xdeepfm_ndcg += ndcg_score(binary_target, pred_ans)
#     print("xDeepFM MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    
    print("\n\n Training AFM \n")
    ### AFM
    afm = AFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    afm.compile("adam", "mse", metrics=['mse'], )
    afm_hist = afm.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = afm.predict(test_input, batch_size=256)
    afm_mse += mean_squared_error(test_target, pred_ans)
    
    pred_ans = np.where(pred_ans > 3, 1, 0).reshape((1, -1))
    afm_recall += recall_score(binary_target.reshape(-1, 1), pred_ans.reshape(-1, 1))
    afm_ndcg += ndcg_score(binary_target, pred_ans)
#     print("AFM MSE", round(mean_squared_error(test_target, pred_ans), 4))
    
    """
    print("\n Training DIN \n")
    ### DIN
    din = DIN(dnn_feature_columns, [], task='regression', device=device)
    din.compile("adam", "mse", metrics=['mse'], )
    din_hist = din.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    pred_ans = din.predict(test_input, batch_size=256)
    din_mse += mean_squared_error(test_target, pred_ans)
    print("DIN MSE", round(mean_squared_error(test_target, pred_ans), 4))
    """





 Training DeepFM 

cuda:0
Train on 142845 samples, validate on 15872 samples, 558 steps per epoch
Epoch 1/10
64s - loss:  1.7934 - mse:  1.7934 - val_mse:  1.1672
Epoch 2/10
61s - loss:  0.9725 - mse:  0.9725 - val_mse:  1.1570
Epoch 3/10
63s - loss:  0.9110 - mse:  0.9110 - val_mse:  1.1602
Epoch 4/10
60s - loss:  0.8823 - mse:  0.8823 - val_mse:  1.1760
Epoch 5/10
63s - loss:  0.8664 - mse:  0.8664 - val_mse:  1.1774
Epoch 6/10
60s - loss:  0.8544 - mse:  0.8544 - val_mse:  1.1423
Epoch 7/10
62s - loss:  0.8465 - mse:  0.8465 - val_mse:  1.1677
Epoch 8/10
63s - loss:  0.8414 - mse:  0.8413 - val_mse:  1.1381
Epoch 9/10
60s - loss:  0.8347 - mse:  0.8347 - val_mse:  1.1819
Epoch 10/10
63s - loss:  0.8315 - mse:  0.8315 - val_mse:  1.1845


 Training CCPM 

cuda:0
Train on 142845 samples, validate on 15872 samples, 558 steps per epoch
Epoch 1/10
63s - loss:  1.7181 - mse:  1.7181 - val_mse:  1.1124
Epoch 2/10
63s - loss:  0.9876 - mse:  0.9876 - val_mse:  1.1001
Epoch 3/10
57s - lo

Epoch 9/10
33s - loss:  0.7849 - mse:  0.7849 - val_mse:  1.1844
Epoch 10/10
36s - loss:  0.7644 - mse:  0.7644 - val_mse:  1.1795


 Training OPNN 

cuda:0
Train on 142845 samples, validate on 15872 samples, 558 steps per epoch
Epoch 1/10
37s - loss:  1.9124 - mse:  1.9124 - val_mse:  1.2431
Epoch 2/10
33s - loss:  0.9678 - mse:  0.9678 - val_mse:  1.1678
Epoch 3/10
36s - loss:  0.9036 - mse:  0.9036 - val_mse:  1.1616
Epoch 4/10
35s - loss:  0.8717 - mse:  0.8717 - val_mse:  1.1857
Epoch 5/10
36s - loss:  0.8527 - mse:  0.8526 - val_mse:  1.1666
Epoch 6/10
29s - loss:  0.8386 - mse:  0.8386 - val_mse:  1.1422
Epoch 7/10
36s - loss:  0.8238 - mse:  0.8238 - val_mse:  1.1638
Epoch 8/10
37s - loss:  0.8061 - mse:  0.8061 - val_mse:  1.1535
Epoch 9/10
37s - loss:  0.7823 - mse:  0.7823 - val_mse:  1.1513
Epoch 10/10
34s - loss:  0.7617 - mse:  0.7617 - val_mse:  1.1478


 Training Wide&Deep 

cuda:0
Train on 142845 samples, validate on 15872 samples, 558 steps per epoch
Epoch 1/10
62s - 

Epoch 8/10
66s - loss:  0.8268 - mse:  0.8267 - val_mse:  1.1531
Epoch 9/10
62s - loss:  0.8210 - mse:  0.8210 - val_mse:  1.1805
Epoch 10/10
66s - loss:  0.8152 - mse:  0.8151 - val_mse:  1.1489


 Training xDeepFM 

cuda:0
Train on 142846 samples, validate on 15872 samples, 558 steps per epoch
Epoch 1/10
63s - loss:  1.4119 - mse:  1.4119 - val_mse:  1.1601
Epoch 2/10
67s - loss:  0.9533 - mse:  0.9533 - val_mse:  1.1137
Epoch 3/10
63s - loss:  0.8936 - mse:  0.8936 - val_mse:  1.1084
Epoch 4/10
67s - loss:  0.8638 - mse:  0.8638 - val_mse:  1.1356
Epoch 5/10
63s - loss:  0.8423 - mse:  0.8423 - val_mse:  1.1167
Epoch 6/10
67s - loss:  0.8248 - mse:  0.8248 - val_mse:  1.1398
Epoch 7/10
64s - loss:  0.8097 - mse:  0.8097 - val_mse:  1.1364
Epoch 8/10
66s - loss:  0.7968 - mse:  0.7968 - val_mse:  1.1314
Epoch 9/10
60s - loss:  0.7849 - mse:  0.7848 - val_mse:  1.1685
Epoch 10/10
66s - loss:  0.7740 - mse:  0.7740 - val_mse:  1.1414


 Training AFM 

cuda:0
Train on 142846 samples, va

Epoch 6/10
59s - loss:  0.8569 - mse:  0.8569 - val_mse:  1.1578
Epoch 7/10
62s - loss:  0.8498 - mse:  0.8498 - val_mse:  1.1788
Epoch 8/10
62s - loss:  0.8430 - mse:  0.8430 - val_mse:  1.1816
Epoch 9/10
63s - loss:  0.8383 - mse:  0.8382 - val_mse:  1.1772
Epoch 10/10
60s - loss:  0.8330 - mse:  0.8330 - val_mse:  1.1680


 Training CCPM 

cuda:0
Train on 142846 samples, validate on 15872 samples, 558 steps per epoch
Epoch 1/10
61s - loss:  1.7178 - mse:  1.7178 - val_mse:  1.1136
Epoch 2/10
61s - loss:  0.9915 - mse:  0.9915 - val_mse:  1.1192
Epoch 3/10
64s - loss:  0.9151 - mse:  0.9151 - val_mse:  1.1198
Epoch 4/10
58s - loss:  0.8799 - mse:  0.8799 - val_mse:  1.1442
Epoch 5/10
61s - loss:  0.8568 - mse:  0.8567 - val_mse:  1.1592
Epoch 6/10
61s - loss:  0.8405 - mse:  0.8405 - val_mse:  1.1587
Epoch 7/10
65s - loss:  0.8238 - mse:  0.8237 - val_mse:  1.1886
Epoch 8/10
61s - loss:  0.8077 - mse:  0.8076 - val_mse:  1.2292
Epoch 9/10
65s - loss:  0.7892 - mse:  0.7891 - val_mse:

In [19]:
deepfm_mse /5

1.181351914169992

In [20]:
ccpm_mse /5

1.247156750644464

In [21]:
ipnn_mse /5

1.2237504230197114

In [22]:
opnn_mse /5

1.2032459417340755

In [23]:
wdl_mse /5

1.1819719420137116

In [24]:
dcn_mse /5

1.1787456364447428

In [25]:
xdeepfm_mse /5

1.2258726829876367

In [26]:
afm_mse /5

2.0971227548817133

In [27]:
deepfm_recall /5

0.9383477244528511

In [28]:
ccpm_recall /5

0.8832626772570575

In [29]:
ipnn_recall /5

0.9187351093512783

In [30]:
opnn_recall /5

0.9257387712511094

In [31]:
wdl_recall /5

0.9372927532789355

In [32]:
dcn_recall /5

0.9400705396558118

In [33]:
xdeepfm_recall /5

0.9404136288678112

In [34]:
afm_recall /5

0.8554051007429688

In [35]:
deepfm_ndcg / 5

0.962206180439169

In [36]:
ccpm_ndcg /5

0.9638919699693549

In [37]:
ipnn_ndcg /5

0.9628357715520451

In [38]:
opnn_ndcg / 5

0.9625913045520829

In [39]:
wdl_ndcg / 5

0.9622612026110966

In [40]:
dcn_ndcg / 5

0.9620958874825464

In [41]:
xdeepfm_ndcg / 5

0.9617192195249606

In [42]:
afm_ndcg / 5

0.9602507389334376

# Test single model

In [38]:
train_p = os.path.join(root, "train.dat")

In [39]:
group, group_i, group_m = utils.m2m_to_list(os.path.join(os.getcwd(), "DoubanBook", "user_group.dat"), user_cnt=13024)
location, location_i, location_m = utils.m2m_to_list(os.path.join(os.getcwd(), "DoubanBook", "user_location.dat"), user_cnt=13024)
user, user_i, user_m = utils.m2m_to_list(os.path.join(os.getcwd(), "DoubanBook", "user_user.dat"), user_cnt=13024)

In [40]:
year, year_i, year_m = utils.m2m_to_list(os.path.join(os.getcwd(), "DoubanBook", "book_year.dat"), user_cnt=22347)
publisher, publisher_i, publisher_m = utils.m2m_to_list(os.path.join(os.getcwd(), "DoubanBook", "book_publisher.dat"), user_cnt=22347)
author, author_i, author_m = utils.m2m_to_list(os.path.join(os.getcwd(), "DoubanBook", "book_author.dat"), user_cnt=22347)

In [41]:
sparse_features = ["cols_0", "cols_1"] # user_id, movie_id
rating = "cols_2"

In [42]:
train = utils.read_file(train_p)
train.head()

Unnamed: 0,cols_0,cols_1,cols_2
0,9608,791,3
1,11607,2664,4
2,3630,712,4
3,12070,5046,5
4,3967,202,5


In [43]:
_group = group[rel.cols_0 -1]
_location = location[rel.cols_0 -1]
_user = user[rel.cols_0 -1]

_year = year[rel.cols_1 -1]
_publisher = publisher[rel.cols_1 -1]
_author = author[rel.cols_1 -1]

In [44]:
# 1.Label Encoding for sparse features,and process sequence features
for feat in sparse_features:
    lbe = LabelEncoder()
    train[feat] = lbe.fit_transform(train[feat])

In [45]:
fixlen_feature_columns = [SparseFeat(feat, train[feat].nunique(), embedding_dim=4) for feat in sparse_features]

varlen_feature_columns = [
    VarLenSparseFeat(SparseFeat('group', vocabulary_size=group_i + 1, embedding_dim=4), maxlen=group_m, combiner='mean'),
    VarLenSparseFeat(SparseFeat('location', vocabulary_size=location_i + 1, embedding_dim=4), maxlen=location_m, combiner='mean'),
    VarLenSparseFeat(SparseFeat('user', vocabulary_size=13024 + 1, embedding_dim=4), maxlen=user_m, combiner='mean'),
    
    VarLenSparseFeat(SparseFeat('year', vocabulary_size=year_i + 1, embedding_dim=4), maxlen=year_m, combiner='mean'),
    VarLenSparseFeat(SparseFeat('publisher', vocabulary_size=publisher_i + 1, embedding_dim=4), maxlen=publisher_m, combiner='mean'),
    VarLenSparseFeat(SparseFeat('author', vocabulary_size=author_i + 1, embedding_dim=4), maxlen=author_m, combiner='mean')
]

In [46]:
linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [47]:
# 3.generate input data for model
model_input = {name: train[name] for name in sparse_features}  #
model_input["group"] = _group
model_input["location"] = _location
model_input["user"] = _user
model_input["year"] = _year
model_input["publisher"] = _publisher
model_input["author"] = _author

### DeepFM

In [15]:
from deepctr_torch.models import DeepFM

In [16]:
deepfm = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

deepfm.compile("adam", "mse", metrics=['mse'], )
history = deepfm.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
228s - loss:  0.7947 - mse:  0.7945 - val_mse:  0.5051
Epoch 2/10
230s - loss:  0.4835 - mse:  0.4835 - val_mse:  0.5019
Epoch 3/10
228s - loss:  0.4719 - mse:  0.4718 - val_mse:  0.4993
Epoch 4/10
226s - loss:  0.4649 - mse:  0.4649 - val_mse:  0.5034
Epoch 5/10
232s - loss:  0.4600 - mse:  0.4599 - val_mse:  0.4983
Epoch 6/10
226s - loss:  0.4546 - mse:  0.4545 - val_mse:  0.5073
Epoch 7/10
228s - loss:  0.4494 - mse:  0.4492 - val_mse:  0.5018
Epoch 8/10
229s - loss:  0.4428 - mse:  0.4426 - val_mse:  0.5088
Epoch 9/10
228s - loss:  0.4335 - mse:  0.4333 - val_mse:  0.4998
Epoch 10/10
229s - loss:  0.4216 - mse:  0.4213 - val_mse:  0.5087


### CCPM

In [17]:
from deepctr_torch.models import CCPM

In [18]:
ccpm = CCPM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

ccpm.compile("adam", "mse", metrics=['mse'], )
history = ccpm.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
228s - loss:  0.7249 - mse:  0.7248 - val_mse:  0.5026
Epoch 2/10
229s - loss:  0.4766 - mse:  0.4765 - val_mse:  0.4946
Epoch 3/10
224s - loss:  0.4588 - mse:  0.4588 - val_mse:  0.4915
Epoch 4/10
231s - loss:  0.4455 - mse:  0.4456 - val_mse:  0.4947
Epoch 5/10
237s - loss:  0.4302 - mse:  0.4300 - val_mse:  0.4993
Epoch 6/10
232s - loss:  0.4124 - mse:  0.4122 - val_mse:  0.5155
Epoch 7/10
207s - loss:  0.3938 - mse:  0.3936 - val_mse:  0.5194
Epoch 8/10
210s - loss:  0.3771 - mse:  0.3770 - val_mse:  0.5336
Epoch 9/10
214s - loss:  0.3616 - mse:  0.3613 - val_mse:  0.5405
Epoch 10/10
213s - loss:  0.3485 - mse:  0.3483 - val_mse:  0.5592


### PNN

In [19]:
from deepctr_torch.models import PNN

#### IPNN

In [20]:
ipnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=True, use_outter=False)

In [21]:
ipnn.compile("adam", "mse", metrics=['mse'], )
history = ipnn.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
125s - loss:  0.9133 - mse:  0.9131 - val_mse:  0.5042
Epoch 2/10
125s - loss:  0.4784 - mse:  0.4784 - val_mse:  0.4927
Epoch 3/10
123s - loss:  0.4554 - mse:  0.4554 - val_mse:  0.4902
Epoch 4/10
124s - loss:  0.4353 - mse:  0.4353 - val_mse:  0.4972
Epoch 5/10
128s - loss:  0.4149 - mse:  0.4150 - val_mse:  0.5067
Epoch 6/10
128s - loss:  0.3920 - mse:  0.3918 - val_mse:  0.5238
Epoch 7/10
128s - loss:  0.3712 - mse:  0.3711 - val_mse:  0.5402
Epoch 8/10
136s - loss:  0.3534 - mse:  0.3532 - val_mse:  0.5562
Epoch 9/10
141s - loss:  0.3391 - mse:  0.3390 - val_mse:  0.5750
Epoch 10/10
136s - loss:  0.3282 - mse:  0.3280 - val_mse:  0.5825


#### OPNN

In [22]:
opnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=False, use_outter=True)

In [23]:
opnn.compile("adam", "mse", metrics=['mse'], )
history = opnn.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
142s - loss:  0.8448 - mse:  0.8446 - val_mse:  0.5039
Epoch 2/10
143s - loss:  0.4802 - mse:  0.4802 - val_mse:  0.4969
Epoch 3/10
141s - loss:  0.4659 - mse:  0.4658 - val_mse:  0.4955
Epoch 4/10
149s - loss:  0.4523 - mse:  0.4523 - val_mse:  0.4966
Epoch 5/10
145s - loss:  0.4381 - mse:  0.4381 - val_mse:  0.4894
Epoch 6/10
148s - loss:  0.4243 - mse:  0.4242 - val_mse:  0.4984
Epoch 7/10
149s - loss:  0.4031 - mse:  0.4031 - val_mse:  0.5068
Epoch 8/10
148s - loss:  0.3811 - mse:  0.3809 - val_mse:  0.5243
Epoch 9/10
149s - loss:  0.3619 - mse:  0.3618 - val_mse:  0.5375
Epoch 10/10
150s - loss:  0.3465 - mse:  0.3463 - val_mse:  0.5589


#### PIN

### Wide & Deep

In [24]:
from deepctr_torch.models import WDL

In [25]:
wdl = WDL(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [26]:
wdl.compile("adam", "mse", metrics=['mse'], )
history = wdl.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
246s - loss:  0.7961 - mse:  0.7960 - val_mse:  0.5040
Epoch 2/10
240s - loss:  0.4823 - mse:  0.4823 - val_mse:  0.5012
Epoch 3/10
246s - loss:  0.4711 - mse:  0.4710 - val_mse:  0.4985
Epoch 4/10
250s - loss:  0.4652 - mse:  0.4651 - val_mse:  0.5034
Epoch 5/10
244s - loss:  0.4620 - mse:  0.4618 - val_mse:  0.4979
Epoch 6/10
246s - loss:  0.4590 - mse:  0.4589 - val_mse:  0.5057
Epoch 7/10
227s - loss:  0.4565 - mse:  0.4564 - val_mse:  0.5003
Epoch 8/10
216s - loss:  0.4539 - mse:  0.4538 - val_mse:  0.5046
Epoch 9/10
224s - loss:  0.4500 - mse:  0.4499 - val_mse:  0.4941
Epoch 10/10
246s - loss:  0.4450 - mse:  0.4449 - val_mse:  0.4990


### Deep Cross

In [27]:
from deepctr_torch.models import DCN

In [28]:
dcn = DCN(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [29]:
dcn.compile("adam", "mse", metrics=['mse'], )
history = dcn.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
258s - loss:  0.8089 - mse:  0.8087 - val_mse:  0.5125
Epoch 2/10
261s - loss:  0.4816 - mse:  0.4817 - val_mse:  0.4968
Epoch 3/10
261s - loss:  0.4698 - mse:  0.4698 - val_mse:  0.4977
Epoch 4/10
257s - loss:  0.4629 - mse:  0.4628 - val_mse:  0.4945
Epoch 5/10
255s - loss:  0.4569 - mse:  0.4567 - val_mse:  0.4912
Epoch 6/10
258s - loss:  0.4486 - mse:  0.4486 - val_mse:  0.4922
Epoch 7/10
252s - loss:  0.4416 - mse:  0.4415 - val_mse:  0.4916
Epoch 8/10
251s - loss:  0.4345 - mse:  0.4343 - val_mse:  0.4934
Epoch 9/10
256s - loss:  0.4284 - mse:  0.4283 - val_mse:  0.4968
Epoch 10/10
263s - loss:  0.4220 - mse:  0.4217 - val_mse:  0.5029


### xDeepFM

In [30]:
from deepctr_torch.models import xDeepFM

In [31]:
xdeepfm = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [32]:
xdeepfm.compile("adam", "mse", metrics=['mse'], )
history = xdeepfm.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
267s - loss:  0.6677 - mse:  0.6676 - val_mse:  0.5041
Epoch 2/10
262s - loss:  0.4801 - mse:  0.4800 - val_mse:  0.5075
Epoch 3/10
260s - loss:  0.4634 - mse:  0.4634 - val_mse:  0.4897
Epoch 4/10
259s - loss:  0.4514 - mse:  0.4513 - val_mse:  0.4937
Epoch 5/10
266s - loss:  0.4428 - mse:  0.4428 - val_mse:  0.4951
Epoch 6/10
261s - loss:  0.4348 - mse:  0.4347 - val_mse:  0.4953
Epoch 7/10
262s - loss:  0.4266 - mse:  0.4264 - val_mse:  0.4972
Epoch 8/10
253s - loss:  0.4124 - mse:  0.4123 - val_mse:  0.5075
Epoch 9/10
265s - loss:  0.3919 - mse:  0.3917 - val_mse:  0.5153
Epoch 10/10
268s - loss:  0.3690 - mse:  0.3688 - val_mse:  0.5356


### Attentional Factorization Machine

In [33]:
from deepctr_torch.models import AFM

In [34]:
afm = AFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [35]:
afm.compile("adam", "mse", metrics=['mse'], )
history = afm.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
255s - loss:  2.5075 - mse:  2.5064 - val_mse:  0.6100
Epoch 2/10
245s - loss:  0.5585 - mse:  0.5580 - val_mse:  0.5407
Epoch 3/10
250s - loss:  0.5036 - mse:  0.5033 - val_mse:  0.5176
Epoch 4/10
248s - loss:  0.4790 - mse:  0.4785 - val_mse:  0.5092
Epoch 5/10
255s - loss:  0.4663 - mse:  0.4658 - val_mse:  0.5048
Epoch 6/10
251s - loss:  0.4587 - mse:  0.4581 - val_mse:  0.5022
Epoch 7/10
253s - loss:  0.4536 - mse:  0.4529 - val_mse:  0.5014
Epoch 8/10
251s - loss:  0.4497 - mse:  0.4490 - val_mse:  0.5012
Epoch 9/10
256s - loss:  0.4468 - mse:  0.4461 - val_mse:  0.5005
Epoch 10/10
254s - loss:  0.4443 - mse:  0.4436 - val_mse:  0.5011


### Neural Factorization Machine

In [36]:
from deepctr_torch.models import NFM

In [37]:
nfm = NFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [38]:
nfm.compile("adam", "mse", metrics=['mse'], )
history = nfm.fit(model_input, train_df[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
247s - loss:  0.7533 - mse:  0.7532 - val_mse:  0.5090
Epoch 2/10
234s - loss:  0.4786 - mse:  0.4785 - val_mse:  0.4960
Epoch 3/10
246s - loss:  0.4566 - mse:  0.4565 - val_mse:  0.4959
Epoch 4/10
246s - loss:  0.4432 - mse:  0.4432 - val_mse:  0.4988
Epoch 5/10
242s - loss:  0.4338 - mse:  0.4337 - val_mse:  0.5024
Epoch 6/10
248s - loss:  0.4259 - mse:  0.4258 - val_mse:  0.5026
Epoch 7/10
238s - loss:  0.4191 - mse:  0.4189 - val_mse:  0.5034
Epoch 8/10
248s - loss:  0.4125 - mse:  0.4123 - val_mse:  0.5059
Epoch 9/10
236s - loss:  0.4040 - mse:  0.4037 - val_mse:  0.5117
Epoch 10/10
230s - loss:  0.3946 - mse:  0.3943 - val_mse:  0.5179


### Deep Interest Network

In [48]:
from deepctr_torch.models import DIN

In [51]:
behavior_feature_list = np.array(["cols_0", "cols_1"])

In [52]:
din = DIN(dnn_feature_columns, behavior_feature_list, task='regression', device=device)

In [53]:
din.compile("adam", "mse", metrics=['mse'], )
history = din.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch


AttributeError: 'VarLenSparseFeat' object has no attribute 'use_hash'