In [1]:
import os
import numpy as np
import pandas as pd

from scipy.sparse import csr_matrix

from tqdm import tqdm

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error, ndcg_score, recall_score
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

In [2]:
import torch
import torch.nn as nn
import torch.functional as F

from deepctr_torch.inputs import SparseFeat, VarLenSparseFeat, get_feature_names

In [3]:
from deepctr_torch.models import DIN
from deepctr_torch.models import AFM
from deepctr_torch.models import WDL
from deepctr_torch.models import xDeepFM
from deepctr_torch.models import DeepFM
from deepctr_torch.models import PNN
from deepctr_torch.models import DCN
from deepctr_torch.models import CCPM

In [4]:
import utils

In [5]:
# Runtime configuration: compute device, dataset location and dataset size.
device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    # Prefer the second GPU, but fall back to GPU 0 on single-GPU machines,
    # where 'cuda:1' would be an invalid device.
    device = 'cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0'

root = os.path.join(os.getcwd(), "Movielens")
rel_p = os.path.join(root, "user_movie.dat")

# Passed as `user_cnt` to utils.m2m_to_list for the user-side feature files.
user_cnt = 943

cuda ready...


### User

In [6]:
# User-side feature files; `root` already points at <cwd>/Movielens.
age, age_i, age_m = utils.m2m_to_list(
    os.path.join(root, "user_age.dat"),
    user_cnt=user_cnt,
)
occupation, occupation_i, occupation_m = utils.m2m_to_list(
    os.path.join(root, "user_occupation.dat"),
    user_cnt=user_cnt,
)
# user, user_i, user_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Movielens", "user_user.dat"), user_cnt=user_cnt)

### Movie

In [7]:
# Movie-side feature file. The helper's `user_cnt` argument receives 1682 here
# (presumably the number of movies -- TODO confirm against utils.m2m_to_list).
genre, genre_i, genre_m = utils.m2m_to_list(
    os.path.join(root, "movie_genre.dat"),
    user_cnt=1682,
)
# publisher, publisher_i, publisher_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Movielens", "book_publisher.dat"), user_cnt=user_cnt)
# author, author_i, author_m = utils.m2m_to_list(os.path.join(os.getcwd(), "Movielens", "book_author.dat"), user_cnt=user_cnt)

### Interact

In [8]:
# Column names of the interaction frame that the models consume.
sparse_features = [
    "cols_0",  # user_id
    "cols_1",  # movie_id
]
rating = "cols_2"  # regression target

In [9]:
# Load the user-movie interaction file into `rel`; the preview below shows
# the columns cols_0 .. cols_3 (cols_0/cols_1 are the ids, cols_2 the rating).
rel = utils.read_file(rel_p)
rel.head()

Unnamed: 0,cols_0,cols_1,cols_2,cols_3
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [10]:
# Look up, for every interaction row, the feature entry of its user and movie.
# The ids are shifted by one to index the feature tables (presumably because
# raw ids are 1-based -- TODO confirm). This cell has to run before the
# label-encoding cell below, which rewrites cols_0 / cols_1 in place.
user_rows = rel.cols_0 - 1
movie_rows = rel.cols_1 - 1

_age = age[user_rows]
_occupation = occupation[user_rows]
# _user = user[rel.cols_0 -1]

_genre = genre[movie_rows]
# _publisher = publisher[rel.cols_1 -1]
# _author = author[rel.cols_1 -1]

In [11]:
# 1. Label-encode the sparse id columns so they can be used as embedding indices.
# NOTE: this overwrites rel[feat] in place, so the `rel.cols_* - 1` lookups in
# the previous cell must already have been evaluated on the raw ids.
for feat in sparse_features:
    lbe = LabelEncoder()
    rel[feat] = lbe.fit_transform(rel[feat])

In [12]:
# One embedding table per encoded id column, sized by its number of distinct values.
fixlen_feature_columns = [
    SparseFeat(feat, rel[feat].nunique(), embedding_dim=4)
    for feat in sparse_features
]

# Variable-length side features, mean-pooled. Each spec is
# (feature name, value returned as `*_i`, value returned as `*_m`);
# presumably the largest index and the longest list -- TODO confirm in utils.
# The user / publisher / author features are not used for this dataset.
varlen_feature_columns = [
    VarLenSparseFeat(
        SparseFeat(feat_name, vocabulary_size=top_index + 1, embedding_dim=4),
        maxlen=list_len,
        combiner='mean',
    )
    for feat_name, top_index, list_len in (
        ('age', age_i, age_m),
        ('occupation', occupation_i, occupation_m),
        ('genre', genre_i, genre_m),
    )
]

In [13]:
# The linear and the deep parts of every model see the same columns;
# they are kept as two separate list objects.
linear_feature_columns = [*fixlen_feature_columns, *varlen_feature_columns]
dnn_feature_columns = list(linear_feature_columns)

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [14]:
# Number of cross-validation folds; the splitter is built from `k` so the two
# can no longer drift apart.
k = 5
kf = KFold(n_splits=k)

In [15]:
# Running sums of the per-fold test MSE, one per model (ints are immutable,
# so chained assignment yields independent counters).
deepfm_mse = ccpm_mse = ipnn_mse = opnn_mse = 0
wdl_mse = dcn_mse = xdeepfm_mse = afm_mse = din_mse = 0

In [16]:
# Running sums of the per-fold recall, one per model.
deepfm_recall = ccpm_recall = ipnn_recall = opnn_recall = 0
wdl_recall = dcn_recall = xdeepfm_recall = afm_recall = din_recall = 0

In [17]:
# Running sums of the per-fold NDCG, one per model.
deepfm_ndcg = ccpm_ndcg = ipnn_ndcg = opnn_ndcg = 0
wdl_ndcg = dcn_ndcg = xdeepfm_ndcg = afm_ndcg = din_ndcg = 0

In [None]:
# Ratings / predicted scores at or above this value count as "relevant".
# The same cut-off is used for the ground truth and for the predictions.
RELEVANCE_THRESHOLD = 3


def _build_fold_input(index):
    """Return ``(model_input, target)`` for the interaction rows at ``index``.

    ``index`` holds the positional row numbers produced by ``kf.split``.
    Ratings below ``RELEVANCE_THRESHOLD`` are replaced by 0 in the target;
    all other ratings are kept unchanged.
    """
    # .iloc because KFold yields positions, matching the positional indexing
    # of the _age / _occupation / _genre arrays.
    fold_input = {name: rel[name].iloc[index] for name in sparse_features}
    fold_input["age"] = _age[index]
    fold_input["occupation"] = _occupation[index]
    fold_input["genre"] = _genre[index]
    target = rel[rating].iloc[index].to_numpy()
    target = np.where(target < RELEVANCE_THRESHOLD, 0, target)
    return fold_input, target


def _fit_and_score(model, train_input, train_target, test_input, test_target, binary_target):
    """Train ``model`` on one fold and evaluate it on the held-out rows.

    Returns ``(history, mse, recall, ndcg)`` where

    * ``mse`` compares the raw predictions with ``test_target``;
    * ``recall`` compares predictions thresholded at ``RELEVANCE_THRESHOLD``
      with ``binary_target``;
    * ``ndcg`` ranks the test rows by the raw predicted score. Thresholding
      the scores first would collapse them to 0/1 and discard the ordering
      that NDCG is meant to measure.
    """
    model.compile("adam", "mse", metrics=['mse'], )
    history = model.fit(train_input, train_target, batch_size=256, epochs=10, verbose=2, validation_split=0.1)
    scores = model.predict(test_input, batch_size=256)

    mse = mean_squared_error(test_target, scores)
    predicted_relevant = np.where(scores >= RELEVANCE_THRESHOLD, 1, 0).ravel()
    recall = recall_score(binary_target.ravel(), predicted_relevant)
    ndcg = ndcg_score(binary_target, scores.reshape(1, -1))
    return history, mse, recall, ndcg


# NOTE: the *_mse / *_recall / *_ndcg accumulators are only added to here.
# Re-run the three accumulator cells before re-running this cell, otherwise
# the sums keep growing.
fold_cnt = 0

for train_index, test_index in kf.split(rel):

    fold_cnt += 1
    print("========================== {} Fold ==============================\n\n".format(fold_cnt))

    train_input, train_target = _build_fold_input(train_index)
    test_input, test_target = _build_fold_input(test_index)
    binary_target = np.where(test_target >= RELEVANCE_THRESHOLD, 1, 0).reshape(1, -1)
    fold_data = (train_input, train_target, test_input, test_target, binary_target)

    print("\n\n Training DeepFM \n")
    deepfm = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    deepfm_hist, mse, recall, ndcg = _fit_and_score(deepfm, *fold_data)
    deepfm_mse += mse
    deepfm_recall += recall
    deepfm_ndcg += ndcg

    print("\n\n Training CCPM \n")
    ccpm = CCPM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    ccpm_hist, mse, recall, ndcg = _fit_and_score(ccpm, *fold_data)
    ccpm_mse += mse
    ccpm_recall += recall
    ccpm_ndcg += ndcg

    print("\n\n Training IPNN \n")
    ipnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=True, use_outter=False)
    ipnn_hist, mse, recall, ndcg = _fit_and_score(ipnn, *fold_data)
    ipnn_mse += mse
    ipnn_recall += recall
    ipnn_ndcg += ndcg

    print("\n\n Training OPNN \n")
    opnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=False, use_outter=True)
    opnn_hist, mse, recall, ndcg = _fit_and_score(opnn, *fold_data)
    opnn_mse += mse
    opnn_recall += recall
    opnn_ndcg += ndcg

    print("\n\n Training Wide&Deep \n")
    wdl = WDL(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    wdl_hist, mse, recall, ndcg = _fit_and_score(wdl, *fold_data)
    wdl_mse += mse
    wdl_recall += recall
    wdl_ndcg += ndcg

    print("\n\n Training DCN \n")
    dcn = DCN(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    dcn_hist, mse, recall, ndcg = _fit_and_score(dcn, *fold_data)
    dcn_mse += mse
    dcn_recall += recall
    dcn_ndcg += ndcg

    print("\n\n Training xDeepFM \n")
    xdeepfm = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    xdeepfm_hist, mse, recall, ndcg = _fit_and_score(xdeepfm, *fold_data)
    xdeepfm_mse += mse
    xdeepfm_recall += recall
    xdeepfm_ndcg += ndcg

    print("\n\n Training AFM \n")
    afm = AFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
    afm_hist, mse, recall, ndcg = _fit_and_score(afm, *fold_data)
    afm_mse += mse
    afm_recall += recall
    afm_ndcg += ndcg

    # DIN is not trained in this loop, so din_mse / din_recall / din_ndcg
    # stay at their initial value of 0.





 Training DeepFM 

cuda:1
Train on 72000 samples, validate on 8000 samples, 282 steps per epoch
Epoch 1/10
13s - loss:  2.9872 - mse:  2.9840 - val_mse:  2.0168
Epoch 2/10
13s - loss:  1.9481 - mse:  1.9495 - val_mse:  1.9836
Epoch 3/10
5s - loss:  1.9152 - mse:  1.9158 - val_mse:  1.9753
Epoch 4/10
12s - loss:  1.9049 - mse:  1.9044 - val_mse:  1.9740
Epoch 5/10
13s - loss:  1.8991 - mse:  1.8976 - val_mse:  1.9793
Epoch 6/10
6s - loss:  1.8964 - mse:  1.8961 - val_mse:  1.9863
Epoch 7/10
11s - loss:  1.8914 - mse:  1.8909 - val_mse:  1.9761
Epoch 8/10
13s - loss:  1.8887 - mse:  1.8893 - val_mse:  1.9783
Epoch 9/10
8s - loss:  1.8883 - mse:  1.8880 - val_mse:  1.9810
Epoch 10/10
10s - loss:  1.8868 - mse:  1.8857 - val_mse:  1.9797


 Training CCPM 

cuda:1
Train on 72000 samples, validate on 8000 samples, 282 steps per epoch
Epoch 1/10
13s - loss:  3.2502 - mse:  3.2482 - val_mse:  2.3387
Epoch 2/10
6s - loss:  2.0361 - mse:  2.0372 - val_mse:  1.9916
Epoch 3/10
11s - loss:  1.9

Epoch 9/10
21s - loss:  1.8142 - mse:  1.8157 - val_mse:  1.9370
Epoch 10/10
26s - loss:  1.7827 - mse:  1.7823 - val_mse:  1.9332


 Training OPNN 

cuda:1
Train on 72000 samples, validate on 8000 samples, 282 steps per epoch
Epoch 1/10
28s - loss:  3.6433 - mse:  3.6388 - val_mse:  2.0420
Epoch 2/10
23s - loss:  1.9561 - mse:  1.9570 - val_mse:  1.9933
Epoch 3/10
30s - loss:  1.9135 - mse:  1.9119 - val_mse:  1.9841
Epoch 4/10
27s - loss:  1.9012 - mse:  1.9006 - val_mse:  1.9799
Epoch 5/10
25s - loss:  1.8874 - mse:  1.8879 - val_mse:  1.9679
Epoch 6/10
29s - loss:  1.8733 - mse:  1.8731 - val_mse:  1.9663
Epoch 7/10
27s - loss:  1.8548 - mse:  1.8542 - val_mse:  1.9532
Epoch 8/10
23s - loss:  1.8321 - mse:  1.8319 - val_mse:  1.9450
Epoch 9/10
22s - loss:  1.8124 - mse:  1.8115 - val_mse:  1.9334
Epoch 10/10
21s - loss:  1.7986 - mse:  1.7998 - val_mse:  1.9401


 Training Wide&Deep 

cuda:1
Train on 72000 samples, validate on 8000 samples, 282 steps per epoch
Epoch 1/10
46s - loss

Epoch 8/10
9s - loss:  1.8858 - mse:  1.8864 - val_mse:  1.9917
Epoch 9/10
9s - loss:  1.8820 - mse:  1.8837 - val_mse:  1.9905
Epoch 10/10
9s - loss:  1.8763 - mse:  1.8757 - val_mse:  1.9873


 Training xDeepFM 

cuda:1
Train on 72000 samples, validate on 8000 samples, 282 steps per epoch
Epoch 1/10
15s - loss:  2.5725 - mse:  2.5703 - val_mse:  2.0129
Epoch 2/10
16s - loss:  1.9369 - mse:  1.9361 - val_mse:  1.9843
Epoch 3/10
15s - loss:  1.9036 - mse:  1.9032 - val_mse:  1.9817
Epoch 4/10
15s - loss:  1.8879 - mse:  1.8876 - val_mse:  1.9862
Epoch 5/10
15s - loss:  1.8808 - mse:  1.8812 - val_mse:  2.0140
Epoch 6/10
16s - loss:  1.8674 - mse:  1.8698 - val_mse:  1.9684
Epoch 7/10
15s - loss:  1.8611 - mse:  1.8632 - val_mse:  1.9731
Epoch 8/10
11s - loss:  1.8535 - mse:  1.8516 - val_mse:  1.9745
Epoch 9/10
10s - loss:  1.8500 - mse:  1.8487 - val_mse:  1.9816
Epoch 10/10
10s - loss:  1.8397 - mse:  1.8397 - val_mse:  1.9902


 Training AFM 

cuda:1
Train on 72000 samples, validate

Epoch 6/10
15s - loss:  1.8885 - mse:  1.8882 - val_mse:  2.0082
Epoch 7/10
15s - loss:  1.8850 - mse:  1.8843 - val_mse:  2.0085
Epoch 8/10
15s - loss:  1.8845 - mse:  1.8858 - val_mse:  2.0021
Epoch 9/10
15s - loss:  1.8811 - mse:  1.8804 - val_mse:  2.0101
Epoch 10/10
15s - loss:  1.8771 - mse:  1.8768 - val_mse:  2.0106


 Training CCPM 

cuda:1
Train on 72000 samples, validate on 8000 samples, 282 steps per epoch
Epoch 1/10
12s - loss:  3.2446 - mse:  3.2464 - val_mse:  2.3378
Epoch 2/10
9s - loss:  2.0243 - mse:  2.0276 - val_mse:  2.0157
Epoch 3/10
9s - loss:  1.9000 - mse:  1.8976 - val_mse:  1.9861
Epoch 4/10
13s - loss:  1.8736 - mse:  1.8744 - val_mse:  1.9933
Epoch 5/10
15s - loss:  1.8574 - mse:  1.8570 - val_mse:  1.9810
Epoch 6/10
15s - loss:  1.8492 - mse:  1.8509 - val_mse:  1.9809
Epoch 7/10
14s - loss:  1.8402 - mse:  1.8386 - val_mse:  1.9841
Epoch 8/10
14s - loss:  1.8335 - mse:  1.8333 - val_mse:  1.9779
Epoch 9/10
13s - loss:  1.8253 - mse:  1.8259 - val_mse:  1.

In [None]:
# Mean of every metric over the cross-validation folds, shown as one table.
# The divisor comes from the splitter itself instead of a hard-coded 5, so the
# averages stay correct if the number of folds is changed.
n_folds = kf.get_n_splits()

fold_metric_sums = pd.DataFrame(
    {
        "mse": [deepfm_mse, ccpm_mse, ipnn_mse, opnn_mse, wdl_mse, dcn_mse, xdeepfm_mse, afm_mse],
        "recall": [deepfm_recall, ccpm_recall, ipnn_recall, opnn_recall,
                   wdl_recall, dcn_recall, xdeepfm_recall, afm_recall],
        "ndcg": [deepfm_ndcg, ccpm_ndcg, ipnn_ndcg, opnn_ndcg, wdl_ndcg, dcn_ndcg, xdeepfm_ndcg, afm_ndcg],
    },
    index=["DeepFM", "CCPM", "IPNN", "OPNN", "WDL", "DCN", "xDeepFM", "AFM"],
)
fold_metric_sums / n_folds

# Test single model

In [38]:
# Every other file in this section is read from <cwd>/DoubanBook, and the ids
# shown in train.head() exceed the 943 Movielens users, so train.dat is taken
# from DoubanBook too rather than from `root` (which points at Movielens).
train_p = os.path.join(os.getcwd(), "DoubanBook", "train.dat")

In [39]:
# User-side feature files of the DoubanBook dataset (13024 is passed as user_cnt).
_douban_dir = os.path.join(os.getcwd(), "DoubanBook")
group, group_i, group_m = utils.m2m_to_list(
    os.path.join(_douban_dir, "user_group.dat"), user_cnt=13024
)
location, location_i, location_m = utils.m2m_to_list(
    os.path.join(_douban_dir, "user_location.dat"), user_cnt=13024
)
user, user_i, user_m = utils.m2m_to_list(
    os.path.join(_douban_dir, "user_user.dat"), user_cnt=13024
)

In [40]:
# Book-side feature files of the DoubanBook dataset. The helper's `user_cnt`
# argument receives 22347 here (presumably the number of books -- TODO confirm).
_douban_dir = os.path.join(os.getcwd(), "DoubanBook")
year, year_i, year_m = utils.m2m_to_list(
    os.path.join(_douban_dir, "book_year.dat"), user_cnt=22347
)
publisher, publisher_i, publisher_m = utils.m2m_to_list(
    os.path.join(_douban_dir, "book_publisher.dat"), user_cnt=22347
)
author, author_i, author_m = utils.m2m_to_list(
    os.path.join(_douban_dir, "book_author.dat"), user_cnt=22347
)

In [41]:
sparse_features = ["cols_0", "cols_1"] # user_id, book_id
rating = "cols_2"  # regression target column

In [42]:
# Load the rating file at `train_p` into `train`; the preview below shows the
# columns cols_0 (user id), cols_1 (item id) and cols_2 (rating).
train = utils.read_file(train_p)
train.head()

Unnamed: 0,cols_0,cols_1,cols_2
0,9608,791,3
1,11607,2664,4
2,3630,712,4
3,12070,5046,5
4,3967,202,5


In [43]:
# Look up, for every row of `train`, the feature entry of its user and book.
# The ids must come from `train` (the frame the models below are fitted on),
# not from `rel`, which is the Movielens frame and has already been
# label-encoded in place. Run this before the label-encoding cell below,
# which overwrites train.cols_0 / train.cols_1.
user_rows = train.cols_0 - 1
book_rows = train.cols_1 - 1

_group = group[user_rows]
_location = location[user_rows]
_user = user[user_rows]

_year = year[book_rows]
_publisher = publisher[book_rows]
_author = author[book_rows]

In [44]:
# 1. Label-encode the sparse id columns so they can be used as embedding indices.
# NOTE: this overwrites train[feat] in place; the raw-id feature lookups in the
# previous cell must already have been evaluated.
for feat in sparse_features:
    lbe = LabelEncoder()
    train[feat] = lbe.fit_transform(train[feat])

In [45]:
# One embedding table per encoded id column, sized by its number of distinct values.
fixlen_feature_columns = [
    SparseFeat(feat, train[feat].nunique(), embedding_dim=4)
    for feat in sparse_features
]

# Variable-length side features, mean-pooled. Each spec is
# (feature name, value that vocabulary_size adds 1 to, maxlen).
# NOTE(review): 'user' uses the literal 13024 where the other features use
# their `*_i` value -- confirm whether `user_i` should be used instead.
varlen_feature_columns = [
    VarLenSparseFeat(
        SparseFeat(feat_name, vocabulary_size=top_index + 1, embedding_dim=4),
        maxlen=list_len,
        combiner='mean',
    )
    for feat_name, top_index, list_len in (
        ('group', group_i, group_m),
        ('location', location_i, location_m),
        ('user', 13024, user_m),
        ('year', year_i, year_m),
        ('publisher', publisher_i, publisher_m),
        ('author', author_i, author_m),
    )
]

In [46]:
# The linear and the deep parts of every model see the same columns;
# they are kept as two separate list objects.
linear_feature_columns = [*fixlen_feature_columns, *varlen_feature_columns]
dnn_feature_columns = list(linear_feature_columns)

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [47]:
# 3. Assemble the model input: the encoded id columns first, then the
#    per-row side features, keyed by feature name.
model_input = {
    **{name: train[name] for name in sparse_features},
    "group": _group,
    "location": _location,
    "user": _user,
    "year": _year,
    "publisher": _publisher,
    "author": _author,
}

### DeepFM

In [15]:
from deepctr_torch.models import DeepFM

In [16]:
# DeepFM regressor with MSE loss; 20% of the rows are held out for validation.
deepfm = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

deepfm.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = deepfm.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
228s - loss:  0.7947 - mse:  0.7945 - val_mse:  0.5051
Epoch 2/10
230s - loss:  0.4835 - mse:  0.4835 - val_mse:  0.5019
Epoch 3/10
228s - loss:  0.4719 - mse:  0.4718 - val_mse:  0.4993
Epoch 4/10
226s - loss:  0.4649 - mse:  0.4649 - val_mse:  0.5034
Epoch 5/10
232s - loss:  0.4600 - mse:  0.4599 - val_mse:  0.4983
Epoch 6/10
226s - loss:  0.4546 - mse:  0.4545 - val_mse:  0.5073
Epoch 7/10
228s - loss:  0.4494 - mse:  0.4492 - val_mse:  0.5018
Epoch 8/10
229s - loss:  0.4428 - mse:  0.4426 - val_mse:  0.5088
Epoch 9/10
228s - loss:  0.4335 - mse:  0.4333 - val_mse:  0.4998
Epoch 10/10
229s - loss:  0.4216 - mse:  0.4213 - val_mse:  0.5087


### CCPM

In [17]:
from deepctr_torch.models import CCPM

In [18]:
# CCPM regressor with MSE loss; 20% of the rows are held out for validation.
ccpm = CCPM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

ccpm.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = ccpm.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
228s - loss:  0.7249 - mse:  0.7248 - val_mse:  0.5026
Epoch 2/10
229s - loss:  0.4766 - mse:  0.4765 - val_mse:  0.4946
Epoch 3/10
224s - loss:  0.4588 - mse:  0.4588 - val_mse:  0.4915
Epoch 4/10
231s - loss:  0.4455 - mse:  0.4456 - val_mse:  0.4947
Epoch 5/10
237s - loss:  0.4302 - mse:  0.4300 - val_mse:  0.4993
Epoch 6/10
232s - loss:  0.4124 - mse:  0.4122 - val_mse:  0.5155
Epoch 7/10
207s - loss:  0.3938 - mse:  0.3936 - val_mse:  0.5194
Epoch 8/10
210s - loss:  0.3771 - mse:  0.3770 - val_mse:  0.5336
Epoch 9/10
214s - loss:  0.3616 - mse:  0.3613 - val_mse:  0.5405
Epoch 10/10
213s - loss:  0.3485 - mse:  0.3483 - val_mse:  0.5592


### PNN

In [19]:
from deepctr_torch.models import PNN

#### IPNN

In [20]:
# PNN with the inner-product option enabled and the outer-product option disabled.
ipnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=True, use_outter=False)

In [21]:
ipnn.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = ipnn.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
125s - loss:  0.9133 - mse:  0.9131 - val_mse:  0.5042
Epoch 2/10
125s - loss:  0.4784 - mse:  0.4784 - val_mse:  0.4927
Epoch 3/10
123s - loss:  0.4554 - mse:  0.4554 - val_mse:  0.4902
Epoch 4/10
124s - loss:  0.4353 - mse:  0.4353 - val_mse:  0.4972
Epoch 5/10
128s - loss:  0.4149 - mse:  0.4150 - val_mse:  0.5067
Epoch 6/10
128s - loss:  0.3920 - mse:  0.3918 - val_mse:  0.5238
Epoch 7/10
128s - loss:  0.3712 - mse:  0.3711 - val_mse:  0.5402
Epoch 8/10
136s - loss:  0.3534 - mse:  0.3532 - val_mse:  0.5562
Epoch 9/10
141s - loss:  0.3391 - mse:  0.3390 - val_mse:  0.5750
Epoch 10/10
136s - loss:  0.3282 - mse:  0.3280 - val_mse:  0.5825


#### OPNN

In [22]:
# PNN with the outer-product option enabled and the inner-product option disabled.
opnn = PNN(dnn_feature_columns, task='regression', device=device, use_inner=False, use_outter=True)

In [23]:
opnn.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = opnn.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
142s - loss:  0.8448 - mse:  0.8446 - val_mse:  0.5039
Epoch 2/10
143s - loss:  0.4802 - mse:  0.4802 - val_mse:  0.4969
Epoch 3/10
141s - loss:  0.4659 - mse:  0.4658 - val_mse:  0.4955
Epoch 4/10
149s - loss:  0.4523 - mse:  0.4523 - val_mse:  0.4966
Epoch 5/10
145s - loss:  0.4381 - mse:  0.4381 - val_mse:  0.4894
Epoch 6/10
148s - loss:  0.4243 - mse:  0.4242 - val_mse:  0.4984
Epoch 7/10
149s - loss:  0.4031 - mse:  0.4031 - val_mse:  0.5068
Epoch 8/10
148s - loss:  0.3811 - mse:  0.3809 - val_mse:  0.5243
Epoch 9/10
149s - loss:  0.3619 - mse:  0.3618 - val_mse:  0.5375
Epoch 10/10
150s - loss:  0.3465 - mse:  0.3463 - val_mse:  0.5589


#### PIN

### Wide & Deep

In [24]:
from deepctr_torch.models import WDL

In [25]:
# Wide & Deep model built as a regressor over the shared linear / dnn feature columns.
wdl = WDL(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [26]:
wdl.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = wdl.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
246s - loss:  0.7961 - mse:  0.7960 - val_mse:  0.5040
Epoch 2/10
240s - loss:  0.4823 - mse:  0.4823 - val_mse:  0.5012
Epoch 3/10
246s - loss:  0.4711 - mse:  0.4710 - val_mse:  0.4985
Epoch 4/10
250s - loss:  0.4652 - mse:  0.4651 - val_mse:  0.5034
Epoch 5/10
244s - loss:  0.4620 - mse:  0.4618 - val_mse:  0.4979
Epoch 6/10
246s - loss:  0.4590 - mse:  0.4589 - val_mse:  0.5057
Epoch 7/10
227s - loss:  0.4565 - mse:  0.4564 - val_mse:  0.5003
Epoch 8/10
216s - loss:  0.4539 - mse:  0.4538 - val_mse:  0.5046
Epoch 9/10
224s - loss:  0.4500 - mse:  0.4499 - val_mse:  0.4941
Epoch 10/10
246s - loss:  0.4450 - mse:  0.4449 - val_mse:  0.4990


### Deep Cross

In [27]:
from deepctr_torch.models import DCN

In [28]:
# Deep & Cross model built as a regressor over the shared linear / dnn feature columns.
dcn = DCN(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [29]:
dcn.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = dcn.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
258s - loss:  0.8089 - mse:  0.8087 - val_mse:  0.5125
Epoch 2/10
261s - loss:  0.4816 - mse:  0.4817 - val_mse:  0.4968
Epoch 3/10
261s - loss:  0.4698 - mse:  0.4698 - val_mse:  0.4977
Epoch 4/10
257s - loss:  0.4629 - mse:  0.4628 - val_mse:  0.4945
Epoch 5/10
255s - loss:  0.4569 - mse:  0.4567 - val_mse:  0.4912
Epoch 6/10
258s - loss:  0.4486 - mse:  0.4486 - val_mse:  0.4922
Epoch 7/10
252s - loss:  0.4416 - mse:  0.4415 - val_mse:  0.4916
Epoch 8/10
251s - loss:  0.4345 - mse:  0.4343 - val_mse:  0.4934
Epoch 9/10
256s - loss:  0.4284 - mse:  0.4283 - val_mse:  0.4968
Epoch 10/10
263s - loss:  0.4220 - mse:  0.4217 - val_mse:  0.5029


### xDeepFM

In [30]:
from deepctr_torch.models import xDeepFM

In [31]:
# xDeepFM model built as a regressor over the shared linear / dnn feature columns.
xdeepfm = xDeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [32]:
xdeepfm.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = xdeepfm.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
267s - loss:  0.6677 - mse:  0.6676 - val_mse:  0.5041
Epoch 2/10
262s - loss:  0.4801 - mse:  0.4800 - val_mse:  0.5075
Epoch 3/10
260s - loss:  0.4634 - mse:  0.4634 - val_mse:  0.4897
Epoch 4/10
259s - loss:  0.4514 - mse:  0.4513 - val_mse:  0.4937
Epoch 5/10
266s - loss:  0.4428 - mse:  0.4428 - val_mse:  0.4951
Epoch 6/10
261s - loss:  0.4348 - mse:  0.4347 - val_mse:  0.4953
Epoch 7/10
262s - loss:  0.4266 - mse:  0.4264 - val_mse:  0.4972
Epoch 8/10
253s - loss:  0.4124 - mse:  0.4123 - val_mse:  0.5075
Epoch 9/10
265s - loss:  0.3919 - mse:  0.3917 - val_mse:  0.5153
Epoch 10/10
268s - loss:  0.3690 - mse:  0.3688 - val_mse:  0.5356


### Attentional Factorization Machine

In [33]:
from deepctr_torch.models import AFM

In [34]:
# Attentional Factorization Machine built as a regressor over the shared feature columns.
afm = AFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [35]:
afm.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = afm.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
255s - loss:  2.5075 - mse:  2.5064 - val_mse:  0.6100
Epoch 2/10
245s - loss:  0.5585 - mse:  0.5580 - val_mse:  0.5407
Epoch 3/10
250s - loss:  0.5036 - mse:  0.5033 - val_mse:  0.5176
Epoch 4/10
248s - loss:  0.4790 - mse:  0.4785 - val_mse:  0.5092
Epoch 5/10
255s - loss:  0.4663 - mse:  0.4658 - val_mse:  0.5048
Epoch 6/10
251s - loss:  0.4587 - mse:  0.4581 - val_mse:  0.5022
Epoch 7/10
253s - loss:  0.4536 - mse:  0.4529 - val_mse:  0.5014
Epoch 8/10
251s - loss:  0.4497 - mse:  0.4490 - val_mse:  0.5012
Epoch 9/10
256s - loss:  0.4468 - mse:  0.4461 - val_mse:  0.5005
Epoch 10/10
254s - loss:  0.4443 - mse:  0.4436 - val_mse:  0.5011


### Neural Factorization Machine

In [36]:
from deepctr_torch.models import NFM

In [37]:
# Neural Factorization Machine built as a regressor over the shared feature columns.
nfm = NFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)

In [38]:
nfm.compile("adam", "mse", metrics=['mse'], )
# The ratings frame in this notebook is `train`; `train_df` is never defined.
history = nfm.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch
Epoch 1/10
247s - loss:  0.7533 - mse:  0.7532 - val_mse:  0.5090
Epoch 2/10
234s - loss:  0.4786 - mse:  0.4785 - val_mse:  0.4960
Epoch 3/10
246s - loss:  0.4566 - mse:  0.4565 - val_mse:  0.4959
Epoch 4/10
246s - loss:  0.4432 - mse:  0.4432 - val_mse:  0.4988
Epoch 5/10
242s - loss:  0.4338 - mse:  0.4337 - val_mse:  0.5024
Epoch 6/10
248s - loss:  0.4259 - mse:  0.4258 - val_mse:  0.5026
Epoch 7/10
238s - loss:  0.4191 - mse:  0.4189 - val_mse:  0.5034
Epoch 8/10
248s - loss:  0.4125 - mse:  0.4123 - val_mse:  0.5059
Epoch 9/10
236s - loss:  0.4040 - mse:  0.4037 - val_mse:  0.5117
Epoch 10/10
230s - loss:  0.3946 - mse:  0.3943 - val_mse:  0.5179


### Deep Interest Network

In [48]:
from deepctr_torch.models import DIN

In [51]:
# Feature names handed to DIN as its second positional argument.
# NOTE(review): DIN presumably expects matching history-sequence features for
# each of these names; none are defined in this notebook -- confirm.
behavior_feature_list = np.array(["cols_0", "cols_1"])

In [52]:
# Deep Interest Network built as a regressor from the dnn feature columns and the list above.
din = DIN(dnn_feature_columns, behavior_feature_list, task='regression', device=device)

In [53]:
# NOTE(review): the recorded output of this cell ends with
# "AttributeError: 'VarLenSparseFeat' object has no attribute 'use_hash'",
# i.e. the fit did not complete as written -- needs investigation before reuse.
din.compile("adam", "mse", metrics=['mse'], )
history = din.fit(model_input, train[rating].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2)

cuda:0
Train on 456227 samples, validate on 114057 samples, 1783 steps per epoch


AttributeError: 'VarLenSparseFeat' object has no attribute 'use_hash'