In [1]:
# -*- coding: utf-8 -*-
import numpy as np
import torch
import pdb
from sklearn.metrics import roc_auc_score
# Seed NumPy's and PyTorch's global random number generators with a fixed
# value so that repeated runs of the notebook draw the same random numbers.
np.random.seed(2020)
torch.manual_seed(2020)
import pdb

from dataset import load_data
from matrix_factorization import MF, MF_CVIB, MF_IPS, MF_SNIPS, MF_DR
from utils import gini_index, ndcg_func, get_user_wise_ctr, rating_mat_to_sample, binarize, shuffle, minU
def mse_func(x, y):
    """Return the mean squared error between ``x`` and ``y``.

    Both arguments are combined element-wise (``x - y``), so they must be
    NumPy arrays (or scalars) that broadcast against each other.
    """
    return np.mean((x - y) ** 2)


def acc_func(x, y):
    """Return the fraction of positions at which ``x`` equals ``y``.

    The count of element-wise matches is divided by ``len(x)``.
    """
    return np.sum(x == y) / len(x)


# Dataset used by the rest of the notebook; the loading cell handles
# "coat" and "yahoo".
dataset_name = "coat"

In [2]:
# Load the selected dataset into (x_train, y_train, x_test, y_test) and work
# out how many users and items the models must cover.
if dataset_name == "coat":
    # Coat comes back as train/test rating matrices; they are flattened into
    # (x, y) samples here. The matrix shape gives the user/item counts.
    train_mat, test_mat = load_data("coat")
    x_train, y_train = rating_mat_to_sample(train_mat)
    x_test, y_test = rating_mat_to_sample(test_mat)
    num_user = train_mat.shape[0]
    num_item = train_mat.shape[1]

elif dataset_name == "yahoo":
    # Yahoo is returned already in sample form; only the training split is
    # shuffled.
    x_train, y_train, x_test, y_test = load_data("yahoo")
    x_train, y_train = shuffle(x_train, y_train)
    # Counts are taken as (largest index in the training samples) + 1, with
    # column 0 used for users and column 1 for items.
    num_user = x_train[:,0].max() + 1
    num_item = x_train[:,1].max() + 1

else:
    # Fail here with a clear message instead of a NameError on `num_user`
    # further down when the name is misspelled or unsupported.
    raise ValueError(
        "Unsupported dataset_name {!r}; expected 'coat' or 'yahoo'.".format(dataset_name))

print("# user: {}, # item: {}".format(num_user, num_item))
# Convert the ratings to binary labels (the rule lives in utils.binarize).
y_train = binarize(y_train)
y_test = binarize(y_test)

===>Load from coat data set<===
[train] rating ratio: 0.080000
[test]  rating ratio: 0.053333
# user: 290, # item: 300


In [3]:
"MF CVIB"
# Train MF-CVIB on the training samples, then report MSE, AUC, NDCG@5/10 and
# the user-wise CTR statistics on the test samples.
mf_cvib = MF_CVIB(num_user, num_item)
mf_cvib.fit(x_train, y_train,
    lr=0.01,
    batch_size=128,
    lamb=1e-4,
    alpha=10.0,
    gamma=1e-5,
    tol=1e-5,
    verbose=False)

test_pred = mf_cvib.predict(x_test)
# Kept under CVIB-specific names: the naive-MF cell assigns `mse_mf` and
# `auc_mf`, which would otherwise overwrite these results.
mse_mfcvib = mse_func(y_test, test_pred)
auc_mfcvib = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_cvib, x_test, y_test)

print("***"*5 + "[MF-CVIB]" + "***"*5)
print("[MF-CVIB] test mse:", mse_mfcvib)
print("[MF-CVIB] test auc:", auc_mfcvib)
# Tag the NDCG line with this model's name (it previously printed as "[MF]").
print("[MF-CVIB] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
# NOTE(review): the recorded output shows "Num User / Gini index / Global
# utility" lines at this point, so one of these helpers appears to print them.
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-CVIB]" + "***"*5)

[MF-CVIB] epoch:30, xent:108.83231997489929
***************[MF-CVIB]***************
[MF-CVIB] test mse: 0.5794288551431656
[MF-CVIB] test auc: 0.7469847095368782
[MF] ndcg@5:0.655306, ndcg@10:0.712245
Num User: 290
Gini index: 0.31512991899429077
Global utility: 0.5372413793103449
***************[MF-CVIB]***************


In [4]:
"MF naive"
# Train the plain MF baseline on the training samples, then report MSE, AUC,
# NDCG@5/10 and the user-wise CTR statistics on the test samples.
mf = MF(num_user, num_item)
mf.fit(x_train, y_train,
    lr=0.01,
    batch_size=128,
    lamb=1e-4,
    tol=1e-5,
    verbose=False)
test_pred = mf.predict(x_test)
mse_mf = mse_func(y_test, test_pred)
auc_mf = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf, x_test, y_test)

print("***"*5 + "[MF]" + "***"*5)
# Print the stored value rather than recomputing the MSE a second time.
print("[MF] test mse:", mse_mf)
print("[MF] test auc:", auc_mf)
print("[MF] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
# NOTE(review): the recorded output shows "Num User / Gini index / Global
# utility" lines at this point, so one of these helpers appears to print them.
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF]" + "***"*5)

[MF] epoch:92, xent:17.17500016093254
***************[MF]***************
[MF] test mse: 0.24587457540363486
[MF] test auc: 0.7032263627287906
[MF] ndcg@5:0.588338, ndcg@10:0.664943
Num User: 290
Gini index: 0.3487279372865877
Global utility: 0.4944827586206896
***************[MF]***************


In [5]:
"MF IPS"
# Train MF-IPS on the training samples, then report MSE, AUC, NDCG@5/10 and
# the user-wise CTR statistics on the test samples.
mf_ips = MF_IPS(num_user, num_item)

# Pick a random 5% of the test labels and hand them to fit() as `y_ips`.
# NOTE(review): presumably these are used to estimate propensities; they come
# from the same y_test that is scored below, so confirm this overlap is intended.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_ips.fit(x_train, y_train,  y_ips=y_ips,
    lr=0.01,
    batch_size=128,
    lamb=1e-4,
    tol=1e-5,
    verbose=False)
test_pred = mf_ips.predict(x_test)
mse_mfips = mse_func(y_test, test_pred)
auc_mfips = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_ips, x_test, y_test)

print("***"*5 + "[MF-IPS]" + "***"*5)
# Print the stored value rather than recomputing the MSE a second time.
print("[MF-IPS] test mse:", mse_mfips)
print("[MF-IPS] test auc:", auc_mfips)
# Tag the NDCG line with this model's name (it previously printed as "[MF]").
print("[MF-IPS] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-IPS]" + "***"*5)

[MF-IPS] epoch:121, xent:144.3805215358734
***************[MF-IPS]***************
[MF-IPS] test mse: 0.2891346891790511
[MF-IPS] test auc: 0.6849764800770826
[MF] ndcg@5:0.586394, ndcg@10:0.668824
Num User: 290
Gini index: 0.3433973354231987
Global utility: 0.4855172413793104
***************[MF-IPS]***************


In [6]:
"MF-SNIPS"
# Train MF-SNIPS on the training samples, then report MSE, AUC, NDCG@5/10 and
# the user-wise CTR statistics on the test samples.
mf_snips = MF_SNIPS(num_user, num_item)

# Pick a random 5% of the test labels and hand them to fit() as `y_ips`.
# NOTE(review): presumably these are used to estimate propensities; they come
# from the same y_test that is scored below, so confirm this overlap is intended.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

mf_snips.fit(x_train, y_train,  y_ips=y_ips,
    lr=0.01,
    batch_size=128,
    lamb=1e-4,
    tol=1e-5,
    verbose=False)
test_pred = mf_snips.predict(x_test)
mse_mfsnips = mse_func(y_test, test_pred)
auc_mfsnips = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_snips, x_test, y_test)

print("***"*5 + "[MF-SNIPS]" + "***"*5)
print("[MF-SNIPS] test mse:", mse_mfsnips)
print("[MF-SNIPS] test auc:", auc_mfsnips)
# Tag the NDCG line with this model's name (it previously printed as "[MF]").
print("[MF-SNIPS] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-SNIPS]" + "***"*5)

[MF-SNIPS] epoch:108, xent:16.620826572179794
***************[MF-SNIPS]***************
[MF-SNIPS] test mse: 0.24541254818914784
[MF-SNIPS] test auc: 0.6906621691532132
[MF] ndcg@5:0.603736, ndcg@10:0.680151
Num User: 290
Gini index: 0.32868059814142303
Global utility: 0.5041379310344828
***************[MF-SNIPS]***************


In [7]:
"MF DR"
# Train MF-DR on the training samples, then report MSE, AUC, NDCG@5/10 and
# the user-wise CTR statistics on the test samples.
mf_dr = MF_DR(num_user, num_item)

# Pick a random 5% of the test labels and hand them to fit() as `y_ips`.
# NOTE(review): presumably these are used to estimate propensities; they come
# from the same y_test that is scored below, so confirm this overlap is intended.
ips_idxs = np.arange(len(y_test))
np.random.shuffle(ips_idxs)
y_ips = y_test[ips_idxs[:int(0.05 * len(ips_idxs))]]

# NOTE(review): lr=0.05 here differs from the 0.01 used for the other models
# in this notebook — confirm this is intentional.
mf_dr.fit(x_train, y_train,  y_ips=y_ips,
    lr=0.05,
    batch_size=128,
    lamb=1e-4,
    tol=1e-5,
    verbose=False)
test_pred = mf_dr.predict(x_test)
mse_mfdr = mse_func(y_test, test_pred)
auc_mfdr = roc_auc_score(y_test, test_pred)
ndcg_res = ndcg_func(mf_dr, x_test, y_test)

print("***"*5 + "[MF-DR]" + "***"*5)
print("[MF-DR] test mse:", mse_mfdr)
print("[MF-DR] test auc:", auc_mfdr)
# Tag the NDCG line with this model's name (it previously printed as "[MF]").
print("[MF-DR] ndcg@5:{:.6f}, ndcg@10:{:.6f}".format(
        np.mean(ndcg_res["ndcg_5"]), np.mean(ndcg_res["ndcg_10"])))
user_wise_ctr = get_user_wise_ctr(x_test,y_test,test_pred)
gi,gu = gini_index(user_wise_ctr)
print("***"*5 + "[MF-DR]" + "***"*5)

[MF-DR] epoch:45, xent:24672.74530029297
***************[MF-DR]***************
[MF-DR] test mse: 0.2747846467958318
[MF-DR] test auc: 0.6872889374005826
[MF] ndcg@5:0.609119, ndcg@10:0.676230
Num User: 290
Gini index: 0.33700959437636646
Global utility: 0.5006896551724137
***************[MF-DR]***************
