## Training and Testing

In this notebook I will focus on how the model is trained and tested

In [1]:
import tensorflow as tf
import os
import sys
import pdb
import heapq
import scipy.sparse as sp

from utility.load_data import *
from tqdm import tqdm

os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

Manually set parameters. These are simply the defaults in their `parse_args` function

In [3]:
weights_path=''
data_path='Data/'
proj_path=''
dataset='gowalla'
pretrain=0
verbose=1
epoch=500
embed_size=64
layer_size=[64]
batch_size=1024
regs=[1e-5,1e-5,1e-2]
lr=0.01
model_type='ngcf'
adj_type='norm'
alg_type='ngcf'
gpu_id=0
node_dropout_flag=0
node_dropout=[0.1]
mess_dropout=[0.1]
Ks=[20,40,60,80,100]
save_flag=0
test_flag='part'
report=0

We already know the model (explained in the notebook `ngcf_model_explained.ipynb`) 

In [4]:
class NGCF(object):
    def __init__(self, data_config, pretrain_data):
        self.model_type = 'ngcf'
        self.adj_type = adj_type
        self.alg_type = alg_type

        self.pretrain_data = pretrain_data

        self.n_users = data_config['n_users']
        self.n_items = data_config['n_items']

        self.n_fold = 100

        self.norm_adj = data_config['norm_adj']
        self.n_nonzero_elems = self.norm_adj.count_nonzero()

        self.lr = lr

        self.emb_dim = embed_size
        self.batch_size = batch_size

        self.weight_size = layer_size
        self.n_layers = len(self.weight_size)

        self.model_type += '_%s_%s_l%d' % (self.adj_type, self.alg_type, self.n_layers)

        self.regs = regs
        self.decay = self.regs[0]

        self.verbose = verbose

        '''
        *********************************************************
        Create Placeholder for Input Data & Dropout.
        '''
        # placeholder definition
        self.users = tf.placeholder(tf.int32, shape=(None,))
        self.pos_items = tf.placeholder(tf.int32, shape=(None,))
        self.neg_items = tf.placeholder(tf.int32, shape=(None,))

        # dropout: node dropout (adopted on the ego-networks);
        #          ... since the usage of node dropout have higher computational cost,
        #          ... please use the 'node_dropout_flag' to indicate whether use such technique.
        #          message dropout (adopted on the convolution operations).
        self.node_dropout_flag = node_dropout_flag
        self.node_dropout = tf.placeholder(tf.float32, shape=[None])
        self.mess_dropout = tf.placeholder(tf.float32, shape=[None])

        """
        *********************************************************
        Create Model Parameters (i.e., Initialize Weights).
        """
        # initialization of model parameters
        self.weights = self._init_weights()

        """
        *********************************************************
        Compute Graph-based Representations of all users & items via Message-Passing Mechanism of Graph Neural Networks.
        Different Convolutional Layers:
            1. ngcf: defined in 'Neural Graph Collaborative Filtering', SIGIR2019;
            2. gcn:  defined in 'Semi-Supervised Classification with Graph Convolutional Networks', ICLR2018;
            3. gcmc: defined in 'Graph Convolutional Matrix Completion', KDD2018;
        """
        if self.alg_type in ['ngcf']:
            self.ua_embeddings, self.ia_embeddings = self._create_ngcf_embed()

        elif self.alg_type in ['gcn']:
            self.ua_embeddings, self.ia_embeddings = self._create_gcn_embed()

        elif self.alg_type in ['gcmc']:
            self.ua_embeddings, self.ia_embeddings = self._create_gcmc_embed()

        """
        *********************************************************
        Establish the final representations for user-item pairs in batch.
        """
        self.u_g_embeddings = tf.nn.embedding_lookup(self.ua_embeddings, self.users)
        self.pos_i_g_embeddings = tf.nn.embedding_lookup(self.ia_embeddings, self.pos_items)
        self.neg_i_g_embeddings = tf.nn.embedding_lookup(self.ia_embeddings, self.neg_items)

        """
        *********************************************************
        Inference for the testing phase.
        """
        self.batch_ratings = tf.matmul(self.u_g_embeddings, self.pos_i_g_embeddings, transpose_a=False, transpose_b=True)

        """
        *********************************************************
        Generate Predictions & Optimize via BPR loss.
        """
        self.mf_loss, self.emb_loss, self.reg_loss = self.create_bpr_loss(self.u_g_embeddings,
                                                                          self.pos_i_g_embeddings,
                                                                          self.neg_i_g_embeddings)
        self.loss = self.mf_loss + self.emb_loss + self.reg_loss

        self.opt = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)

    def _init_weights(self):
        all_weights = dict()

        initializer = tf.contrib.layers.xavier_initializer()

        if self.pretrain_data is None:
            all_weights['user_embedding'] = tf.Variable(initializer([self.n_users, self.emb_dim]), name='user_embedding')
            all_weights['item_embedding'] = tf.Variable(initializer([self.n_items, self.emb_dim]), name='item_embedding')
            print('using xavier initialization')
        else:
            all_weights['user_embedding'] = tf.Variable(initial_value=self.pretrain_data['user_embed'], trainable=True,
                                                        name='user_embedding', dtype=tf.float32)
            all_weights['item_embedding'] = tf.Variable(initial_value=self.pretrain_data['item_embed'], trainable=True,
                                                        name='item_embedding', dtype=tf.float32)
            print('using pretrained initialization')

        self.weight_size_list = [self.emb_dim] + self.weight_size

        for k in range(self.n_layers):
            all_weights['W_gc_%d' %k] = tf.Variable(
                initializer([self.weight_size_list[k], self.weight_size_list[k+1]]), name='W_gc_%d' % k)
            all_weights['b_gc_%d' %k] = tf.Variable(
                initializer([1, self.weight_size_list[k+1]]), name='b_gc_%d' % k)

            all_weights['W_bi_%d' % k] = tf.Variable(
                initializer([self.weight_size_list[k], self.weight_size_list[k + 1]]), name='W_bi_%d' % k)
            all_weights['b_bi_%d' % k] = tf.Variable(
                initializer([1, self.weight_size_list[k + 1]]), name='b_bi_%d' % k)

            all_weights['W_mlp_%d' % k] = tf.Variable(
                initializer([self.weight_size_list[k], self.weight_size_list[k+1]]), name='W_mlp_%d' % k)
            all_weights['b_mlp_%d' % k] = tf.Variable(
                initializer([1, self.weight_size_list[k+1]]), name='b_mlp_%d' % k)

        return all_weights

    def _split_A_hat(self, X):
        A_fold_hat = []

        fold_len = (self.n_users + self.n_items) // self.n_fold
        for i_fold in range(self.n_fold):
            start = i_fold * fold_len
            if i_fold == self.n_fold -1:
                end = self.n_users + self.n_items
            else:
                end = (i_fold + 1) * fold_len

            A_fold_hat.append(self._convert_sp_mat_to_sp_tensor(X[start:end]))
        return A_fold_hat

    def _split_A_hat_node_dropout(self, X):
        A_fold_hat = []

        fold_len = (self.n_users + self.n_items) // self.n_fold
        for i_fold in range(self.n_fold):
            start = i_fold * fold_len
            if i_fold == self.n_fold -1:
                end = self.n_users + self.n_items
            else:
                end = (i_fold + 1) * fold_len

            # A_fold_hat.append(self._convert_sp_mat_to_sp_tensor(X[start:end]))
            temp = self._convert_sp_mat_to_sp_tensor(X[start:end])
            n_nonzero_temp = X[start:end].count_nonzero()
            A_fold_hat.append(self._dropout_sparse(temp, 1 - self.node_dropout[0], n_nonzero_temp))

        return A_fold_hat

    def _create_ngcf_embed(self):
        # Generate a set of adjacency sub-matrix.
        if self.node_dropout_flag:
            # node dropout.
            A_fold_hat = self._split_A_hat_node_dropout(self.norm_adj)
        else:
            A_fold_hat = self._split_A_hat(self.norm_adj)

        ego_embeddings = tf.concat([self.weights['user_embedding'], self.weights['item_embedding']], axis=0)

        all_embeddings = [ego_embeddings]

        for k in range(0, self.n_layers):

            temp_embed = []
            for f in range(self.n_fold):
                temp_embed.append(tf.sparse_tensor_dense_matmul(A_fold_hat[f], ego_embeddings))

            # sum messages of neighbors.
            side_embeddings = tf.concat(temp_embed, 0)
            # transformed sum messages of neighbors.
            sum_embeddings = tf.nn.leaky_relu(
                tf.matmul(side_embeddings, self.weights['W_gc_%d' % k]) + self.weights['b_gc_%d' % k])

            # bi messages of neighbors.
            bi_embeddings = tf.multiply(ego_embeddings, side_embeddings)
            # transformed bi messages of neighbors.
            bi_embeddings = tf.nn.leaky_relu(
                tf.matmul(bi_embeddings, self.weights['W_bi_%d' % k]) + self.weights['b_bi_%d' % k])

            # non-linear activation.
            ego_embeddings = sum_embeddings + bi_embeddings

            # message dropout.
            ego_embeddings = tf.nn.dropout(ego_embeddings, 1 - self.mess_dropout[k])

            # normalize the distribution of embeddings.
            norm_embeddings = tf.math.l2_normalize(ego_embeddings, axis=1)

            all_embeddings += [norm_embeddings]

        all_embeddings = tf.concat(all_embeddings, 1)
        u_g_embeddings, i_g_embeddings = tf.split(all_embeddings, [self.n_users, self.n_items], 0)
        return u_g_embeddings, i_g_embeddings

    def create_bpr_loss(self, users, pos_items, neg_items):
        pos_scores = tf.reduce_sum(tf.multiply(users, pos_items), axis=1)
        neg_scores = tf.reduce_sum(tf.multiply(users, neg_items), axis=1)

        regularizer = tf.nn.l2_loss(users) + tf.nn.l2_loss(pos_items) + tf.nn.l2_loss(neg_items)
        regularizer = regularizer/self.batch_size

        maxi = tf.log(tf.nn.sigmoid(pos_scores - neg_scores))
        mf_loss = tf.negative(tf.reduce_mean(maxi))

        emb_loss = self.decay * regularizer

        reg_loss = tf.constant(0.0, tf.float32, [1])

        return mf_loss, emb_loss, reg_loss

    def _convert_sp_mat_to_sp_tensor(self, X):
        coo = X.tocoo().astype(np.float32)
        indices = np.mat([coo.row, coo.col]).transpose()
        return tf.SparseTensor(indices, coo.data, coo.shape)

    def _dropout_sparse(self, X, keep_prob, n_nonzero_elems):
        noise_shape = [n_nonzero_elems]
        random_tensor = keep_prob
        random_tensor += tf.random_uniform(noise_shape)
        dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
        pre_out = tf.sparse_retain(X, dropout_mask)

        return pre_out * tf.div(1., keep_prob)

Here we will not be using pretrained embeddings. However, I will include here the corresponding code for completion, since it is important.

The function below simply load pre-trained embeddings

In [5]:
def load_pretrained_data():
    pretrain_path = '%spretrain/%s/%s.npz' % (proj_path, dataset, 'embedding')
    try:
        pretrain_data = np.load(pretrain_path)
        print('load the pretrained embeddings.')
    except Exception:
        pretrain_data = None
    return pretrain_data

Simply loading the data

In [6]:
data_generator = Data(path=data_path + dataset, batch_size=batch_size)
USR_NUM, ITEM_NUM = data_generator.n_users, data_generator.n_items
N_TRAIN, N_TEST = data_generator.n_train, data_generator.n_test
BATCH_SIZE = batch_size

n_users=29858, n_items=40981
n_interactions=1027370
n_train=810128, n_test=217242, sparsity=0.00084


In [7]:
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

config = dict()
config['n_users'] = data_generator.n_users
config['n_items'] = data_generator.n_items

Computing or loading the already computed adjancecy matrix

In [8]:
plain_adj, norm_adj, mean_adj = data_generator.get_adj_mat()

already load adj matrix (70839, 70839) 0.2664508819580078


In [9]:
config['norm_adj'] = mean_adj + sp.eye(mean_adj.shape[0])

the `help` param in the `pretrain` argument reads: "help='0: No pretrain, -1: Pretrain with the learned embeddings, 1:Pretrain with stored models." 

I insist, in our case we will not be using any pretrained weights, but just in case, let's discuss it as part of the process. Here they simply state that if `pretrain == -1`, we will load learned embeddings

In [10]:
if pretrain == -1:
    pretrain_data = load_pretrained_data()
else:
    pretrain_data = None

In [11]:
model = NGCF(data_config=config, pretrain_data=None)

W0809 09:22:54.730366 140170820278016 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



using xavier initialization


W0809 09:22:55.528345 140170820278016 deprecation.py:506] From <ipython-input-4-be6f8a5a6d64>:200: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0809 09:22:55.766374 140170820278016 deprecation.py:323] From /home/ubuntu/anaconda3/envs/ngcf/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Setting the tensorflow `Saver`

In [12]:
saver = tf.train.Saver()

if save_flag == 1:
    layer = '-'.join([str(l) for l in layer_size])
    weights_save_path = '%sweights/%s/%s/%s/l%s_r%s' % (weights_path, dataset, model.model_type, layer,
                                                        str(lr), '-'.join([str(r) for r in regs]))
    ensureDir(weights_save_path)
    save_saver = tf.train.Saver(max_to_keep=1)

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

Now we move into the `pretrain==1` option

In [13]:
if pretrain == 1:
    layer = '-'.join([str(l) for l in layer_size])

    pretrain_path = '%sweights/%s/%s/%s/l%s_r%s' % (weights_path, dataset, model.model_type, layer,
                                                    str(lr), '-'.join([str(r) for r in regs]))


    ckpt = tf.train.get_checkpoint_state(os.path.dirname(pretrain_path + '/checkpoint'))
    if ckpt and ckpt.model_checkpoint_path:
        # 1. Load pretrained weights
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('load the pretrained model parameters from: ', pretrain_path)

        # *********************************************************
        # get the performance from pretrained model.
        if report != 1:
            # 2. Use the pretained model and compute the performance metrics
            users_to_test = list(data_generator.test_set.keys())
            ret = test(sess, model, users_to_test, drop_flag=True)
            cur_best_pre_0 = ret['recall'][0]

            pretrain_ret = 'pretrained model recall=[%.5f, %.5f], precision=[%.5f, %.5f], hit=[%.5f, %.5f],' \
                           'ndcg=[%.5f, %.5f]' % \
                           (ret['recall'][0], ret['recall'][-1],
                            ret['precision'][0], ret['precision'][-1],
                            ret['hit_ratio'][0], ret['hit_ratio'][-1],
                            ret['ndcg'][0], ret['ndcg'][-1])
            print(pretrain_ret)
    else:
        sess.run(tf.global_variables_initializer())
        cur_best_pre_0 = 0.
        print('without pretraining.')

else:
    sess.run(tf.global_variables_initializer())
    cur_best_pre_0 = 0.
    print('without pretraining.')

without pretraining.


In [14]:
loss_loger, pre_loger, rec_loger, ndcg_loger, hit_loger = [], [], [], [], []
stopping_step = 0
should_stop = False

## TRAIN

Let's just execute one run

In [15]:
t1 = time()
loss, mf_loss, emb_loss, reg_loss = 0., 0., 0., 0.
n_batch = data_generator.n_train // batch_size + 1

for idx in range(n_batch):
    #batch_size lists with user_ids, item_ids they interacted with and item_ids they did not interact with
    users, pos_items, neg_items = data_generator.sample()
    # you know, the tf sess.run fun...
    _, batch_loss, batch_mf_loss, batch_emb_loss, batch_reg_loss = sess.run(
        [model.opt, model.loss, model.mf_loss, model.emb_loss, model.reg_loss],
        feed_dict={model.users: users, 
                   model.pos_items: pos_items,
                   model.node_dropout: node_dropout,
                   model.mess_dropout: mess_dropout,model.neg_items: neg_items})
    loss += batch_loss
    mf_loss += batch_mf_loss
    emb_loss += batch_emb_loss
    reg_loss += batch_reg_loss


perf_str = 'Epoch %d [%.1fs]: train==[%.5f=%.5f + %.5f]' % (1, time() - t1, loss, mf_loss, reg_loss)
print(perf_str)

Epoch 1 [73.8s]: train==[179.00768=178.95593 + 0.00000]


# TEST

Now we have a trained model. Before we run the testing function, let's have a look to all the helpers that will be required.

**Recall**

In [20]:
def recall(rank, ground_truth, N):
    return len(set(rank[:N]) & set(ground_truth)) / float(len(set(ground_truth)))

def recall_at_k(r, k, all_pos_num):
    r = np.asfarray(r)[:k]
    return np.sum(r) / all_pos_num

In [21]:
rank,  ground_truth, N = np.random.choice(50, 10, replace=False), np.arange(10), 5

In [22]:
print(rank)
print(ground_truth)

[23 35 31 33 39 26 28 49 38 24]
[0 1 2 3 4 5 6 7 8 9]


In [23]:
recall(rank, ground_truth, N)

0.0

**Precision**

throughout all these functions, r is binary (nonzero is relevant) 

In [24]:
def precision_at_k(r, k):
    assert k >= 1
    r = np.asarray(r)[:k]
    return np.mean(r)


def average_precision(r,cut):
    r = np.asarray(r)
    out = [precision_at_k(r, k + 1) for k in range(cut) if r[k]]
    if not out:
        return 0.
    return np.sum(out)/float(min(cut, np.sum(r)))

# This is literally their code, and I'd say that the "cut" is missing. 
# Fortunately they are not using this function later
def mean_average_precision(rs):
    return np.mean([average_precision(r) for r in rs])

In [25]:
r =  np.random.choice([0, 1], size=(10), p=[2./3, 1./3])

In [26]:
print(r)

[0 0 1 1 0 0 0 0 0 0]


In [27]:
precision_at_k(r, 5)

0.4

In [28]:
round(average_precision(r, 10), 3)

0.417

**Normalize Discounted Cumulative Gain**

here r can be binary or real scores

In [33]:
def dcg_at_k(r, k, method=1):
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k, method=1):
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max

In [34]:
r =  np.random.choice([0, 1], size=(10), p=[2./3, 1./3])

In [35]:
print(r)

[0 0 0 1 1 1 0 0 0 0]


In [36]:
dcg_at_k(r, 5)

0.8175293653079347

**hit ratio**

In [37]:
def hit_at_k(r, k):
    r = np.array(r)[:k]
    if np.sum(r) > 0:
        return 1.
    else:
        return 0.

In [44]:
r =  np.random.choice([0, 1], size=(10), p=[0.9, 0.1])

In [45]:
print(r)

[0 0 1 0 0 0 0 0 0 0]


In [46]:
hit_at_k(r, 5)

1.0

**AUC**

sklearn's `roc_auc_score`

In [48]:
def auc(ground_truth, prediction):
    try:
        res = roc_auc_score(y_true=ground_truth, y_score=prediction)
    except Exception:
        res = 0.
    return res

**ranklists**

Let's create random inputs for this functions to see how they work

In [77]:
item_score = dict(zip(np.random.choice(100,100,replace=False), np.random.uniform(-3,3,size=100)))
user_pos_test = np.random.choice(200,10,replace=False)

In [78]:
def get_auc(item_score, user_pos_test):
    item_score = sorted(item_score.items(), key=lambda kv: kv[1])
    item_score.reverse()
    item_sort = [x[0] for x in item_score]
    posterior = [x[1] for x in item_score]

    r = []
    for i in item_sort:
        if i in user_pos_test:
            r.append(1)
        else:
            r.append(0)
    auc = auc(ground_truth=r, prediction=posterior)
    return auc

Let's have a look

In [79]:
item_score = sorted(item_score.items(), key=lambda kv: kv[1], reverse=True)

In [80]:
item_sort = [x[0] for x in item_score]
posterior = [x[1] for x in item_score]
print(item_sort[:20]), print(posterior[:20])

[33, 23, 4, 82, 43, 37, 81, 89, 88, 41, 78, 69, 61, 18, 28, 71, 6, 60, 2, 64]
[2.9564403176243577, 2.9330196467457696, 2.867083011350485, 2.848406768391536, 2.833077727251103, 2.7326935137282167, 2.5872690717311677, 2.581285827625182, 2.3968584069271213, 2.363423159165553, 2.355520875361422, 2.2837836700121983, 2.2695655669557286, 2.2597432075553563, 2.163855500098677, 1.9845766756313736, 1.9785417879707943, 1.9085644552504348, 1.7402995931098575, 1.5021268043628462]


(None, None)

Now simply, if the sorted items are among the user positive items, we append them to `r`, building a binary list where positive is 1

In [81]:
r = []
for i in item_sort:
    if i in user_pos_test:
        r.append(1)
    else:
        r.append(0)

The funcion `get_auc` will then return the result of `auc` which is in itself sklearn's `roc_auc_score`

In [82]:
from sklearn.metrics import roc_auc_score
roc_auc_score(r, posterior)

0.6252631578947369

In the case of `ranklist_by_heapq`, `test_items` and `ratings` are the items used in the testing phase and their corresponding ratins. 

In [83]:
def ranklist_by_heapq(user_pos_test, test_items, rating, Ks):
    item_score = {}
    for i in test_items:
        item_score[i] = rating[i]

    K_max = max(Ks)
    K_max_item_score = heapq.nlargest(K_max, item_score, key=item_score.get)

    r = []
    for i in K_max_item_score:
        if i in user_pos_test:
            r.append(1)
        else:
            r.append(0)
    auc = 0.
    return r, auc

let's see how the function goes line by line

In [84]:
Ks

[20, 40, 60, 80, 100]

In [85]:
K_max = max(Ks)

In [86]:
item_score = dict(zip(np.random.choice(100,100,replace=False), np.random.uniform(-3,3,size=100)))

In [87]:
K_max_item_score = heapq.nlargest(K_max, item_score, key=item_score.get)

In [92]:
print(K_max_item_score)

[60, 7, 89, 67, 6, 92, 55, 38, 31, 69, 65, 54, 40, 27, 85, 94, 58, 10, 99, 90, 91, 79, 26, 13, 88, 62, 95, 17, 76, 2, 3, 51, 96, 47, 52, 11, 57, 22, 75, 87, 15, 64, 12, 68, 86, 18, 33, 36, 74, 63, 42, 61, 59, 49, 39, 97, 53, 98, 28, 34, 8, 80, 45, 14, 72, 44, 56, 24, 21, 5, 46, 66, 83, 9, 81, 30, 4, 48, 43, 50, 37, 78, 32, 20, 73, 41, 84, 77, 16, 0, 19, 93, 29, 23, 1, 82, 25, 71, 35, 70]


In [89]:
print(user_pos_test)

[127  78  60 149 181  38  66 140  25 121]


In [90]:
r = []
for i in K_max_item_score:
    if i in user_pos_test:
        r.append(1)
    else:
        r.append(0)

In [91]:
print(r)

[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]


And the next function is exactly the same as `ranklist_by_heapq` only that it returns `r` and `auc` being auc

`auc = get_auc(item_score, user_pos_test)`

In [94]:
def ranklist_by_sorted(user_pos_test, test_items, rating, Ks):
    item_score = {}
    for i in test_items:
        item_score[i] = rating[i]

    K_max = max(Ks)
    K_max_item_score = heapq.nlargest(K_max, item_score, key=item_score.get)

    r = []
    for i in K_max_item_score:
        if i in user_pos_test:
            r.append(1)
        else:
            r.append(0)
    auc = get_auc(item_score, user_pos_test)
    return r, auc


**Test performance with early stopping**

Let's first have a look to the early stopping function. Is not really that complex

In [96]:
def early_stopping(log_value, best_value, stopping_step, expected_order='acc', flag_step=100):
    # early stopping strategy:
    assert expected_order in ['acc', 'dec']

    if (expected_order == 'acc' and log_value >= best_value) or (expected_order == 'dec' and log_value <= best_value):
        stopping_step = 0
        best_value = log_value
    else:
        stopping_step += 1

    if stopping_step >= flag_step:
        print("Early stopping is trigger at step: {} log:{}".format(flag_step, log_value))
        should_stop = True
    else:
        should_stop = False
    return best_value, stopping_step, should_stop

Now let's move again to the evaluation. The function below is simply an "aggregation" of all previous metrics/functions

In [97]:
def get_performance(user_pos_test, r, auc, Ks):
    precision, recall, ndcg, hit_ratio = [], [], [], []

    for K in Ks:
        precision.append(precision_at_k(r, K))
        recall.append(recall_at_k(r, K, len(user_pos_test)))
        ndcg.append(ndcg_at_k(r, K))
        hit_ratio.append(hit_at_k(r, K))

    return {'recall': np.array(recall), 'precision': np.array(precision),
            'ndcg': np.array(ndcg), 'hit_ratio': np.array(hit_ratio), 'auc': auc}

And now is when the test happens. First we test a user and then we will loop through all users

In [98]:
def test_one_user(x):
    # user u's ratings for user u
    rating = x[0]
    #uid
    u = x[1]
    #user u's items in the training set
    try:
        training_items = data_generator.train_items[u]
    except Exception:
        training_items = []
    #user u's items in the test set
    user_pos_test = data_generator.test_set[u]

    all_items = set(range(ITEM_NUM))

    # test items will be all items that are not in the training set
    test_items = list(all_items - set(training_items))

    # we get the binary rank (r) and the auc
    if test_flag == 'part':
        r, auc = ranklist_by_heapq(user_pos_test, test_items, rating, Ks)
    else:
        r, auc = ranklist_by_sorted(user_pos_test, test_items, rating, Ks)

    # and finally we add the auc to all other metrics 
    # (precision, recall, ndcg and hit_ratio) at different Ks values
    return get_performance(user_pos_test, r, auc, Ks)

And the only thing left is just test the performance for all users. Note that, within the `test` function they compute the ratings per user for all items:

        user_batch = test_users[start: end]
        item_batch = range(ITEM_NUM)
        rate_batch = sess.run(model.batch_ratings, {model.users: user_batch,
                                                    model.pos_items: item_batch,
                                                    model.node_dropout: [0.] * len(layer_size),
                                                    model.mess_dropout: [0.] * len(layer_size)})

Then within the `test_one_user()` function, and in particular within the ranklists functions, only those items that are not in training will be taken into account. This is, within the `test_one_user` you will find:

    test_items = list(all_items - set(training_items))

The `test_items` are passed to `ranklist_by_sorted(user_pos_test, test_items, rating, Ks)` and within that function you have:

    item_score = {}
    for i in test_items:
        item_score[i] = rating[i]

So only items in the test_set will be tested. 

In [99]:
def test(sess, model, users_to_test):
    result = {'precision': np.zeros(len(Ks)), 'recall': np.zeros(len(Ks)), 'ndcg': np.zeros(len(Ks)),
              'hit_ratio': np.zeros(len(Ks)), 'auc': 0.}

    u_batch_size = BATCH_SIZE * 2 # I guess this is for speed
    i_batch_size = BATCH_SIZE

    test_users = users_to_test
    n_test_users = len(test_users)
    n_user_batchs = n_test_users // u_batch_size + 1

    count = 0

    for u_batch_id in tqdm(range(n_user_batchs)):
        start = u_batch_id * u_batch_size
        end = (u_batch_id + 1) * u_batch_size

        # for every user we will run the model for all items. This is heavy
        # Since this is testing, we set dropout to 0
        user_batch = test_users[start: end]
        item_batch = range(ITEM_NUM)
        rate_batch = sess.run(model.batch_ratings, {model.users: user_batch,
                                                    model.pos_items: item_batch,
                                                    model.node_dropout: [0.] * len(layer_size),
                                                    model.mess_dropout: [0.] * len(layer_size)})

        user_batch_rating_uid = zip(rate_batch, user_batch)
        batch_result = [test_one_user(x) for x in user_batch_rating_uid]
        count += len(batch_result)

        for re in batch_result:
            result['precision'] += re['precision']/n_test_users
            result['recall'] += re['recall']/n_test_users
            result['ndcg'] += re['ndcg']/n_test_users
            result['hit_ratio'] += re['hit_ratio']/n_test_users
            result['auc'] += re['auc']/n_test_users


    assert count == n_test_users
    return result

In [100]:
users_to_test = list(data_generator.test_set.keys())

In [101]:
ret = test(sess, model, users_to_test)

100%|██████████| 15/15 [08:02<00:00, 28.74s/it]


In [105]:
ret

{'precision': array([0.02270748, 0.0166823 , 0.01370934, 0.01189422, 0.01066582]),
 'recall': array([0.0722882 , 0.10526895, 0.12927107, 0.14885057, 0.1658667 ]),
 'ndcg': array([0.12699322, 0.1534744 , 0.17019852, 0.18291935, 0.1937012 ]),
 'hit_ratio': array([0.32142139, 0.41871525, 0.4769576 , 0.51912385, 0.55362047]),
 'auc': 0.0}

Of course, train and test would be wrapped up in a loop over number of epochs and the early stop strategy is run like this:

In [106]:
cur_best_pre_0, stopping_step, should_stop = early_stopping(ret['recall'][0], cur_best_pre_0,
                                                            stopping_step, expected_order='acc', flag_step=5)

In [107]:
print(cur_best_pre_0, stopping_step, should_stop)

0.07228820021501922 0 False
