# Prise en main du code de l'article NGCF


La prise en main du code du site compagnon n'est pas aisé car le code enchaine de multiples fonctions et méthodes.
L'objectif de ce notebook est donc de disséquer le code du site compagnon afin d'en comprendre les rouages et de revenir aux fondements théoriques de l'article.
( pour une lecture avec un plan, utiliser jupyter lab)

Cela permet également de voir comment réécrire le code en tensorflow 2.x.
Dans les points TF non compatibles ont a le placeholder, l'initialisation de Glorot, random_uniform, sparse_retain, tf.div, tf.sparse_dense_matmul





In [22]:
import numpy as np
import tensorflow as tf

from utility.load_data import *


A des fins exploratoires les attributs de l'objet NGCF sont passés en paramètres :

In [42]:
#donnees ml100
n_users = 943
n_items = 1682
emb_dim = 40 #hyperparamètre
weight_size = [64,64]  #('--layer_size', nargs='?', default='[64]', help='Output sizes of every layer')
n_layers = len(weight_size)  #[64] -> 1 couche.  [64, 64] -> 2 couches
batch_size = 64
decay = 0.1
mess_dropout =  [0.1,0.1,0.1]
#dans un batch on va avoir des items qui seront à la fois positifs et négatifs pour les 2 listes

n_fold = 100
keep_prob = 0.7
node_dropout = [0.7]

In [24]:
from os import getcwd
getcwd()

'/home/jovyan/MonDossier/neural_graph_collaborative_filtering/prise_en_main'

## Création des poids

In [25]:
 def _init_weights(emb_dim, weight_size, n_layers):
        all_weights = dict()

        initializer = tf.keras.initializers.GlorotUniform()
        all_weights['user_embedding'] = tf.Variable(initializer([n_users, emb_dim]), name='user_embedding')
        all_weights['item_embedding'] = tf.Variable(initializer([n_items, emb_dim]), name='item_embedding')
        
        weight_size_list = [emb_dim] + weight_size

        for k in range(n_layers):
            all_weights['W_gc_%d' %k] = tf.Variable(
                initializer([weight_size_list[k], weight_size_list[k+1]]), name='W_gc_%d' % k)
            all_weights['b_gc_%d' %k] = tf.Variable(
                initializer([1, weight_size_list[k+1]]), name='b_gc_%d' % k)

            all_weights['W_bi_%d' % k] = tf.Variable(
                initializer([weight_size_list[k], weight_size_list[k + 1]]), name='W_bi_%d' % k)
            all_weights['b_bi_%d' % k] = tf.Variable(
                initializer([1, weight_size_list[k + 1]]), name='b_bi_%d' % k)

            all_weights['W_mlp_%d' % k] = tf.Variable(
                initializer([weight_size_list[k], weight_size_list[k+1]]), name='W_mlp_%d' % k)
            all_weights['b_mlp_%d' % k] = tf.Variable(
                initializer([1, weight_size_list[k+1]]), name='b_mlp_%d' % k)

        return all_weights

In [26]:
weights = _init_weights( emb_dim = emb_dim, weight_size = weight_size, n_layers = n_layers)

## Accès aux données

In [27]:
# users, pos_items, neg_items = data_generator.sample()

# un neg_item c'est un item du train qui n'est pas dans le batch

data_generator = Data(path='../Data/ml-100k', batch_size=batch_size)



n_users=943, n_items=1682
n_interactions=100000
n_train=90404, n_test=9596, sparsity=0.06305


Pour générer un batch on exécute la méthode sample :

In [28]:
users, pos_items, neg_items = data_generator.sample()

In [29]:
#pos_items, users

On remarque que,marginalement, des items sont à la fois perçus comme positifs et négatifs

In [30]:
len(list(set(pos_items) & set(neg_items)))

2

## Aperçu de la loss

In [31]:
tf.multiply(users, pos_items)

<tf.Tensor: shape=(64,), dtype=int32, numpy=
array([ 228408,  211342,   51030,  122264,  497595,   13175,  667818,
         96480,  382652,   83277,   63648,   77464,  273861,  156420,
         26145,  109612,    4662,  292020,  261999,   48396,    6555,
         10569,   14010,  253764,  278425,  201474,  188856,    9276,
       1029129,   32175,    9106,   69480,  225388,  139896,   27900,
        140996,  291797,  236080,  189270,  830308,  354090,  114448,
          3856,  142690,   38448,    7808,  138047,  388080,   24444,
        245841,  230272,  737019,   88198,  437570,  823528,   11700,
        136290,  178437,    7068,  162165,  142330,  162434,   18270,
        264421], dtype=int32)>

On constate qu'avec un argument négatif très fort la log sigmoid retourne - $\infty$  : 

In [32]:
def create_bpr_loss(decay, batch_size, users, pos_items, neg_items):
        pos_scores = tf.reduce_sum(tf.multiply(users, pos_items))
        neg_scores = tf.reduce_sum(tf.multiply(users, neg_items))

        regularizer = tf.nn.l2_loss(users) + tf.nn.l2_loss(pos_items) + tf.nn.l2_loss(neg_items)
        regularizer = regularizer/batch_size
        
        # In the first version, we implement the bpr loss via the following codes:
        # We report the performance in our paper using this implementation.
        maxi = tf.math.log(tf.nn.sigmoid(pos_scores - neg_scores))
        print("Maxi : ",maxi)
        mf_loss = tf.negative(tf.reduce_mean(maxi))
        print("MF loss V1 :",mf_loss)
        ## In the second version, we implement the bpr loss via the following codes to avoid 'NAN' loss during training:
        ## However, it will change the training performance and training performance.
        ## Please retrain the model and do a grid search for the best experimental setting.
        mf_loss = tf.reduce_sum(tf.nn.softplus(-(pos_scores - neg_scores)))
        print("MF loss V2 :",mf_loss)

        emb_loss = decay * regularizer

        reg_loss = tf.constant(0.0, tf.float32, [1])

        return pos_scores, neg_scores,  mf_loss, emb_loss, reg_loss

In [33]:
users2     = tf.constant(users, dtype='float32')
pos_items2 = tf.constant(pos_items, dtype='float32')
neg_items2 = tf.constant(neg_items, dtype='float32')

tf.multiply(users2, pos_items2)
pos_scores = tf.reduce_sum(tf.multiply(users2, pos_items2))
neg_scores = tf.reduce_sum(tf.multiply(users2, neg_items2))
print(pos_scores - neg_scores)



tf.Tensor(-12241492.0, shape=(), dtype=float32)


In [34]:
create_bpr_loss(decay=decay, batch_size=batch_size, users=users2, pos_items=pos_items2, neg_items=neg_items2)

Maxi :  tf.Tensor(-inf, shape=(), dtype=float32)
MF loss V1 : tf.Tensor(inf, shape=(), dtype=float32)
MF loss V2 : tf.Tensor(12241492.0, shape=(), dtype=float32)


(<tf.Tensor: shape=(), dtype=float32, numpy=12710176.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=24951668.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=12241492.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=75838.6>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>)

L'ordre de grandeur de la loss v2 me parait très (trop) important et je garde en tête d'appliquer éventuellement un **clipping** car on peut avoir des risques d'explosion de gradient.

## Matrice d'adjacence

L'objet Data dans le module utiity/load_data contient l'outillage pour générer des matrices d'adjacence et les sauvegardée sur disque. Si les matrices ne sont pas déjà existentes elles sont crées alors dans le dossier Data du jeu de données en cours.

In [35]:
plain_adj, norm_adj, mean_adj = data_generator.get_adj_mat()

already load adj matrix (2625, 2625) 0.02720165252685547


3 matrices sont ainsi crées, on n'en utilise qu'une seule selon l'option précisée adj_type. J'ai une préférence pour une matrice normalisée.  

In [36]:
norm_adj

<2625x2625 sparse matrix of type '<class 'numpy.float64'>'
	with 183433 stored elements in Compressed Sparse Row format>

In [37]:
norm_adj[0,0],  norm_adj[25,30] 

(0.0040650406504065045, 0.0)

Pour la suite on a besoin de l'attribut n_nonzero_elems :


In [46]:
n_nonzero_elems = norm_adj.count_nonzero()
n_nonzero_elems

183433

# Création des embeddings

Le code gère la problématique de saturation RAM en splittant la matrice d'adjacence dans une liste de sous-matrice et en prenant en compte possiblement l'utilisation de drop out.

On a besoin de pas mal d'outillage pour créer les embeddings :

In [63]:
def _create_ngcf_embed(node_dropout_flag , norm_adj, weights, mess_dropout, n_users, n_items, n_layers, n_fold = n_fold, node_dropout = node_dropout):
# Generate a set of adjacency sub-matrix.
    if node_dropout_flag:
        # node dropout.
        A_fold_hat = _split_A_hat_node_dropout(norm_adj)
    else:
        A_fold_hat = _split_A_hat(norm_adj)

    ego_embeddings = tf.concat([weights['user_embedding'], weights['item_embedding']], axis=0)

    all_embeddings = [ego_embeddings]

    for k in range(0, n_layers):

        temp_embed = []
        for f in range(n_fold):
            temp_embed.append(tf.sparse.sparse_dense_matmul(A_fold_hat[f], ego_embeddings))

        # sum messages of neighbors.
        side_embeddings = tf.concat(temp_embed, 0)
        # transformed sum messages of neighbors.
        sum_embeddings = tf.nn.leaky_relu(
            tf.matmul(side_embeddings, weights['W_gc_%d' % k]) + weights['b_gc_%d' % k])

        # bi messages of neighbors.
        bi_embeddings = tf.multiply(ego_embeddings, side_embeddings)
        # transformed bi messages of neighbors.
        bi_embeddings = tf.nn.leaky_relu(
            tf.matmul(bi_embeddings, weights['W_bi_%d' % k]) + weights['b_bi_%d' % k])

        # non-linear activation.
        ego_embeddings = sum_embeddings + bi_embeddings

        # message dropout.
        ego_embeddings = tf.nn.dropout(ego_embeddings, 1 - mess_dropout[k])

        # normalize the distribution of embeddings.
        norm_embeddings = tf.nn.l2_normalize(ego_embeddings, axis=1)

        all_embeddings += [norm_embeddings]

    all_embeddings = tf.concat(all_embeddings, 1)
    u_g_embeddings, i_g_embeddings = tf.split(all_embeddings, [n_users, n_items], 0)
    return u_g_embeddings, i_g_embeddings



def _convert_sp_mat_to_sp_tensor(X):
    coo = X.tocoo().astype(np.float32)
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensor(indices, coo.data, coo.shape)

def _dropout_sparse(X, keep_prob, n_nonzero_elems):
    """
    Dropout for sparse tensors.
    """
    noise_shape = [n_nonzero_elems]
    random_tensor = keep_prob
    random_tensor += tf.random.uniform(noise_shape)
    dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
    pre_out = tf.sparse.retain(X, dropout_mask)

    return pre_out * tf.math.divide(1., keep_prob)



def _split_A_hat(X):
    A_fold_hat = []

    fold_len = (n_users + n_items) // n_fold
    for i_fold in range(n_fold):
        start = i_fold * fold_len
        if i_fold == n_fold -1:
            end = n_users + n_items
        else:
            end = (i_fold + 1) * fold_len

        A_fold_hat.append(_convert_sp_mat_to_sp_tensor(X[start:end]))
    return A_fold_hat

def _split_A_hat_node_dropout( X):
    A_fold_hat = []

    fold_len = (n_users + n_items) // n_fold
    for i_fold in range(n_fold):
        start = i_fold * fold_len
        if i_fold == n_fold -1:
            end = n_users + n_items
        else:
            end = (i_fold + 1) * fold_len

        # A_fold_hat.append(self._convert_sp_mat_to_sp_tensor(X[start:end]))
        temp = _convert_sp_mat_to_sp_tensor(X[start:end])
        n_nonzero_temp = X[start:end].count_nonzero()
        A_fold_hat.append(_dropout_sparse(temp, 1 - node_dropout[0], n_nonzero_temp))

    return A_fold_hat



In [64]:
_create_ngcf_embed(node_dropout_flag=1 , norm_adj = norm_adj , weights = weights , node_dropout = node_dropout, mess_dropout = mess_dropout, n_users = n_users , n_items = n_items,n_layers = n_layers, n_fold = 100)

(<tf.Tensor: shape=(943, 168), dtype=float32, numpy=
 array([[ 0.01808818, -0.0551286 ,  0.04615904, ...,  0.        ,
          0.        ,  0.        ],
        [-0.07797763,  0.03643166,  0.02175254, ...,  0.        ,
          0.        ,  0.        ],
        [-0.00193043, -0.07539124,  0.06724135, ...,  0.        ,
          0.28094506, -0.        ],
        ...,
        [ 0.04744322,  0.05934034,  0.05376158, ...,  0.        ,
         -0.        , -0.        ],
        [-0.04016246, -0.00218273, -0.04625681, ...,  0.        ,
          0.        ,  0.        ],
        [-0.00753046, -0.06106514, -0.07502777, ...,  0.        ,
          0.        ,  0.        ]], dtype=float32)>,
 <tf.Tensor: shape=(1682, 168), dtype=float32, numpy=
 array([[-0.00897326,  0.03025502,  0.00671209, ...,  0.        ,
          0.        , -0.        ],
        [ 0.00470646, -0.03025699,  0.04256403, ...,  0.        ,
          0.        ,  0.        ],
        [-0.02412724,  0.01497417,  0.04873864

In [None]:
class NGCF(object):
    def __init__(self, n_users, n_items, emb_dim, weight_size, n_layers):
        self.n_users = n_users
        self.n_items = n_items
        self.emb_dim = emb_dim
        self.weight_size = weight_size
        self.weight_size_list = 0
        self.n_layers = n_layers
        self.weights = self._init_weights()
        
        
        self.ua_embeddings, self.ia_embeddings = self._create_ngcf_embed()
        
        self.u_g_embeddings = tf.nn.embedding_lookup(self.ua_embeddings, self.users)
        self.pos_i_g_embeddings = tf.nn.embedding_lookup(self.ia_embeddings, self.pos_items)
        self.neg_i_g_embeddings = tf.nn.embedding_lookup(self.ia_embeddings, self.neg_items)
        
        
        ego_embeddings = tf.concat([self.weights['user_embedding'], self.weights['item_embedding']], axis=0)
        all_embeddings = [ego_embeddings]
         # Original embedding.
        u_e = tf.nn.embedding_lookup(self.weights['user_embedding'], self.users)
        pos_i_e = tf.nn.embedding_lookup(self.weights['item_embedding'], self.pos_items)
        neg_i_e = tf.nn.embedding_lookup(self.weights['item_embedding'], self.neg_items)

        # All ratings for all users.
        #self.batch_ratings = self._create_batch_ratings(u_e, pos_i_e)
        
        self.mf_loss, self.emb_loss, self.reg_loss = self.create_bpr_loss(u_e, pos_i_e, neg_i_e)
        self.loss = self.mf_loss + self.emb_loss + self.reg_loss

        # self.dy_lr = tf.train.exponential_decay(self.lr, self.global_step, 10000, self.lr_decay, staircase=True)
        # self.opt = tf.train.RMSPropOptimizer(learning_rate=self.dy_lr).minimize(self.loss, global_step=self.global_step)
        self.opt = tf.train.RMSPropOptimizer(learning_rate=self.lr).minimize(self.loss)
        # self.updates = self.opt.minimize(self.loss, var_list=self.weights)

        
        
    def _init_weights(self):
        all_weights = dict()

        initializer = tf.keras.initializers.GlorotUniform()
        all_weights['user_embedding'] = tf.Variable(initializer([self.n_users, self.emb_dim]), name='user_embedding')
        all_weights['item_embedding'] = tf.Variable(initializer([self.n_items, self.emb_dim]), name='item_embedding')
        
        self.weight_size_list = [self.emb_dim] + self.weight_size

        for k in range(self.n_layers):
            all_weights['W_gc_%d' %k] = tf.Variable(
                initializer([self.weight_size_list[k], self.weight_size_list[k+1]]), name='W_gc_%d' % k)
            all_weights['b_gc_%d' %k] = tf.Variable(
                initializer([1, self.weight_size_list[k+1]]), name='b_gc_%d' % k)

            all_weights['W_bi_%d' % k] = tf.Variable(
                initializer([self.weight_size_list[k], self.weight_size_list[k + 1]]), name='W_bi_%d' % k)
            all_weights['b_bi_%d' % k] = tf.Variable(
                initializer([1, self.weight_size_list[k + 1]]), name='b_bi_%d' % k)

            all_weights['W_mlp_%d' % k] = tf.Variable(
                initializer([self.weight_size_list[k], self.weight_size_list[k+1]]), name='W_mlp_%d' % k)
            all_weights['b_mlp_%d' % k] = tf.Variable(
                initializer([1, self.weight_size_list[k+1]]), name='b_mlp_%d' % k)

        return all_weights
    
    def create_bpr_loss(decay, batch_size, users, pos_items, neg_items):
        pos_scores = tf.reduce_sum(tf.multiply(users, pos_items))
        neg_scores = tf.reduce_sum(tf.multiply(users, neg_items))

        regularizer = tf.nn.l2_loss(users) + tf.nn.l2_loss(pos_items) + tf.nn.l2_loss(neg_items)
        regularizer = regularizer/batch_size
        
        # In the first version, we implement the bpr loss via the following codes:
        # We report the performance in our paper using this implementation.
        #maxi = tf.math.log(tf.nn.sigmoid(pos_scores - neg_scores))
        #mf_loss = tf.negative(tf.reduce_mean(maxi))
        
        ## In the second version, we implement the bpr loss via the following codes to avoid 'NAN' loss during training:
        ## However, it will change the training performance and training performance.
        ## Please retrain the model and do a grid search for the best experimental setting.
        mf_loss = tf.reduce_sum(tf.nn.softplus(-(pos_scores - neg_scores)))
        

        emb_loss = decay * regularizer

        reg_loss = tf.constant(0.0, tf.float32, [1])

        return pos_scores, neg_scores,  mf_loss, emb_loss, reg_loss
    
    def _create_ngcf_embed(self):
        # Generate a set of adjacency sub-matrix.
        if self.node_dropout_flag:
            # node dropout.
            A_fold_hat = self._split_A_hat_node_dropout(self.norm_adj)
        else:
            A_fold_hat = self._split_A_hat(self.norm_adj)

        ego_embeddings = tf.concat([self.weights['user_embedding'], self.weights['item_embedding']], axis=0)

        all_embeddings = [ego_embeddings]

        for k in range(0, self.n_layers):

            temp_embed = []
            for f in range(self.n_fold):
                temp_embed.append(tf.sparse_tensor_dense_matmul(A_fold_hat[f], ego_embeddings))

            # sum messages of neighbors.
            side_embeddings = tf.concat(temp_embed, 0)
            # transformed sum messages of neighbors.
            sum_embeddings = tf.nn.leaky_relu(
                tf.matmul(side_embeddings, self.weights['W_gc_%d' % k]) + self.weights['b_gc_%d' % k])

            # bi messages of neighbors.
            bi_embeddings = tf.multiply(ego_embeddings, side_embeddings)
            # transformed bi messages of neighbors.
            bi_embeddings = tf.nn.leaky_relu(
                tf.matmul(bi_embeddings, self.weights['W_bi_%d' % k]) + self.weights['b_bi_%d' % k])

            # non-linear activation.
            ego_embeddings = sum_embeddings + bi_embeddings

            # message dropout.
            ego_embeddings = tf.nn.dropout(ego_embeddings, 1 - self.mess_dropout[k])

            # normalize the distribution of embeddings.
            norm_embeddings = tf.math.l2_normalize(ego_embeddings, axis=1)

            all_embeddings += [norm_embeddings]

        all_embeddings = tf.concat(all_embeddings, 1)
        u_g_embeddings, i_g_embeddings = tf.split(all_embeddings, [self.n_users, self.n_items], 0)
        return u_g_embeddings, i_g_embeddings


In [None]:
model = NGCF(n_users, n_items, emb_dim, weight_size,n_layers)
poids = model._init_weights()

In [None]:
poids

In [None]:
initializer = tf.keras.initializers.GlorotUniform()

model = keras.Sequential(
    [
        tf.keras.layers.Embedding(input_dim, output_dim,embeddings_initializer='GlorotUniform'),
        layers.Dense(3, activation="relu", name="layer2"),
        layers.Dense(4, name="layer3"),
    ]
)
# Call model on a test input
x = tf.ones((3, 3))
y = model(x)





In [None]:
x = tf.Variable(tf.random.uniform([5, 30], -1, 1))


In [None]:
split0, split1, split2 = tf.split(x, [4, 15, 11], 1)


In [None]:
split0