In [1]:
import tensorflow.keras as keras
from tensorflow.keras import backend as K
import numpy as np
from time import time
from olddatasetclass import Dataset
from evaluate import evaluate_model
from tensorflow.keras.optimizers import Adam, SGD
from item_to_genre import item_to_genre
import pandas as pd
from aux_loss import aux_crossentropy_loss
from utils import get_train_instances
import tensorflow as tf

In [2]:
def normalize(x):
    return x/(tf.norm(x) + 1)

norm_layer = keras.layers.Lambda(normalize)

In [38]:
x = tf.constant([1,1,2], dtype='float32')
y = normalize(x)
sess = tf.Session()
print(sess.run(y))

[0.28989795 0.28989795 0.5797959 ]


In [65]:
def init_normal(shape=[0, 0.05], seed=None):
    mean, stddev = shape
    return keras.initializers.RandomNormal(
        mean=mean, stddev=stddev, seed=seed)


def normalize(tensor):
    K.l2_normalize(tensor)
    return tensor


def get_model(num_users, num_items, num_tasks,
              e_dim=16, mlp_layer=[32], reg=0):
    """
    This function is used to get the Att-Mul-MF model described
    in the paper.
    Args:
        :param num_users: number of users in the dataset
        :param num_items: number of items in the dataset
        :param num_tasks: number of tasks (item genres)
        :param e_dim: the embedding dimension
        :param f_dim: the preference feature space dimension
        :param reg: regularization coefficient
    """
    num_layer = len(mlp_layer)
    # Input variables
    user_input = keras.layers.Input(shape=(1,), dtype='int32',
                                    name='user_input')
    item_input = keras.layers.Input(shape=(1,), dtype='int32',
                                    name='item_input')

    user_embedding = keras.layers.Embedding(
        input_dim=num_users, output_dim=int(e_dim),
        name='user_embedding',
        embeddings_initializer=init_normal(),
        embeddings_regularizer=keras.regularizers.l2(reg),
        input_length=1)

    item_embedding = keras.layers.Embedding(
        input_dim=num_items, output_dim=int(e_dim),
        name='item_embedding',
        embeddings_initializer=init_normal(),
        embeddings_regularizer=keras.regularizers.l2(reg),
        input_length=1)

#     aux_item_embedding = keras.layers.Embedding(
#         input_dim=num_items, output_dim=int(mlp_layer[0]),
#         name='aux_item_embedding',
#         embeddings_initializer=init_normal(),
#         embeddings_regularizer=keras.regularizers.l2(reg),
#         input_length=1)

    # Flatten the output tensor
    user_latent = keras.layers.Flatten()(user_embedding(user_input))
    item_latent = keras.layers.Flatten()(item_embedding(item_input))
#     aux_item_latent = keras.layers.Flatten()(aux_item_embedding(item_input))

    # GMF layer
    gmf_vector = keras.layers.Multiply()([user_latent, item_latent])


    # item vector feature extraction, split at the last layer
#     for idx in range(1, num_layer-1):
#         layer = keras.layers.Dense(
#             units=mlp_layer[idx],
#             activation='relu',
#             kernel_initializer='lecun_uniform',
#             kernel_regularizer=keras.regularizers.l2(reg),
#             name='aux_item_layer_{:d}'.format(idx))
#         aux_item_latent = layer(aux_item_latent)

    # create multitask item output.
    item_feature_list = []  # all item features are stored here
    for idx in range(0, num_tasks):
        layer = keras.layers.Dense(
            units=e_dim,
            activation='relu',
            kernel_initializer='lecun_uniform',
            kernel_regularizer=keras.regularizers.l2(reg),
            name='item_task_feature_{:d}'.format(idx))

        item_feature = layer(item_latent)
        item_feature = norm_layer(item_feature)
        item_feature_list.append(item_feature)

    item_out_list = []   # all item outputs are stored here
    for idx in range(0, num_tasks):
        layer = keras.layers.Dense(
            units=1,
            activation='relu',
            kernel_initializer='lecun_uniform',
            kernel_regularizer=keras.regularizers.l2(reg),
            kernel_constraint=keras.constraints.NonNeg(),
            name='item_task_out_{:d}'.format(idx))

        item_task_output = layer(item_feature_list[idx])
        item_out_list.append(item_task_output)

    item_outputs = keras.layers.Concatenate(name='item_outputs')(item_out_list)


    # Compute attention scores use item_feature_list
    item_feature_matrix = keras.layers.Concatenate()(item_feature_list)
    item_feature_matrix = keras.layers.Reshape(
        (num_tasks, mlp_layer[-1]))(item_feature_matrix)
    weight_vector = keras.layers.Dot(axes=(-1, -1))(
        [item_feature_matrix, gmf_vector])

#     weight_vector = keras.layers.Activation('softmax')(weight_vector)
    att_vector = keras.layers.Dot(axes=(-1, -2), name='attention_layer')(
        [weight_vector, item_feature_matrix])
    
    subtracted_vector = keras.layers.Subtract()([gmf_vector, att_vector])
    subtracted_out = keras.layers.Dot(axes=(-1, -1), name='l2_norm')([subtracted_vector, subtracted_vector])
    # att_vector = keras.layers.Flatten(name='attention_layer')(att_vector)

    #  Concatenate mlp_vector and att_vector
    # pred_vector = keras.layers.Concatenate()([gmf_vsector, att_vector])

    prediction = keras.layers.Dense(
        units=1, activation='sigmoid',
        kernel_initializer='lecun_uniform',
        kernel_regularizer=keras.regularizers.l2(reg),
        kernel_constraint=keras.constraints.NonNeg(),
        name='prediction')(gmf_vector)

    model = keras.models.Model(inputs=[user_input, item_input],
                               outputs=[prediction, item_outputs, subtracted_out])
    att_out = keras.models.Model(inputs=[user_input, item_input],
                               outputs=[weight_vector])
    return (model, att_out)

In [139]:

class Args(object):
    """Used to generate different sets of arguments"""
    def __init__(self):
        # default vaules
        self.model_name = 'att_cf'
        self.path = 'Data/'
        self.dataset = 'ml-1m'
        self.epochs = 10
        self.batch_size = 2024
        self.num_tasks = 18
        self.e_dim = 32
        self.mlp_layer = [256, 128, 64, 32]
        self.reg = 0
        self.num_neg = 4
        self.lr = 0.001
        self.loss_weights = [1, 0.5, 0.5]
        self.K = 10
        self.K2 = 20
        self.out = 1
        self.gmf_pretrain = ''
        self.mlp_pretrain = ''


def fit(args=Args()):
    # args = Args()
    model_out_file = 'Pretrain/%s_%s_%d_%s_%d.h5' %(args.model_name, args.dataset, args.e_dim, args.mlp_layer, time())
    result_out_file = 'outputs/%s_%s_top%d_edim%d_layer%s_%d.csv' %(args.model_name, args.dataset,
                                                                         args.K, args.e_dim,args.mlp_layer, time())
    topK = args.K
    topK2 = args.K2
    print("%s arguments: %s " % (args.model_name, [args.dataset, args.e_dim, args.mlp_layer]))

    # Load data
    t1 = time()
    if args.dataset == 'ml-1m':
        num_users = 6040
        num_items = 3952  # need modification
    elif args.dataset == 'ml-100k':
        num_users = 943
        num_items = 1682
    elif args.dataset == 'ciao':
        num_users = 17615 + 1
        num_items = 16121 + 1
    else:
        raise Exception('wrong dataset size!!!')

    dataset = Dataset(args.path, args.dataset)
    train, testRatings, testNegatives = (dataset.train_ratings,
                                         dataset.test_ratings,
                                         dataset.negatives)

    print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
          % (time()-t1, num_users, num_items, train.shape[0],
             testRatings.shape[0]))

    # Build model, att model is a sub-routine, no need to train it
    model, att_out = get_model(num_users,
                      num_items,
                      num_tasks=args.num_tasks,
                      e_dim=args.e_dim,
                      mlp_layer=args.mlp_layer,
                      reg=args.reg)

    model.compile(optimizer=Adam(lr=args.lr), loss='binary_crossentropy', loss_weights=args.loss_weights)
    print(model.summary())

    # Load pretrain model
    if args.gmf_pretrain != '' and args.mlp_layer != '':
        gmf = att_gmf_model.get_model(num_users, num_items, args.num_tasks, e_dim=args.e_dim, mlp_layer=args.mlp_layer, reg=args.reg)
        gmf.load_weights(args.gmf_pretrain)
        mlp = att_mlp_model.get_model(num_users, num_items, args.num_tasks, e_dim=args.e_dim, mlp_layer=args.mlp_layer, reg=args.reg)
        mlp.load_weights(args.mlp_pretrain)
        model = load_pretrain_model(model, gmf, mlp, len(args.mlp_layer),args.num_tasks)
        print("Load pretrained GMF (%s) and MLP (%s) models done. " %(args.gmf_pretrain, args.mlp_pretrain))

    # Init performance
    (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK)
    (hits2, ndcgs2) = evaluate_model(model, testRatings, testNegatives, topK2)
    hr, ndcg = np.array(hits).mean(), np.array(ndcgs).mean()
    print('Init: HR = %.4f, NDCG = %.4f' % (hr, ndcg))
    best_hr, best_ndcg, best_iter = hr, ndcg, -1

    #dummy_genre = np.random.randn(4970845, args.num_tasks)
    
    # save Hit ratio and ndcg, loss
    output = pd.DataFrame(columns=['hr', 'ndcg', 'loss'])
    loss = 1.0 ## TODO
    output.loc[0] = [hr, ndcg, loss]

    # Training model
    for epoch in range(int(args.epochs)):
        t1 = time()
        # Generate training instances
        user_input, item_input, labels = get_train_instances(train, args.num_neg, num_items, args.num_neg)
        dummy_genre = item_to_genre(item_input, data_size=args.dataset).values
        dummy_genre = np.nan_to_num(dummy_genre)
        dummy_sim = np.zeros(len(user_input))
         # Training
        hist = model.fit([np.array(user_input), np.array(item_input)], #input
                         [np.array(labels), dummy_genre, dummy_sim], # labels 
                         batch_size=args.batch_size, epochs=1, verbose=1, shuffle=True)
        t2 = time()

        
        # Evaluation
        if epoch %1 == 0:
            (hits, ndcgs) = evaluate_model(model, testRatings, testNegatives, topK)
            (hits2, ndcgs2) = evaluate_model(model, testRatings, testNegatives, topK2)
            hr, ndcg, loss = np.array(hits).mean(), np.array(ndcgs).mean(), hist.history['loss'][0]
            hr2, ndcg2 = np.array(hits2).mean(), np.array(ndcgs2).mean()
            print('Iteration %d [%.1f s]: HR = %.4f, NDCG = %.4f, loss = %.4f [%.1f s]' 
                  % (epoch,  t2-t1, hr, ndcg, loss, time()-t2))
            print('K2 Iteration %d [%.1f s]: HR = %.4f, NDCG = %.4f, loss = %.4f [%.1f s]' 
                  % (epoch,  t2-t1, hr2, ndcg2, loss, time()-t2))
            if hr > best_hr:
                best_hr, best_ndcg, best_iter = hr, ndcg, epoch
                if args.out > 0:
                    model.save_weights(model_out_file, overwrite=True)

            output.loc[epoch+1] = [hr, ndcg, loss]
            
            # check user 0, item 1286 during training
            # 1286: 1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
            u = np.array([0])
            i = np.array([1286])
            att = att_out.predict([u, i])
            print(att)
        
    
            

    
    output.to_csv(result_out_file, index=False)
    print("End. Best Iteration %d:  HR = %.4f, NDCG = %.4f. " %(best_iter, best_hr, best_ndcg))
    return (model, att_out)
    


if __name__ == '__main__':
    args1 = Args()
    model, att_out = fit(args1)
    


att_cf arguments: ['ml-1m', 32, [256, 128, 64, 32]] 
Load data done [11.9 s]. #user=6040, #item=3952, #train=6040, #test=6040
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
item_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 32)        126464      item_input[0][0]                 
__________________________________________________________________________________________________
flatten_9 (Flatten)             (None, 32)           0           item_embedding[0][0]             
__________________________________________________________________________________________________
user_input (InputLayer)         (None, 1)            0                            

Init: HR = 0.1008, NDCG = 0.0444
Iteration 0 [113.4 s]: HR = 0.5397, NDCG = 0.3078, loss = 0.4946 [14.2 s]
K2 Iteration 0 [113.4 s]: HR = 0.7296, NDCG = 0.3558, loss = 0.4946 [14.2 s]
[[0.03310277 0.0533912  0.         0.00145177 0.         0.
  0.         0.04116584 0.         0.         0.         0.
  0.         0.         0.         0.         0.00056885 0.        ]]
Iteration 1 [104.9 s]: HR = 0.6169, NDCG = 0.3513, loss = 0.3330 [14.0 s]
K2 Iteration 1 [104.9 s]: HR = 0.7882, NDCG = 0.3949, loss = 0.3330 [14.0 s]
[[0.03254514 0.03953579 0.         0.         0.         0.
  0.         0.02952419 0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]]
Iteration 2 [105.0 s]: HR = 0.6505, NDCG = 0.3749, loss = 0.2904 [14.0 s]
K2 Iteration 2 [105.0 s]: HR = 0.8113, NDCG = 0.4157, loss = 0.2904 [14.0 s]
[[0.03459149 0.03703922 0.         0.         0.         0.
  0.         0.02843433 0.         0.         0.         0.
  0.         0

In [34]:
class Args(object):
    """Used to generate different sets of arguments"""
    def __init__(self):
        # default vaules
        self.model_name = 'att_cf'
        self.path = 'Data/'
        self.dataset = 'ml-1m'
        self.epochs = 20
        self.batch_size = 1024
        self.num_tasks = 18
        self.e_dim = 32
        self.mlp_layer = [256, 128, 64, 32]
        self.reg = 0
        self.num_neg = 4
        self.lr = 0.001
        self.loss_weights = [1, 0.5, 0.5]
        self.K = 10
        self.K2 = 20
        self.out = 1
        self.gmf_pretrain = ''
        self.mlp_pretrain = ''
args = Args()
model, att_out = get_model(6040,
                  3952,
                  num_tasks=args.num_tasks,
                  e_dim=args.e_dim,
                  mlp_layer=args.mlp_layer,
                  reg=args.reg)

u = np.array([0])
i = np.array([2251])
att = att_out.predict([u, i])

In [206]:
def softmax(x):
    return np.exp(x)/sum(np.exp(x))

In [207]:
u = np.array([52])
i = np.array([292])
att = att_out.predict([u, i])
softmax(att)

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.]], dtype=float32)

In [198]:
u = np.array([40])
i = np.array([1371])
att = att_out.predict([u, i])
att/np.sum(np.absolute(att))

array([[ 0.07937698,  0.23666833,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.6772463 ,
        -0.00465667, -0.00205176,  0.        ]], dtype=float32)

In [180]:
u = np.array([2871])
i = np.array([1371])
p = model.predict([u,i])[0]
p

array([[0.14457345]], dtype=float32)

In [55]:
u = np.array([12])
i = np.array([1371])
att = att_out.predict([u, i])
att

array([[-0.02507338, -0.01671254,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        , -0.04235794,
         0.        ,  0.        ,  0.        ]], dtype=float32)

In [90]:
np.linalg.norm(att)

0.016627992

In [91]:
att

array([[0.0113492 , 0.00186937, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.01200796,
        0.        , 0.        , 0.        ]], dtype=float32)

In [203]:
att

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.06310958, 0.        , 0.06819441, 0.        , 0.        ,
        0.00252415, 0.        , 0.03397936, 0.04128227, 0.        ,
        0.04386356, 0.        , 0.        ]], dtype=float32)