## SASRec in TensorFlow 2.0

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import time
import argparse
import tensorflow as tf
import numpy as np
import sys
from tqdm import tqdm
import scrapbook as sb

# Report the interpreter and TensorFlow versions this notebook runs with.
system_version = sys.version
tensorflow_version = tf.__version__
print("System version: {}".format(system_version))
print("Tensorflow version: {}".format(tensorflow_version))

System version: 3.7.7 (default, Mar 26 2020, 15:48:22) 
[GCC 7.3.0]
Tensorflow version: 2.3.0


In [None]:
import os

# Disable GPU: hide every CUDA device from this process by setting
# CUDA_VISIBLE_DEVICES to "-1".
# NOTE(review): TensorFlow has already been imported by an earlier cell; this
# presumably has to run before any GPU device is initialized to take effect - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
# Put the SASRec module directory at the front of sys.path; the next cell imports
# `sampler`, `model`, `model_ssept` and `util` by bare module name.
# The directory can be overridden through the SASREC_MODULE_DIR environment
# variable; the default is the original hard-coded checkout location.
SASREC_MODULE_DIR = os.environ.get(
    "SASREC_MODULE_DIR",
    "/recsys_data/RecSys/recommenders-tf2/myfork/recommenders/recommenders/models/sasrec",
)
# Drop any earlier occurrence first so that re-running this cell keeps a single
# entry, still at position 0, instead of stacking duplicates.
sys.path[:] = [entry for entry in sys.path if entry != SASREC_MODULE_DIR]
sys.path.insert(0, SASREC_MODULE_DIR)

In [4]:
from sampler import WarpSampler
from model import SASREC
from model_ssept import SSEPT
from util import *

In [5]:
def create_combined_dataset(u, seq, pos, neg, seq_max_len):
    """Assemble one sampled batch into (model inputs, target labels).

    The three sequence batches are left-padded and left-truncated to
    `seq_max_len` with Keras `pad_sequences`.

    Args:
        u: Batch of user ids; converted to an array with a trailing axis of size 1.
        seq: Batch of input item sequences.
        pos: Batch of positive item sequences.
        neg: Batch of negative item sequences.
        seq_max_len: Length every sequence is padded / truncated to.

    Returns:
        tuple: `(inputs, target)`. `inputs` is a dict with the keys 'users',
        'input_seq', 'positive' and 'negative'. `target` is an integer column
        vector holding `rows * cols` ones followed by `rows * cols` zeros,
        where `rows x cols` is the shape of the padded input sequences.
    """

    def _pad(batch):
        # Same padding policy for all three sequence batches.
        return tf.keras.preprocessing.sequence.pad_sequences(
            batch, padding='pre', truncating='pre', maxlen=seq_max_len
        )

    seq, pos, neg = _pad(seq), _pad(pos), _pad(neg)

    inputs = {
        'users': np.expand_dims(np.array(u), axis=-1),
        'input_seq': seq,
        'positive': pos,
        'negative': neg,
    }

    # One label per (row, position): all positives first, then all negatives.
    num_positions = seq.shape[0] * seq.shape[1]
    target = np.repeat([1, 0], num_positions)[:, np.newaxis]
    return inputs, target


In [6]:
# Relative path to the SASRec YAML configuration file.
# NOTE(review): `yaml_file` is not referenced by any other visible cell (the
# hyperparameters are set inline further down) - confirm whether it is still needed.
yaml_file = '../../recommenders/models/sasrec/config/sas_rec.yaml'  

In [7]:
# Run-level configuration.
num_epochs = 5
batch_size = 128
RANDOM_SEED = 100  # Set None for non-deterministic result

# Actually apply the seed: previously RANDOM_SEED was declared but never used,
# so runs were never seeded despite the comment above.
# NOTE(review): the batch sampler is created with several workers later in the
# notebook, so batch order may still vary between runs - confirm.
if RANDOM_SEED is not None:
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

# Relative location of the test resources directory.
data_path = os.path.join("..", "..", "tests", "resources", "deeprec", "slirec")

In [8]:
# Dataset location and name; the input file is read from <data_dir>/<dataset>.txt.
data_dir = "/recsys_data/RecSys/SASRec-tf2/data/"
dataset = "ae"
# batch_size and num_epochs are re-assigned here with the same values as in the
# previous configuration cell.
batch_size = 128
num_epochs = 5
lr = 0.001  # learning rate given to the Adam optimizer

maxlen = 50  # sequence length: `seq_max_len` of the model, `maxlen` of the sampler
num_blocks = 2  # passed to the model as `num_blocks`
hidden_units = 100  # used for both `embedding_dim` and `attention_dim`
num_heads = 1  # passed to the model as `attention_num_heads`
dropout_rate = 0.1  # passed to the model as `dropout_rate`
l2_emb = 0.0  # passed to the model as `l2_reg`
num_neg_test = 100  # passed to the model and to evaluate() / evaluate_valid()

In [9]:
# Load the interaction file and split it into train / validation / test data.
inp_file = os.path.join(data_dir, dataset + ".txt")
print(inp_file)

# NOTE: `dataset` is rebound here from the dataset name (a string) to the
# partitioned data, so this cell cannot be re-run on its own without first
# re-running the configuration cell that sets the name.
dataset = data_partition(inp_file)
user_train, user_valid, user_test, usernum, itemnum = dataset

# Number of training steps per epoch (whole batches only).
num_steps = len(user_train) // batch_size

# Total number of training interactions, kept as a float like before.
cc = float(sum(len(user_train[u]) for u in user_train))
print('%g Users and %g items' % (usernum, itemnum))
print('average sequence length: %.2f' % (cc / len(user_train)))

/recsys_data/RecSys/SASRec-tf2/data/ae.txt
63114 Users and 85930 items
average sequence length: 13.04


In [10]:
# Build the SASRec model from the hyperparameters defined in the configuration cell.
sasrec_params = dict(
    item_num=itemnum,
    seq_max_len=maxlen,
    num_blocks=num_blocks,
    embedding_dim=hidden_units,
    attention_dim=hidden_units,
    attention_num_heads=num_heads,
    dropout_rate=dropout_rate,
    l2_reg=l2_emb,
    num_neg_test=num_neg_test,
)
model = SASREC(**sasrec_params)

In [11]:
# Adam optimizer used by the custom training step to apply gradients.
adam_settings = {
    "learning_rate": lr,
    "beta_1": 0.9,
    "beta_2": 0.999,
    "epsilon": 1e-7,
}
optimizer = tf.keras.optimizers.Adam(**adam_settings)

def loss_function(pos_logits, neg_logits, istarget):
    """Masked binary cross-entropy over positive and negative logits.

    Only column 0 of each logits tensor is used. The inputs are treated as raw
    logits: a sigmoid is applied here before taking the log.

    Args:
        pos_logits: Logits of the positive items; column 0 is read.
        neg_logits: Logits of the negative items; column 0 is read.
        istarget: Mask multiplied into both terms; its sum is the normalizer.

    Returns:
        Masked loss divided by `sum(istarget)`, plus the regularization loss
        reported by `tf.compat.v1.losses.get_regularization_loss()`.
    """
    eps = 1e-24  # keeps the argument of log() strictly positive

    pos_scores = pos_logits[:, 0]
    neg_scores = neg_logits[:, 0]

    pos_term = -tf.math.log(tf.math.sigmoid(pos_scores) + eps) * istarget
    neg_term = tf.math.log(1 - tf.math.sigmoid(neg_scores) + eps) * istarget
    loss = tf.reduce_sum(pos_term - neg_term) / tf.reduce_sum(istarget)

    # NOTE(review): with TF2 / Keras layers, regularizer penalties are usually
    # tracked on `model.losses` rather than in this v1 collection - confirm this
    # term is non-zero when l2_reg > 0.
    return loss + tf.compat.v1.losses.get_regularization_loss()

# Running-mean metrics. Only `train_loss` is updated by train_step;
# `train_accuracy` is never updated in the visible cells.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')


def _int64_spec(width):
    """Return a TensorSpec for an int64 tensor of shape (None, width)."""
    return tf.TensorSpec(shape=(None, width), dtype=tf.int64)


# Fixed input signature for train_step: a feature dict followed by a target tensor.
train_step_signature = [
    {
        'users': _int64_spec(1),
        'input_seq': _int64_spec(maxlen),
        'positive': _int64_spec(maxlen),
        'negative': _int64_spec(maxlen),
    },
    _int64_spec(1),
]

@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
    """Run one optimization step on a batch and return its loss.

    Args:
        inp: Feature dict matching the first entry of `train_step_signature`.
        tar: Target tensor matching the second entry of the signature. It is
            accepted but not used when computing the loss.

    Returns:
        The loss tensor for this batch (also fed into the `train_loss` metric).
    """
    with tf.GradientTape() as tape:
        pos_logits, neg_logits, loss_mask = model(inp, training=True)
        loss = loss_function(pos_logits, neg_logits, loss_mask)

    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    train_loss(loss)
    return loss

In [12]:
# Batch sampler configured with 3 workers; its next_batch() is unpacked by the
# training loop as (u, seq, pos, neg).
# NOTE(review): the sampler is never closed in the visible cells - if WarpSampler
# exposes a close/shutdown method, consider calling it after training; confirm.
sampler = WarpSampler(
    user_train,
    usernum,
    itemnum,
    batch_size=batch_size,
    maxlen=maxlen,
    n_workers=3,
)

In [13]:
# Train for `num_epochs` epochs. On every second epoch the training clock is
# paused, validation and test metrics are printed, and the clock is restarted.
# A final test evaluation runs after the last epoch.
eval_every = 2

T = 0.0  # accumulated training time in seconds; evaluation time is not counted
t0 = time.time()

for epoch in range(1, num_epochs + 1):

    step_loss = []
    train_loss.reset_states()
    progress = tqdm(range(num_steps), total=num_steps, ncols=70, leave=False, unit='b')
    for step in progress:
        u, seq, pos, neg = sampler.next_batch()
        inputs, target = create_combined_dataset(u, seq, pos, neg, maxlen)
        loss = train_step(inputs, target)
        step_loss.append(loss)

    if epoch % eval_every != 0:
        continue

    # Stop the clock before evaluating so T only measures training.
    t1 = time.time() - t0
    T += t1
    print('Evaluating...')
    t_test = evaluate(model, dataset, maxlen, num_neg_test)
    t_valid = evaluate_valid(model, dataset, maxlen, num_neg_test)
    print(f"\nepoch: {epoch}, time: {T}, valid (NDCG@10: {t_valid[0]}, HR@10: {t_valid[1]})")
    print(f"epoch: {epoch}, time: {T},  test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})")

    # Restart the clock for the next stretch of training.
    t0 = time.time()

t_test = evaluate(model, dataset, maxlen, num_neg_test)
print(f"\nepoch: {epoch}, test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})")


  0%|                                          | 0/493 [00:00<?, ?b/s]

Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.


                                                                      

Evaluating...


  0%|▏                                 | 2/493 [00:00<00:30, 16.04b/s]


epoch: 2, time: 64.906325340271, valid (NDCG@10: 0.3211998443237736, HR@10: 0.5113)
epoch: 2, time: 64.906325340271,  test (NDCG@10: 0.2711126682237444, HR@10: 0.4452)


                                                                      

Evaluating...


  0%|▏                                 | 2/493 [00:00<00:29, 16.70b/s]


epoch: 4, time: 126.3671600818634, valid (NDCG@10: 0.3402122173401346, HR@10: 0.5383)
epoch: 4, time: 126.3671600818634,  test (NDCG@10: 0.2973869443338476, HR@10: 0.4787)


                                                                      


epoch: 5, test (NDCG@10: 0.3014105123529007, HR@10: 0.4875)




In [17]:
# Summarize the final test metrics (index 0 is reported as NDCG@10, index 1 as HR@10).
final_ndcg, final_hit = t_test[0], t_test[1]
res_syn = {"ndcg@10": final_ndcg, "Hit@10": final_hit}
print(res_syn)

{'ndcg@10': 0.3014105123529007, 'Hit@10': 0.4875}


In [15]:
# Record the final test metrics with scrapbook so papermill-based tests can
# read them back - this cell can be ignored when reading the notebook.
for glue_name, glue_value in (("ndcg@10", t_test[0]), ("Hit@10", t_test[1])):
    sb.glue(glue_name, glue_value)