In [9]:
import tensorflow as tf
from hgru4rec.user_par_mini_batch import input_fn, UserParallelMiniBatchDataset
from tensorflow.contrib.cudnn_rnn import CudnnGRU
from tensorflow.keras.layers import GRU, Dense, Dropout

In [2]:
tf.enable_eager_execution()

In [3]:
params = dict()
params['num_units_session'] = 25
params['num_units_user'] = 50
params['num_products'] = 2600000
params['embedding_size'] = 25
params['user_rnn_layers'] = 2
params['user_rnn_units'] = 50
params['session_rnn_layers'] = 2
params['session_rnn_units'] = 25
params['num_negative_samples'] = 10

In [4]:
# get datapoint iterator
dataset = input_fn(10, 'gs://ma-muy/baseline_dataset/sessions_by_user/', 3, epochs=2)
datapoints = dataset.make_one_shot_iterator()

Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, use
    tf.py_function, which takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    
Instructions for updating:
Colocations handled automatically by placer.


In [25]:
datapoint = next(datapoints)
features, labels = datapoint
features

{'UserId': <tf.Tensor: id=1209, shape=(10,), dtype=int64, numpy=
 array([ 514600, 2130900, 3111300, 1140200, 2497600, 1234000,  377000,
        3035900, 1309300,  348600])>,
 'ProductId': <tf.Tensor: id=1207, shape=(10,), dtype=int64, numpy=
 array([     -1, 5765285, 5631179, 4684774,      -1, 2581879,  437249,
        6045207,  708882, 5884728])>,
 'EmbeddingId': <tf.Tensor: id=1206, shape=(10,), dtype=int64, numpy=array([-1, 21, 22, 23, -1,  5, 24, 25, 19, 26])>,
 'UserEmbeddingId': <tf.Tensor: id=1208, shape=(10,), dtype=int64, numpy=array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])>}

In [7]:
batch_size = features['UserId'].shape[0]

# Mask describing ended sessions, true if session ended
ended_sessions_mask = tf.get_variable(
    'ended_sessions_mask',
    shape=(batch_size,),
    initializer=tf.zeros_initializer(),
    trainable=False,
    dtype=tf.bool)

# Mask describing ending sessions, true if session is ending
ending_sessions_mask = tf.get_variable(
    'ending_sessions_mask',
    shape=(batch_size,),
    initializer=tf.zeros_initializer(),
    trainable=False,
    dtype=tf.bool)

# Mask describing ended users, true if not more user events
ended_users_mask = tf.get_variable(
    'ended_users_mask',
    shape=(batch_size,),
    initializer=tf.zeros_initializer(),
    trainable=False,
    dtype=tf.bool)

# Hidden states of session_rnn
session_hidden_states = tf.get_variable(
    'session_hidden_states',
    shape=(batch_size, params['session_rnn_units']),
    initializer=tf.zeros_initializer())

# User hidden_states, updated by user_rnn
user_hidden_states = tf.get_variable(
    'user_hidden_states',
    shape=(batch_size, params['user_rnn_units']),
    initializer=tf.zeros_initializer()
)

# Softmax weights to map RNN output to product space
softmax_weights = tf.get_variable(
    'softmax_weights',
    shape=(params['num_products'], params['session_rnn_units']))

# Biases for above
softmax_biases = tf.get_variable(
    'softmax_biases',
    shape=(params['num_products'],))

In [10]:
user_rnn = GRU(
    # params['user_rnn_layers'],
    params['user_rnn_units'],
    return_state=True,
    implementation=2,
    dropout=0.1,
    name='user_rnn')

session_rnn = GRU(
    # params['session_rnn_layers'],
    params['session_rnn_units'],
    return_state=True,
    implementation=2,
    dropout=0.2,
    name='session_rnn')

# Layer to predict new session initialization
user2session_layer = Dense(
    params['session_rnn_units'],
    input_shape=(params['user_rnn_units'],),
    activation='tanh',
    name='user2session_layer')

# Dropout layer for session initialization
user2session_dropout = Dropout(0.2)

In [11]:
# Reset Session Hidden States to 0 for new users
session_hidden_states = tf.where(
    ended_users_mask,
    tf.zeros(tf.shape(session_hidden_states)),
    session_hidden_states,
    name='reset_session_hidden_states')

# Reset User Hidden States to 0 for new users
user_hidden_states = tf.where(
    ended_users_mask,
    tf.zeros(tf.shape(user_hidden_states)),
    user_hidden_states,
    name='reset_user_hidden_states'
)

In [12]:
# Compute new user representation for all users in current batch
new_session_hidden_states_seed, new_user_hidden_states = user_rnn.apply(
    tf.expand_dims(session_hidden_states, 1),
    initial_state=user_hidden_states)

# Predict new session initialization for next session
new_session_hidden_states = user2session_layer.apply(
    new_session_hidden_states_seed)

new_session_hidden_states = user2session_dropout.apply(
    new_session_hidden_states
)

In [13]:
# Select new session initialization for new sessions
session_hidden_states = tf.where(
    ended_sessions_mask,
    new_session_hidden_states,
    session_hidden_states,
    name='initialize_new_sessions')

# Update user hidden states where the session ended
user_hidden_states = tf.where(
    ended_sessions_mask,
    new_user_hidden_states,
    user_hidden_states,
    name='update_user_representation'
)

In [14]:
# Compute new mask for ended sessions
ended_sessions_mask = tf.cast(
    tf.where(
        tf.equal(features['ProductId'], -1),
        tf.ones(tf.shape(ended_sessions_mask)),
        tf.zeros(tf.shape(ended_sessions_mask)),
        name='compute_ended_sessions'),
    tf.bool)

# Compute new mask for ending sessions
ending_sessions_mask = tf.cast(
    tf.where(
        tf.equal(labels['ProductId'], -1),
        tf.ones(tf.shape(ending_sessions_mask)),
        tf.zeros(tf.shape(ending_sessions_mask)),
        name='compute_ending_sessions'),
    tf.bool)

# Compute new mask for ended users
ended_users_mask = tf.cast(
    tf.where(
        tf.equal(features['UserId'], -1),
        tf.ones(tf.shape(ended_users_mask)),
        tf.zeros(tf.shape(ended_users_mask)),
        name='compute_ended_users'),
    tf.bool)

In [15]:
# Relevant sessions have not ended and do not end in the next step
relevant_sessions_mask = tf.logical_not(
    tf.logical_or(
        ended_sessions_mask,
        ending_sessions_mask))

# Get one-hot encoding of products
relevant_one_hots = tf.map_fn(
    lambda x: tf.cond(
        x[1],
        lambda: tf.one_hot(x[0], params['num_products']),
        lambda: tf.zeros(params['num_products'])
    ),
    [
        features['EmbeddingId'],
        relevant_sessions_mask
    ],
    dtype=tf.float32,
    name='get_relevant_one_hots')

# Get session hidden states for relevant sessions
relevant_hidden_states = tf.where(
    relevant_sessions_mask,
    session_hidden_states,
    tf.zeros(tf.shape(session_hidden_states)),
    name='get_relevant_session_hidden_states'
)

In [18]:
# Apply Session RNN -> get new hidden states and predictions
predictions, new_session_hidden_states = session_rnn.apply(
    tf.expand_dims(relevant_one_hots, 1),
    initial_state=relevant_hidden_states)

In [21]:
# Filter out irrelevant predictions
predictions = tf.boolean_mask(
    predictions,
    relevant_sessions_mask,
    name='filter_irrelevant_predictions')

# Update session hidden states for relevant sessions
session_hidden_states = tf.where(
    relevant_sessions_mask,
    new_session_hidden_states,
    session_hidden_states,
    name='update_relevant_session_hidden_states')

# Extract relevant labels
relevant_labels = tf.boolean_mask(
    labels['EmbeddingId'],
    relevant_sessions_mask,
    name='filter_irrelevant_labels')

# Compute logits for product predictions
logits = tf.matmul(
    predictions,
    softmax_weights,
    transpose_b=True) + softmax_biases

# Apply softmax activation
softmax_predictions = tf.nn.softmax(logits)

In [22]:
tf.shape(softmax_predictions)

<tf.Tensor: id=1184, shape=(2,), dtype=int32, numpy=array([     10, 2600000], dtype=int32)>

In [43]:
# Compute Hitrate

in_top_k = tf.nn.in_top_k(softmax_predictions, relevant_labels, 5)
hitrate = tf.divide(
    tf.reduce_sum(tf.cast(in_top_k, tf.int64)),
    tf.shape(labels)[0])

In [47]:
# Compute Loss Function

negative_samples_weights = tf.nn.embedding_lookup(softmax_weights, relevant_labels)
negative_samples_biases = tf.nn.embedding_lookup(softmax_biases, relevant_labels)

logits = tf.matmul(predicted_embeddings, negative_samples_weights, transpose_b=True) + negative_samples_biases
yhat = tf.nn.softmax(logits) # for each of the examples in the batch we select the remainder of the minibatch as negative examples

# TOP 1 Loss function
yhatT = tf.transpose(yhat)
term1 = tf.reduce_mean(tf.nn.sigmoid(-tf.diag_part(yhat)+yhatT)+tf.nn.sigmoid(yhatT**2), axis=0)
term2 = tf.nn.sigmoid(tf.diag_part(yhat)**2) / batch_size.value
loss = tf.reduce_mean(term1 - term2)

In [48]:
predi

<tf.Tensor: id=2867, shape=(), dtype=float32, numpy=0.9640579>

In [55]:
# Optimize
optimizer = tf.train.AdamOptimizer()
train_op = optimizer.minimize(loss, global_step=tf.train.get_or_create_global_step())

RuntimeError: `loss` passed to Optimizer.compute_gradients should be a function when eager execution is enabled.

In [16]:
session_hidden_states

<tf.Tensor: id=639, shape=(10, 25), dtype=float32, numpy=
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.],
      