# Variable-length lists

In [1]:
# Python: a nested list is ragged by nature -- each inner list keeps its own length
jagged_array = [
    [1, 3],
    [2, 4, 5],
    [8],
    [],  # an empty row is allowed
]
jagged_array

[[1, 3], [2, 4, 5], [8], []]

In [2]:
# NumPy
import numpy as np
# dtype=object stores each row as an independent Python list inside a 1-D array,
# because rows of unequal length cannot form a regular 2-D numeric array.
jagged_array_np = np.array([[1, 3], [2, 4, 5], [8], []], dtype=object)
jagged_array_np

array([list([1, 3]), list([2, 4, 5]), list([8]), list([])], dtype=object)

In [3]:
# Tensorflow
import tensorflow as tf
# tf.ragged.constant turns a nested list with rows of unequal length
# into a tf.RaggedTensor (see the recorded output below).
jagged_array_tf = tf.ragged.constant(
    [[1, 3], [2, 4, 5], [8], []]
)
jagged_array_tf

<tf.RaggedTensor [[1, 3], [2, 4, 5], [8], []]>

In [4]:
# PyTorch
import torch
# Build a nested tensor from the ragged rows; the recorded output shows
# each row held as its own 1-D tensor (the empty row becomes an empty int64 tensor).
jagged_array_torch = torch.nested.nested_tensor(
    [[1, 3], [2, 4, 5], [8], []]
)
jagged_array_torch

  jagged_array_torch = torch.nested.nested_tensor(


nested_tensor([
  tensor([1, 3]),
  tensor([2, 4, 5]),
  tensor([8]),
  tensor([], dtype=torch.int64)
])

# Left Align a Sparse Tensor to Represent Ragged Tensor

In [5]:
# Import under the `tf` alias that the rest of this cell actually uses;
# a bare `import tensorflow` only worked because an earlier cell had defined `tf`.
import tensorflow as tf
# RaggedTensor: six rows of different lengths, two of them empty
X = tf.ragged.constant([
    [1, 3, 2], [4, 6], [1], [], [9, 3, 5, 8], []
])
X

<tf.RaggedTensor [[1, 3, 2], [4, 6], [1], [], [9, 3, 5, 8], []]>

In [7]:
# Convert to SparseTensor
# The recorded output shows one (row, position) index per stored value and a
# dense_shape of [6 4]: six rows, padded out to the longest row.
y = X.to_sparse()
y

SparseTensor(indices=tf.Tensor(
[[0 0]
 [0 1]
 [0 2]
 [1 0]
 [1 1]
 [2 0]
 [4 0]
 [4 1]
 [4 2]
 [4 3]], shape=(10, 2), dtype=int64), values=tf.Tensor([1 3 2 4 6 1 9 3 5 8], shape=(10,), dtype=int32), dense_shape=tf.Tensor([6 4], shape=(2,), dtype=int64))

In [8]:
# Display sparse tensor as dense
# Positions without a stored value are shown as 0 in the dense view.
tf.sparse.to_dense(y)

<tf.Tensor: shape=(6, 4), dtype=int32, numpy=
array([[1, 3, 2, 0],
       [4, 6, 0, 0],
       [1, 0, 0, 0],
       [0, 0, 0, 0],
       [9, 3, 5, 8],
       [0, 0, 0, 0]], dtype=int32)>

# Filtering values in Jagged Tensor

In [9]:
# Import under the `tf` alias used below; a bare `import tensorflow`
# leaves `tf` undefined unless an earlier cell happened to define it.
import tensorflow as tf
X = [[1, 5], [2, 4, 11], [3, 15, 8], [14], [12, 2, 6, 18, 9], [7, 8, 1], [19]]
# Use ragged to sparse to convert to sparse tensor in this example
X = tf.ragged.constant(X).to_sparse()
# Keep values less than or equal to 10.
# tf.sparse.retain drops the masked-out entries but leaves the surviving
# entries at their original column positions (gaps remain inside rows).
X_filtered = tf.sparse.retain(
    X, X.values <= 10
)
X_filtered

SparseTensor(indices=tf.Tensor(
[[0 0]
 [0 1]
 [1 0]
 [1 1]
 [2 0]
 [2 2]
 [4 1]
 [4 2]
 [4 4]
 [5 0]
 [5 1]
 [5 2]], shape=(12, 2), dtype=int64), values=tf.Tensor([1 5 2 4 3 8 2 6 9 7 8 1], shape=(12,), dtype=int32), dense_shape=tf.Tensor([7 5], shape=(2,), dtype=int64))

In [10]:
# Dense
# The dense view makes the gaps left by filtering visible (e.g. row 2 is [3, 0, 8, 0, 0]).
tf.sparse.to_dense(X_filtered)

<tf.Tensor: shape=(7, 5), dtype=int32, numpy=
array([[1, 5, 0, 0, 0],
       [2, 4, 0, 0, 0],
       [3, 0, 8, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 2, 6, 0, 9],
       [7, 8, 1, 0, 0],
       [0, 0, 0, 0, 0]], dtype=int32)>

In [None]:
# May not run on Mac M1 machines
# NOTE: this cell has no execution count, i.e. it was not run in this session;
# the next cell defines a TensorFlow-only `sparse_left_align` used instead.
import tensorflow_transform as tft
X_left_aligned = tft.sparse_tensor_left_align(X_filtered)

In [11]:
import tensorflow as tf

def sparse_left_align(X: tf.SparseTensor):
    """
    Shift the stored values of every row of a 2-D SparseTensor to the left.

    * X: tf.SparseTensor
        Sparse matrix whose rows may contain gaps between stored values.

    Returns a tf.SparseTensor with the same values and dense_shape, where
    the k-th stored value of each row sits in column k.

    NOTE(review): new column ids are assigned in storage order, so this
    presumably expects X.indices to be grouped by row (row-major order).
    """
    # Number of stored entries per row: sum a tensor of ones along the columns
    unit_entries = X.with_values(tf.ones_like(X.values))
    per_row_counts = tf.cast(
        tf.sparse.reduce_sum(unit_entries, axis=1), tf.int64
    )
    # For a row holding n entries the new column ids are 0..n-1;
    # flat_values concatenates those ranges across all rows
    new_cols = tf.ragged.range(per_row_counts, dtype=tf.int64).flat_values
    # Pair the original row id with the new column id -> (nnz, 2)
    row_ids = X.indices[:, 0]
    left_aligned_indices = tf.stack([row_ids, new_cols], axis=1)

    return tf.SparseTensor(
        indices=left_aligned_indices,
        values=X.values,
        dense_shape=X.dense_shape,
    )

In [12]:
# Left-align the filtered sparse tensor with the TensorFlow-only helper defined above
X_left_aligned = sparse_left_align(X_filtered)
X_left_aligned

SparseTensor(indices=tf.Tensor(
[[0 0]
 [0 1]
 [1 0]
 [1 1]
 [2 0]
 [2 1]
 [4 0]
 [4 1]
 [4 2]
 [5 0]
 [5 1]
 [5 2]], shape=(12, 2), dtype=int64), values=tf.Tensor([1 5 2 4 3 8 2 6 9 7 8 1], shape=(12,), dtype=int32), dense_shape=tf.Tensor([7 5], shape=(2,), dtype=int64))

In [13]:
# Dense view: values are now packed at the start of each row, zeros only at the end
tf.sparse.to_dense(X_left_aligned)

<tf.Tensor: shape=(7, 5), dtype=int32, numpy=
array([[1, 5, 0, 0, 0],
       [2, 4, 0, 0, 0],
       [3, 8, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [2, 6, 9, 0, 0],
       [7, 8, 1, 0, 0],
       [0, 0, 0, 0, 0]], dtype=int32)>

# Index to Binary Indicator

In [14]:
import tensorflow as tf

# Each row lists the positions that should be switched on in the indicator vector
X = tf.ragged.constant([
    [1, 3, 2],
    [2],
    [4, 1, 0]
]).to_sparse()

# Built-in conversion: the recorded output is a dense boolean (3, 5) tensor
tf.sparse.to_indicator(X, vocab_size=5)

<tf.Tensor: shape=(3, 5), dtype=bool, numpy=
array([[False,  True,  True,  True, False],
       [False, False,  True, False, False],
       [ True,  True, False, False,  True]])>

In [22]:
import tensorflow as tf

def sparse_to_indicator_tf(
    X: tf.SparseTensor,
    vocab_size: int,
    out_dtype: tf.dtypes.DType = tf.float32
) -> tf.SparseTensor:
    """
    Turn a sparse tensor of indices into a sparse tensor of binary indicators.

    * X: tf.SparseTensor
        The values stored in row i are the column positions to switch on
        in row i of the output.
    * vocab_size: int
        Number of columns of the output indicator tensor.
    * out_dtype: tf.dtypes.DType
        DType of the (all-ones) values of the output tensor.

    Returns a tf.SparseTensor of dense shape (batch_size, vocab_size)
    with its entries reordered into canonical row-major order.
    """
    # Output coordinates: (row of the entry in X, value of the entry in X)
    row_ids = X.indices[:, 0]
    col_ids = tf.cast(X.values, tf.int64)
    indicator_indices = tf.stack([row_ids, col_ids], axis=1)

    # Every stored entry becomes a 1 of the requested dtype
    indicator_values = tf.cast(tf.ones_like(X.values), out_dtype)

    # (batch_size, vocab_size)
    batch_size = tf.shape(X)[0]

    indicator = tf.SparseTensor(
        indices=indicator_indices,
        values=indicator_values,
        dense_shape=(batch_size, vocab_size),
    )
    # The values of X are in arbitrary order within a row, so sort the indices
    return tf.sparse.reorder(indicator)

In [23]:
# Same input as the tf.sparse.to_indicator demo, but the result stays sparse
# and holds int64 ones instead of booleans.
y = sparse_to_indicator_tf(X, 5, out_dtype=tf.int64)
y

SparseTensor(indices=tf.Tensor(
[[0 1]
 [0 2]
 [0 3]
 [1 2]
 [2 0]
 [2 1]
 [2 4]], shape=(7, 2), dtype=int64), values=tf.Tensor([1 1 1 1 1 1 1], shape=(7,), dtype=int64), dense_shape=tf.Tensor([3 5], shape=(2,), dtype=int64))

In [24]:
# Dense view of the sparse indicator for comparison with tf.sparse.to_indicator
tf.sparse.to_dense(y)

<tf.Tensor: shape=(3, 5), dtype=int64, numpy=
array([[0, 1, 1, 1, 0],
       [0, 0, 1, 0, 0],
       [1, 1, 0, 0, 1]])>

In [27]:
import numpy as np
from scipy.sparse import coo_matrix

def sparse_to_indicator_scipy(X, vocab_size, out_dtype=np.float32):
    """
    Convert a sparse matrix of indices to a sparse matrix of binary indicators.

    * X:
        Anything accepted by ``coo_matrix``. The values stored in row i are
        the column positions to switch on in row i of the output. An index
        value of 0 must be stored explicitly (as in a COO matrix built from
        (data, (row, col))); formats that drop zeros would lose it.
    * vocab_size:
        Number of columns of the output indicator matrix.
    * out_dtype:
        DType of the values of the output matrix.

    Returns a ``coo_matrix`` of shape (X.shape[0], vocab_size) whose stored
    values are all 1, even when an index is repeated within a row.
    """
    X = coo_matrix(X)  # cast to coo_matrix
    # configure the sparse matrix: row ids come from X, column ids from its values
    row = np.array(X.row)
    col = np.array(X.data, dtype=row.dtype)
    data = np.ones_like(col)
    shape = (X.shape[0], vocab_size)
    # make the sparse matrix
    y = coo_matrix((data, (row, col)), shape=shape, dtype=out_dtype)
    # An index repeated within a row produces duplicate (row, col) entries,
    # which are summed on conversion (giving 2, 3, ...). Merge them and reset
    # the merged values to 1 so the result is a true binary indicator.
    y.sum_duplicates()
    y.data[:] = 1

    return y

In [30]:
# Rebuild the TensorFlow SparseTensor X as a SciPy COO matrix:
# the stored values are the indicator indices, X.indices gives (row, position).
X_coo = coo_matrix(
    (X.values.numpy(), (X.indices[:, 0].numpy(), X.indices[:, 1].numpy())), 
    shape=X.dense_shape.numpy()
)
y = sparse_to_indicator_scipy(X_coo, 5, out_dtype=int)
# .A is shorthand for .toarray() on SciPy sparse matrices
y.A

array([[0, 1, 1, 1, 0],
       [0, 0, 1, 0, 0],
       [1, 1, 0, 0, 1]])

# Case Study: Jaccard Similarities Using Sparse Matrix

In [31]:
import numpy as np

def jaccard_similarities(mat):
    """
    Compute pairwise Jaccard similarities between rows of a sparse matrix.

    * mat:
        SciPy sparse matrix of shape (n_rows, n_features). Row i is read as
        the set of column ids that hold a non-zero value; the magnitude of
        the stored values is ignored.

    Returns a CSR matrix of shape (n_rows, n_rows) of floats where entry
    (i, j) is |row_i n row_j| / |row_i U row_j|. Pairs of rows with an empty
    intersection are simply not stored (similarity 0).
    """
    # Work on a private, canonical, strictly binary CSR copy so that
    # duplicates, explicit zeros or non-binary values in `mat` cannot
    # distort the set sizes or the intersection counts.
    indicator = mat.tocsr(copy=True)
    indicator.sum_duplicates()
    indicator.eliminate_zeros()
    indicator.data = np.ones(len(indicator.data), dtype=np.int64)

    # |X| for every row
    nnz = indicator.getnnz(axis=1)

    # pair-wise binary intersection |X n Y| via sparse matrix multiplication.
    # `@` is always a matrix product, whereas `*` is element-wise for
    # SciPy sparse arrays.
    sim = (indicator @ indicator.T).tocsr().astype(float)

    # CSR stores entries row by row: expand |X| once per stored entry of a row
    row_sizes = np.repeat(nnz, np.diff(sim.indptr))
    # ... and look up |Y| through the column id of each stored entry
    col_sizes = nnz[sim.indices]

    # |X U Y| = |X| + |Y| - |X n Y|
    sim.data /= row_sizes + col_sizes - sim.data

    return sim

In [32]:
import numpy as np
from scipy.sparse import coo_matrix

mat_dense = np.array([
    [1, 0, 0, 1, 0],
    [0, 1, 0, 1, 0],
    [1, 0, 1, 1, 0],
    [1, 1, 1, 1, 1],
    [0, 1, 1, 1, 0],
    [1, 0, 1, 0, 1],
])
mat = coo_matrix(mat_dense)
# Sparse implementation under test
sim = jaccard_similarities(mat)

# Reference implementation: plain Python sets.
# First turn every binary row into the list of its active column ids.
mat_index = [np.where(row > 0)[0].tolist() for row in mat_dense]
n_items = len(mat_index)

# Fill only the strictly lower triangle (m < n) so each pair is computed once
sim_loop = np.zeros((n_items, n_items))
for n in range(n_items):
    for m in range(n):
        x, y = set(mat_index[n]), set(mat_index[m])
        sim_loop[n, m] = len(x & y) / len(x | y)

# Both implementations must agree below the diagonal
assert np.allclose(np.tril(sim.toarray(), -1), sim_loop)

# Case Study: Batch-wise Set Operations

In [34]:
import numpy as np
# Dense boolean indicators: entry [i, j] says whether item j is in set i
predictions = np.array([
    [False, True, True, False, False],
    [True, False, False, False, False],
    [False, False, False, True, True],
    [False, True, True, False, False],
])

labels = np.array([
    [False, True, True, False, False],
    [False, False, True, False, False],
    [True, False, False, True, False],
    [False, True, False, True, False],
])

# True positives per row: predicted AND labelled
tp = (predictions & labels).sum(axis=1)
# False positives per row: predicted but NOT labelled
fp = (predictions & ~labels).sum(axis=1)

# precision = TP / (TP + FP)
precision = tp / (tp + fp)
precision

array([1. , 0. , 0.5, 0.5])

In [35]:
# The same sets as above, written as ragged lists of item ids
predictions = [
    [1, 2],
    [0],
    [3, 4],
    [1, 2],
]
labels = [
    [1, 2],
    [2],
    [0, 3],
    [1, 3],
]

In [36]:
# For loop solution: one pair of Python sets per row
precision = []
for pred, lab in zip(predictions, labels):
    pred_set, lab_set = set(pred), set(lab)
    # true positives: predicted and labelled
    tp = pred_set & lab_set
    # false positives: predicted but not labelled
    fp = pred_set - lab_set
    # precision of this row
    precision.append(len(tp) / (len(tp) + len(fp)))

precision

[1.0, 0.0, 0.5, 0.5]

In [37]:
# Tensorflow
import tensorflow as tf
# Create the left-aligned ragged sparse tensors
predictions = tf.ragged.constant([[1, 2], [0], [3, 4], [1, 2]]).to_sparse()
labels = tf.ragged.constant([[1, 2], [2], [0, 3], [1, 3]]).to_sparse()
# Compute true positives
# tf.sets.* operate row by row, treating the last dimension as a set
tp = tf.sets.intersection(predictions, labels)
tp = tf.sets.size(tp) # count each row
# Compute false positives
fp = tf.sets.difference(predictions, labels)
fp = tf.sets.size(fp) # count each row
# Compute precisions
# The recorded output is float64 even though the counts are integers
precision = tp / (tp + fp)

precision

<tf.Tensor: shape=(4,), dtype=float64, numpy=array([1. , 0. , 0.5, 0.5])>

In [38]:
import numpy as np
from scipy.sparse import csr_matrix


def set_operation(x, y, pad=None, operation="intersection", returns="count"):
    """
    Batch-wise set operations.

    Row i of `x` and row i of `y` are each read as a set (repeated values in
    a row count once, padding values are ignored) and the requested operation
    is applied to every pair of rows.

    Inputs:
        * x, y: dense 2D np.ndarrays with/without padding values. Both must
            have the same number of rows; the number of columns may differ.
        * pad: padding value. Default to None which is no padding.
        * operation: set operation to perform. Valid values are
            ["intersection" (default), "union", "difference"], where
            "difference" is x - y.
        * returns: type of returned outputs, either "count" (default) which
            counts the cardinality of the resulting set for each row, or any
            other value (conventionally "matrix") which returns a CSR matrix
            of shape (n_rows, n_unique_values) holding the members of each
            resulting set, left aligned and in ascending order.

    Raises:
        ValueError: if x and y differ in their number of rows or if
            `operation` is not recognized.
    """
    if operation not in ("intersection", "union", "difference"):
        raise ValueError(f"Unrecognized operation {operation}")

    # Input shapes
    n_d, m_x = x.shape
    n_y, m_y = y.shape
    if n_d != n_y:
        raise ValueError(
            f"x and y must have the same number of rows, got {n_d} and {n_y}"
        )

    # Use np.unique to convert from data -> indices
    # This can appropriately handle all data types, including strings
    unique, indices = np.unique(np.hstack((x, y)), return_inverse=True)
    n_unique = len(unique)

    # From flattened index -> original shape
    # (explicit column count so that an empty batch reshapes cleanly)
    indices = indices.reshape(n_d, m_x + m_y)
    indices_x = indices[:, :m_x]
    indices_y = indices[:, m_x:]

    # which index from unique is the padding of the ragged representation;
    # -1 never matches a real index, i.e. "no padding"
    pad_index = -1
    if pad is not None:
        matches = np.flatnonzero(unique == pad)
        if len(matches) > 0:  # found the padding
            pad_index = matches[0]

    # Use csr format to create binary indicator matrices
    # e.g. index = [1, 3], n_unique = 5 -> [0, 1, 0, 1, 0]
    def _create_csr_indicator(idx, m):
        # every row contributes exactly m entries
        indptr = np.arange(n_d + 1) * m
        cols = idx.flatten()  # flattened private copy
        # padding entries get value 0 so they can be dropped below
        data = (cols != pad_index).astype(int)
        sparse_matrix = csr_matrix(
            (data, cols, indptr), shape=(n_d, n_unique), dtype=int
        )
        # A value repeated within a row yields duplicate entries; merge them,
        # drop the padding, then clip to 1 so the row is a true 0/1 indicator
        # (otherwise intersection/difference would over-count).
        sparse_matrix.sum_duplicates()
        sparse_matrix.eliminate_zeros()
        sparse_matrix.data[:] = 1
        return sparse_matrix

    x_hat = _create_csr_indicator(indices_x, m_x)
    y_hat = _create_csr_indicator(indices_y, m_y)

    # set operations using binary arithmetic operation
    if operation == "intersection":
        res = x_hat.multiply(y_hat)
    elif operation == "union":
        res = x_hat + y_hat
        res.data = np.minimum(res.data, 1)  # 1 + 1 -> 1
    else:  # "difference"
        res = x_hat - y_hat
        res.data = np.maximum(res.data, 0)  # 0 - 1 -> 0

    if returns == "count":  # return cardinality of set
        return np.asarray(res.sum(axis=1)).ravel()

    # return the actual sparse matrix
    # keep only entries of 1s
    res.eliminate_zeros()
    res.sort_indices()
    # replace the indicator back to the actual set members
    res.data = np.take(unique, res.indices)
    # left align: the k-th member of a row moves to column k
    lengths = np.diff(res.indptr)  # number of members per row
    flat_positions = np.arange(len(res.data))
    offsets = np.repeat(res.indptr[:-1], lengths)
    # keep the index arrays of the matrix on one common dtype
    res.indices = (flat_positions - offsets).astype(res.indptr.dtype)

    return res

In [39]:
# Same data as the list-based example, padded with -1 to a rectangular array
predictions = np.array([[1, 2], [0, -1], [3, 4], [1, 2]])
labels = np.array([[1, 2], [2, -1], [0, 3], [1, 3]])
# Third positional argument is the padding value
tp = set_operation(predictions, labels, -1, operation="intersection")
fp = set_operation(predictions, labels, -1, operation="difference")
precision = tp / (tp + fp)
precision

array([1. , 0. , 0.5, 0.5])

Measure performance

In [41]:
import numpy as np
import tensorflow as tf

# Benchmark data for the %timeit cells below:
# a 3-row pattern (0 acts as padding) repeated 1024 times -> shape (3072, 5)
x = np.tile(
    np.array([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [3, 5, 1, 0, 0]]),
    (1024, 1),
)
y = np.tile(
    np.array([[5, 6, 7, 8, 9], [2, 3, 5, 7, 8], [3, 1, 0, 0, 0]]),
    (1024, 1),
)

In [42]:
# Our NumPy implementation
# pad=0 makes the trailing zeros of the benchmark rows count as padding
%timeit res_np = set_operation(x, y, operation="intersection", pad=0)

1.01 ms ± 21.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [43]:
# Python For Loop
def batch_wise_intersection(x, y, pad=None):
    """
    Row-by-row intersection size using plain Python sets (baseline).

    * x, y: 2D arrays indexable as ``x[i, :]``; row i of each is read as a set.
    * pad: padding value removed from every intersection, or None (default)
        to keep all values.

    Returns a list with one integer per row of `x`: the number of distinct
    values shared by the two rows, excluding `pad`.
    """
    # Loop-invariant: the set of values to discard from every intersection
    pad_set = set() if pad is None else {pad}
    # Build the result directly instead of pre-filling `[[]] * batch_size`,
    # which creates batch_size references to one shared list.
    return [
        len(set(x[ii, :]).intersection(y[ii, :]) - pad_set)
        for ii in range(x.shape[0])
    ]

# x and y are the benchmark arrays defined in the "Measure performance" data cell;
# displaying the result prints one count per row.
res_python = batch_wise_intersection(x, y, pad=0)
res_python

[1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,


In [44]:
# Pure-Python baseline, same inputs and padding as the NumPy measurement
%timeit res_python = batch_wise_intersection(x, y, pad=0)

4.27 ms ± 30.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [45]:
# Use Tensorflow tf.sets.intersection
# NOTE(review): no padding value is passed here, so the 0s are presumably treated
# as ordinary set members -- the counts may differ from the pad=0 runs above; confirm.
%timeit res_tf = tf.sets.size(tf.sets.intersection(x, y))

1.18 ms ± 20.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Case Study: Autoencoders with Sparse Inputs

In [46]:
# Sparse dense multiplication (Tensorflow)
import tensorflow as tf
# x is a single multi-hot row with ones at columns 1 and 2
x = tf.sparse.from_dense([[0, 1, 1, 0, 0]])
W = tf.constant([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [1, 3, 5],
    [2, 4, 6],
])
# so the product is the sum of rows 1 and 2 of W: [4+7, 5+8, 6+9]
y_tilde = tf.sparse.sparse_dense_matmul(x, W)
y_tilde

<tf.Tensor: shape=(1, 3), dtype=int32, numpy=array([[11, 13, 15]], dtype=int32)>

In [47]:
# Sparse dense multiplication (PyTorch)
import torch
# Same multi-hot row as the TensorFlow cell, converted to a sparse tensor
x = torch.tensor([[0, 1, 1, 0, 0]]).to_sparse()
W = torch.tensor([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [1, 3, 5],
    [2, 4, 6],
])
# sparse @ dense: again the sum of rows 1 and 2 of W
y_tilde = torch.sparse.mm(x, W)
y_tilde

tensor([[11, 13, 15]])

In [48]:
# Using embedding lookup (Tensorflow)
import tensorflow as tf

# Each row of s lists the row ids of W to look up (0-based)
s = tf.ragged.constant([[1, 2], [3, 0, 4]]).to_sparse()
W = tf.constant([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [1, 3, 5],
    [2, 4, 6],
])
weights = s.with_values(tf.ones_like(s.values)) # equal weights
# combiner="sum" adds the looked-up rows per input row,
# e.g. first row: W[1] + W[2] = [11, 13, 15]
y_tilde = tf.nn.embedding_lookup_sparse(
    W,
    s,
    weights,
    combiner="sum",
)
y_tilde

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[11, 13, 15],
       [ 4,  9, 14]], dtype=int32)>

In [49]:
# Using embedding lookup (PyTorch)
import torch

# Item ids are shifted by one so that id 0 can serve as padding;
# rows of s are padded with 0 to a common length.
s = torch.tensor([[2, 3, 0, 0, 0], [4, 1, 5, 0, 0]])
W = torch.tensor([
    [0, 0, 0],  # row 0: padding entry, contributes nothing to the sum
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [1, 3, 5],
    [2, 4, 6],
])

# Look up one embedding per position, then add them up along the sequence axis
looked_up = torch.nn.functional.embedding(s, W, padding_idx=0)
y_tilde = torch.sum(looked_up, dim=1)
y_tilde

tensor([[11, 13, 15],
        [ 4,  9, 14]])

In [50]:
# Weighted embedding lookup (Tensorflow)
import tensorflow as tf

# s holds the row ids to look up, v the weight of each looked-up row
# (same ragged layout as s)
s = tf.ragged.constant([[1, 2], [3, 0, 4]]).to_sparse()
v = tf.ragged.constant([[1.3, 2.7], [2.1, 4.6, 0.8]]).to_sparse()
W = tf.constant([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [1, 3, 5],
    [2, 4, 6],
], dtype=tf.float32)
# Weighted sum per row, e.g. first row: 1.3 * W[1] + 2.7 * W[2]
y_tilde = tf.nn.embedding_lookup_sparse(
    W,
    s,
    v,
    combiner="sum",
)
y_tilde

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[24.099998, 28.1     , 32.100002],
       [ 8.3     , 18.7     , 29.099998]], dtype=float32)>

In [51]:
# Weighted embedding lookup (PyTorch)
import torch

# Item ids are shifted by one so that id 0 can serve as padding
s = torch.tensor([[2, 3, 0, 0, 0], [4, 1, 5, 0, 0]])
# One weight per position of s; padded positions carry weight 0
v = torch.tensor([[1.3, 2.7, 0.0, 0.0, 0.0], [2.1, 4.6, 0.8, 0.0, 0.0]])
W = torch.tensor([
    [0, 0, 0],  # row 0: padding entry
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [1, 3, 5],
    [2, 4, 6],
], dtype=torch.float32)

# (batch, max_len, emb): one embedding per position
per_item = torch.nn.functional.embedding(s, W, padding_idx=0)
# Scale every embedding by its weight, then add up along the sequence axis
y_tilde = (per_item * v.unsqueeze(-1)).sum(dim=1)
y_tilde

tensor([[24.1000, 28.1000, 32.1000],
        [ 8.3000, 18.7000, 29.1000]])

### Autoencoder with sparse inputs

Tensorflow / Keras version

In [52]:
import tensorflow as tf
from keras import Model, Input
from keras.layers import Dense, Activation


class Autoencoder(Model):
    """Autoencoder with sparse inputs.

    The encoder is a weighted sum of embedding rows selected by a sparse
    tensor of item ids (plus a bias and an activation); the decoder is a
    dense layer with `num_items` outputs followed by an activation.

    * num_items: number of rows of the embedding and of decoder outputs.
    * emb_size: width of the hidden representation.
    * hidden_activation / output_activation: Keras activation identifiers.
    """

    def __init__(
        self,
        num_items,
        emb_size,
        hidden_activation="relu",
        output_activation="sigmoid",
    ):
        super(Autoencoder, self).__init__()
        self.num_items = num_items
        self.emb_size = emb_size
        self.hidden_activation = Activation(
            hidden_activation, name="hidden_activation"
        )
        self.output_layer = Dense(num_items, name="output_layer")
        self.output_activation = Activation(
            output_activation, name="output_activation"
        )

    def build(self, input_shape):
        """Create the encoder weights: embedding table and hidden bias."""
        self.embedding = self.add_weight(
            name="embedding",
            shape=(self.num_items, self.emb_size),
            trainable=True,
        )
        self.bias = self.add_weight(
            name="bias",
            shape=(1, self.emb_size),
            trainable=True,
        )

    def call(self, inputs, training=False):
        """
        Forward pass.

        * inputs: dict with key "items" (tf.SparseTensor of item ids) and an
            optional key "weights" (tf.SparseTensor with the same layout
            holding one weight per item). Missing weights default to 1.

        Returns a dense tensor of shape (batch_size, num_items).
        """
        items = inputs["items"]
        weights = inputs.get("weights", None)
        if weights is None:
            # equal weights: same sparsity pattern as `items`, all values 1.0.
            # SparseTensor exposes `with_values` (plural), and the ones must
            # be built from the values vector, not from the SparseTensor.
            weights = items.with_values(
                tf.ones_like(items.values, tf.float32)
            )

        # Implement the sparse dense layer (batch_size, emb_size)
        embed = tf.nn.embedding_lookup_sparse(
            self.embedding, items, weights, combiner="sum", name="hidden_embed"
        )

        # Add bias
        y = embed + self.bias

        # Hidden layer activation
        y = self.hidden_activation(y)

        # Decode to produce output
        z = self.output_layer(y)
        z = self.output_activation(z)

        return z

    @property
    def model(self):
        """Functional `Model` wrapper over symbolic sparse inputs (e.g. for summary()).

        NOTE(review): this calls `self.call` directly, so it presumably needs
        the weights to exist already (call the model on real inputs first).
        """
        inputs = {
            "items": Input((None,), sparse=True, dtype=tf.int64, name="items"),
            "weights": Input(
                (None,), sparse=True, dtype=tf.float32, name="weights"
            ),
        }
        model = Model(inputs, self.call(inputs))

        return model

In [53]:
# Two examples: item ids and one weight per item, both as sparse tensors
# with the same ragged layout
inputs = {
    "items": tf.ragged.constant([[1, 2], [3, 0, 4]]).to_sparse(),
    "weights": tf.ragged.constant([[1.3, 2.7], [2.1, 4.6, 0.8]]).to_sparse(),
}

model = Autoencoder(num_items=1000, emb_size=128)
z = model(inputs) # use the input to produce output, (2, 1000)
z

<tf.Tensor: shape=(2, 1000), dtype=float32, numpy=
array([[0.49876222, 0.49345723, 0.5265454 , ..., 0.4920722 , 0.49508202,
        0.49603644],
       [0.4880186 , 0.50813854, 0.50340515, ..., 0.5355486 , 0.49567267,
        0.48957598]], dtype=float32)>

In [54]:
# summary() prints the table itself and returns None,
# so wrapping it in print() would only add a stray "None" line.
model.model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 items (InputLayer)          [(None, None)]               0         []                            
                                                                                                  
 weights (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 tf.compat.v1.nn.embedding_  (None, 128)                  0         ['items[0][0]',               
 lookup_sparse (TFOpLambda)                                          'weights[0][0]']             
                                                                                                  
 tf.__operators__.add (TFOp  (None, 128)                  0         ['tf.compat.v1.nn.embeddin

PyTorch version

In [55]:
import torch

class Autoencoder(torch.nn.Module):
    """Autoencoder over padded lists of item ids.

    The encoder is a (weighted) sum of item embeddings plus a bias; the
    decoder applies ReLU, a linear layer with `num_items` outputs and a
    sigmoid. Id 0 is the padding id (`padding_idx=0`).

    NOTE(review): the embedding table has `num_items` rows including the
    padding row, so with 1-based item ids the largest usable id is
    num_items - 1 -- confirm this matches the intended vocabulary size.
    """

    def __init__(self, num_items, emb_size):
        super().__init__()
        self.embed = torch.nn.Embedding(
            num_items, emb_size, padding_idx=0
        )
        self.bias = torch.nn.Parameter(torch.zeros([1, emb_size]))
        self.decoder = torch.nn.Sequential(
            torch.nn.ReLU(),
            torch.nn.Linear(emb_size, num_items),
            torch.nn.Sigmoid()
        )

    def forward(self, items, weights=None):
        """
        * items: integer tensor (batch_size, max_len) of item ids padded with 0.
        * weights: optional tensor broadcastable to (batch_size, max_len) with
            one weight per item; None (default) weighs every item equally.

        Returns a tensor of shape (batch_size, num_items).
        """
        # Implement the sparse dense layer: (batch_size, max_len, emb_size)
        embed = self.embed(items)

        if weights is not None:
            # Apply weights out of place (the embedding output is left
            # untouched) and in the embedding's dtype.
            embed = embed * weights.to(embed.dtype)[:, :, None]
        # With weights=None every item has weight 1, so no tensor of ones is
        # needed -- this also avoids allocating one on the wrong device.

        # Sum together embeddings -> (batch_size, emb_size)
        embed = embed.sum(dim=1)

        # Add bias
        y = embed + self.bias

        # Hidden layer activation and decoding
        z = self.decoder(y)

        return z

In [57]:
# Item ids padded with 0 to a common length, plus one weight per position
# (weight 0.0 on the padded slot)
inputs = {
    "items": torch.tensor([[2, 3, 0], [4, 1, 5]]),
    "weights": torch.tensor([[1.3, 2.7, 0.0], [2.1, 4.6, 0.8]]),
}

model = Autoencoder(num_items=1000, emb_size=128)
# The dict keys match the parameter names of forward(items, weights)
z = model(**inputs)
z

tensor([[0.5380, 0.2701, 0.2039,  ..., 0.6616, 0.0411, 0.7240],
        [0.7360, 0.5025, 0.4160,  ..., 0.5767, 0.1092, 0.4250]],
       grad_fn=<SigmoidBackward0>)

In [58]:
# Printing a torch module lists its registered sub-modules
print(model)

Autoencoder(
  (embed): Embedding(1000, 128, padding_idx=0)
  (decoder): Sequential(
    (0): ReLU()
    (1): Linear(in_features=128, out_features=1000, bias=True)
    (2): Sigmoid()
  )
)
