In [None]:
import tensorflow as tf

## Ragged Tensor

https://www.tensorflow.org/guide/ragged_tensor

Ragged tensors hold data whose rows (or deeper dimensions) have different lengths. Typical use cases:

- Variable-length features, such as the set of actors in a movie.
- Batches of variable-length sequential inputs, such as sentences or video clips.
- Hierarchical inputs, such as text documents that are subdivided into sections, paragraphs, sentences, and words.
- Individual fields in structured inputs, such as protocol buffers.

In [None]:
# A regular (dense) 2-D tensor: four rows, each with exactly three entries.
tensor_two_d = tf.constant(
    [[1, 2, 0], [3, 5, -1], [1, 5, 6], [2, 3, 8]]
)
# Last expression of the cell: display the static shape.
tensor_two_d.shape

TensorShape([4, 3])

In [None]:
# Rows of unequal length (3, 1, 3 and 2 entries) held as a plain nested list.
# NOTE: this rebinds `tensor_two_d` from the dense tensor above to a list.
tensor_two_d = [[1, 2, 0], [3], [1, 5, 6], [2, 3]]

# tf.ragged.constant accepts the uneven rows; the second dimension of the
# resulting shape is reported as None because it varies per row.
tensor_ragged = tf.ragged.constant(tensor_two_d)
tensor_ragged.shape

TensorShape([4, None])

In [None]:
# Display the ragged tensor itself: each row keeps its own length.
tensor_ragged

<tf.RaggedTensor [[1, 2, 0], [3], [1, 5, 6], [2, 3]]>

tf.RaggedTensor

https://www.tensorflow.org/api_docs/python/tf/RaggedTensor

In [None]:
# Flat list of values that gets carved into rows.
values = [3, 1, 4, 1, 5, 9, 2, 6]

# row_lengths gives the number of values in each row; a 0 yields an empty row.
tf.RaggedTensor.from_row_lengths(
    values=values,
    row_lengths=[4, 0, 3, 1, 0],
)

<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>

In [None]:
# row_limits gives the end offset (exclusive) of each row within `values`,
# the flat list defined in the previous cell. Repeated limits give empty rows.
tf.RaggedTensor.from_row_limits(
    values=values,
    row_limits=[4, 4, 7, 8, 8],
)

<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>

In [None]:
# row_splits lists the row boundaries: row i covers
# values[row_splits[i]:row_splits[i + 1]], so equal neighbours give an empty row.
print(
    tf.RaggedTensor.from_row_splits(
        values=[3, 1, 4, 1, 5, 9, 2, 6],
        row_splits=[0, 4, 4, 7, 8, 8],
    )
)

<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>


In [None]:
# A dense 3x3 tensor used as the source for the ragged conversions.
dt = tf.constant([
    [5, 7, 0],
    [0, 3, 0],
    [6, 0, 0],
])

# With no extra arguments every element of every row is kept.
tf.RaggedTensor.from_tensor(dt)

<tf.RaggedTensor [[5, 7, 0],
 [0, 3, 0],
 [6, 0, 0]]>

In [None]:
# `lengths` keeps only the leading entries of each row of `dt`:
# 1 from the first row, none from the second, all 3 from the third.
tf.RaggedTensor.from_tensor(
    dt,
    lengths=[1, 0, 3],
)

<tf.RaggedTensor [[5], [], [6, 0, 0]]>

## Sparse Tensors

https://www.tensorflow.org/guide/sparse_tensor

In [None]:
# A SparseTensor is an efficient way to handle a tensor with many zeros:
# it records only the listed (index, value) pairs plus the overall dense shape.
tensor_sparse = tf.sparse.SparseTensor(
    indices=[[1, 1], [3, 4]],
    values=[11, 56],
    dense_shape=[5, 6],
)
print(tensor_sparse)

SparseTensor(indices=tf.Tensor(
[[1 1]
 [3 4]], shape=(2, 2), dtype=int64), values=tf.Tensor([11 56], shape=(2,), dtype=int32), dense_shape=tf.Tensor([5 6], shape=(2,), dtype=int64))


In [None]:
# Expand the sparse tensor into a regular dense one: the stored values land at
# positions (1, 1) and (3, 4); every other entry of the output is 0.
tf.sparse.to_dense(tensor_sparse)

<tf.Tensor: shape=(5, 6), dtype=int32, numpy=
array([[ 0,  0,  0,  0,  0,  0],
       [ 0, 11,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 56,  0],
       [ 0,  0,  0,  0,  0,  0]], dtype=int32)>

## String Tensors

https://www.tensorflow.org/api_docs/python/tf/strings

In [None]:
# A 1-D tensor holding three strings; TensorFlow stores them as byte strings.
tensor_string = tf.constant(
    ["hello", "i am", "a string"]
)
print(tensor_string)

tf.Tensor([b'hello' b'i am' b'a string'], shape=(3,), dtype=string)


In [None]:
# Join all elements of the 1-D string tensor into a single scalar string,
# separated by spaces.
# tf.strings.join is documented to take a *list* of tensors and join them
# element-wise; handing it one tensor only works if that tensor is implicitly
# unpacked. tf.strings.reduce_join is the op meant for collapsing the elements
# of a single tensor (all axes by default), and yields the same scalar here.
tf.strings.reduce_join(tensor_string, separator=" ")

<tf.Tensor: shape=(), dtype=string, numpy=b'hello i am a string'>