In [1]:
import tensorflow as tf
import numpy as np

# Simple Demo for tf.string_split

Reference:
*  [TensorFlow tf.string_split demo](https://github.com/tensorflow/transform/blob/master/tensorflow_transform/mappers_test.py#L117)

In [6]:
# Two sample sentences: the first has three words, the second has four.
sentences = ['One was Johnny', 'Two was a rat']
string_tensor = tf.constant(sentences)

# Split every sentence on single spaces; the split result is sparse
# (it carries `indices`, `values` and `dense_shape`).
tokenized_tensor = tf.string_split(string_tensor, delimiter=' ')

In [3]:
# Evaluate the split result inside a session and print the resulting
# SparseTensorValue (indices, values, dense_shape).
with tf.Session() as sess:
    tokens_value = sess.run(tokenized_tensor)
    print(tokens_value)

SparseTensorValue(indices=array([[0, 0],
       [0, 1],
       [0, 2],
       [1, 0],
       [1, 1],
       [1, 2],
       [1, 3]]), values=array([b'One', b'was', b'Johnny', b'Two', b'was', b'a', b'rat'],
      dtype=object), dense_shape=array([2, 4]))


# How to convert [tf.string] to [np.array]

1. Because **tf.map_fn** has to keep the dimensions of its input, use it only for the per-element string processing, which leaves the dimensions unchanged.
2. **Key points:**
   * Use tf.string_split to split all strings at once; the result evaluates to a SparseTensorValue.
   * Convert the sparse result to a dense matrix with tf.sparse_tensor_to_dense.
   * Convert every element of the dense matrix from string to numeric.

In [4]:
## 1. Example 1: strip the brackets, split on commas, and parse the flat list of values
x = tf.placeholder(tf.string)
def parse(x):
    """Return `x` with every literal "[" and "]" removed.

    Args:
        x: string tensor passed in by `tf.map_fn` (this parameter shadows
           the module-level placeholder of the same name inside the function).

    Returns:
        The result of two chained `tf.regex_replace` calls on `x`.
    """
    # Raw strings spell the regex escapes explicitly instead of relying on
    # Python passing an unrecognised escape sequence through unchanged.
    without_open = tf.regex_replace(x, r"\[", "")
    return tf.regex_replace(without_open, r"\]", "")

output_strs = tf.map_fn(parse, x)
# Split each cleaned string on commas; only the flat `values` of the sparse
# result are converted to numbers here, so the row structure is not kept.
t1 = tf.string_split(output_strs, delimiter=",")
target_indices, target_values = t1.indices, tf.strings.to_number(t1.values)

with tf.Session() as sess:
    print(sess.run(target_values, feed_dict={x: ["[1.0,2.0]", "[2.0,3.0]"]}))

[1. 2. 2. 3.]


In [5]:
## 2. Example 2: convert to a dense numeric matrix for follow-up processing
x = tf.placeholder(tf.string)
# Strip only the literal brackets. Inside a character class "|" is an ordinary
# character, so it is deliberately left out to avoid silently deleting pipes.
stripped = tf.map_fn(lambda elem: tf.regex_replace(elem, r"[\[\]]", ""), x, dtype=tf.string)
# Split on commas (sparse result), then densify so the row structure is kept.
tokens_sparse = tf.string_split(stripped, delimiter=",")
# NOTE(review): shorter rows would be padded with "" here, which to_number is
# not expected to parse -- inputs are assumed to be rectangular; confirm.
tokens_dense = tf.sparse_tensor_to_dense(tokens_sparse, default_value="")
y = tf.strings.to_number(tokens_dense)

with tf.Session() as sess:
    print(sess.run(y, feed_dict={x: ["[1.0,2.0]", "[2.0,3.0]"]}))

[[1. 2.]
 [2. 3.]]
