Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable multi-dimensional and axis support for tf.unique_with_counts #16503

Merged
merged 5 commits into from
Feb 24, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
85 changes: 85 additions & 0 deletions tensorflow/core/api_def/base_api/api_def_UniqueWithCountsV2.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
op {
graph_op_name: "UniqueWithCountsV2"
in_arg {
name: "x"
description: <<END
A `Tensor`.
END
}
in_arg {
name: "axis"
description: <<END
A `Tensor` of type `int32` or `int64` (default: None). The axis of the
Tensor along which to find the unique elements.
END
}
out_arg {
name: "y"
description: <<END
A `Tensor`. Unique elements along the `axis` of `Tensor` x.
END
}
out_arg {
name: "idx"
description: <<END
A 1-D Tensor of type `out_idx` that contains the index of each
value of x in the output y.
END
}
out_arg {
name: "count"
description: <<END
A 1-D Tensor. The count of each value of x in the output y.
END
}
summary: "Finds unique elements along an axis of a tensor."
description: <<END
This operation returns a tensor `y` containing the unique elements
along the `axis` of a tensor. The returned unique elements are sorted
in the same order as they occur along `axis` in `x`.
This operation also returns a tensor `idx` that is the same size as
the number of the elements in `x` along the `axis` dimension, and a
tensor `count` that has one entry per unique element in `y`. The `idx`
contains the index in the unique output `y` and the `count` contains the
number of occurrences of each element of the unique output `y`.
In other words, for a `1-D` tensor `x` with `axis = None`:

`y[idx[i]] = x[i] for i in [0, 1,...,size(x) - 1]`

For example:

```
# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
y, idx, count = unique_with_counts(x)
y ==> [1, 2, 4, 7, 8]
idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
count ==> [2, 1, 3, 1, 2]
```

For a `2-D` tensor `x` with `axis = 0`:

```
# tensor 'x' is [[1, 0, 0],
# [1, 0, 0],
# [2, 0, 0]]
y, idx, count = unique_with_counts(x, axis=0)
y ==> [[1, 0, 0],
[2, 0, 0]]
idx ==> [0, 0, 1]
count ==> [2, 1]
```

For a `2-D` tensor `x` with `axis = 1`:

```
# tensor 'x' is [[1, 0, 0],
# [1, 0, 0],
# [2, 0, 0]]
y, idx, count = unique_with_counts(x, axis=1)
y ==> [[1, 0],
[1, 0],
[2, 0]]
idx ==> [0, 1, 1]
count ==> [1, 2]
```
END
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
op {
graph_op_name: "UniqueWithCounts"
visibility: HIDDEN
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
op {
graph_op_name: "UniqueWithCountsV2"
visibility: HIDDEN
}
10 changes: 10 additions & 0 deletions tensorflow/core/kernels/unique_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,16 @@ class UniqueOp : public OpKernel {
.Device(DEVICE_CPU) \
.TypeConstraint<type>("T") \
.TypeConstraint<int64>("out_idx"), \
UniqueOp<type, int64>); \
REGISTER_KERNEL_BUILDER(Name("UniqueWithCountsV2") \
.Device(DEVICE_CPU) \
.TypeConstraint<type>("T") \
.TypeConstraint<int32>("out_idx"), \
UniqueOp<type, int32>) \
REGISTER_KERNEL_BUILDER(Name("UniqueWithCountsV2") \
.Device(DEVICE_CPU) \
.TypeConstraint<type>("T") \
.TypeConstraint<int64>("out_idx"), \
UniqueOp<type, int64>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_UNIQUE);
REGISTER_UNIQUE(string)
Expand Down
17 changes: 17 additions & 0 deletions tensorflow/core/ops/array_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,23 @@ REGISTER_OP("UniqueWithCounts")
return Status::OK();
});

// Registers the axis-aware variant of UniqueWithCounts.
//
// Inputs:
//   x:    tensor of any type `T`.
//   axis: int32/int64 tensor (`Taxis`) selecting the dimension to deduplicate
//         along.
// Outputs:
//   y:     the unique elements of `x`, same type as `x`.
//   idx:   `out_idx`-typed indices into `y`.
//   count: `out_idx`-typed occurrence counts, one per entry of `y`.
REGISTER_OP("UniqueWithCountsV2")
    .Input("x: T")
    .Input("axis: Taxis")
    .Output("y: T")
    .Output("idx: out_idx")
    .Output("count: out_idx")
    .Attr("T: type")
    .Attr("Taxis: {int32,int64} = DT_INT64")
    .Attr("out_idx: {int32, int64} = DT_INT32")
    .SetShapeFn([](InferenceContext* c) {
      // The number of unique entries is only known once the op has run.
      const auto unknown_vector = c->Vector(InferenceContext::kUnknownDim);
      const auto x_shape = c->input(0);
      if (c->RankKnown(x_shape) && c->Rank(x_shape) == 1) {
        // 1-D input: `y` is a vector and `idx` has one entry per element of
        // `x`, so the static shape of `x` can be reused for `idx`.
        c->set_output(0, unknown_vector);
        c->set_output(1, x_shape);
      } else {
        // Multi-dimensional (or unknown-rank) input: `y` is not a vector in
        // general, and `idx` is 1-D with a length that depends on the runtime
        // value of `axis`. Report only what is statically certain instead of
        // claiming a wrong rank.
        c->set_output(0, c->UnknownShape());
        c->set_output(1, unknown_vector);
      }
      c->set_output(2, unknown_vector);
      return Status::OK();
    });

namespace {

Status ShapeShapeFn(InferenceContext* c) {
Expand Down
33 changes: 33 additions & 0 deletions tensorflow/python/kernel_tests/unique_op_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,39 @@ def testString(self):
v = [1 if x[i] == value.decode('ascii') else 0 for i in range(7000)]
self.assertEqual(count, sum(v))

def testInt32Axis(self):
# Runs `_unique_with_counts_v2` on a 3x3 integer matrix along axis 0 and
# along axis 1, supplying the axis once as an int32 and once as an int64
# array, and compares y/idx/count against hand-written expectations.
for dtype in [np.int32, np.int64]:
x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]])
with self.test_session() as sess:
# axis=[0]: the first two rows of x are identical.
y0, idx0, count0 = gen_array_ops._unique_with_counts_v2(
x, axis=np.array([0], dtype))
tf_y0, tf_idx0, tf_count0 = sess.run([y0, idx0, count0])
# axis=[1]: the last two columns of x are identical.
y1, idx1, count1 = gen_array_ops._unique_with_counts_v2(
x, axis=np.array([1], dtype))
tf_y1, tf_idx1, tf_count1 = sess.run([y1, idx1, count1])
# Expected along axis 0: two unique rows, the first one seen twice.
self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]]))
self.assertAllEqual(tf_idx0, np.array([0, 0, 1]))
self.assertAllEqual(tf_count0, np.array([2, 1]))
# Expected along axis 1: two unique columns, the second one seen twice.
self.assertAllEqual(tf_y1, np.array([[1, 0], [1, 0], [2, 0]]))
self.assertAllEqual(tf_idx1, np.array([0, 1, 1]))
self.assertAllEqual(tf_count1, np.array([1, 2]))

def testInt32V2(self):
# Exercises `_unique_with_counts_v2` on a 1-D input with an empty axis
# tensor and cross-checks the result against NumPy.
# This test is only temporary, once V2 is used
# by default, the axis will be wrapped to allow `axis=None`.
# 7000 random integers drawn from [2, 10).
x = np.random.randint(2, high=10, size=7000)
with self.test_session() as sess:
# The axis is passed as an empty int32 array.
y, idx, count = gen_array_ops._unique_with_counts_v2(
x, axis=np.array([], np.int32))
tf_y, tf_idx, tf_count = sess.run([y, idx, count])

# idx has one entry per input element; y holds each distinct value once.
self.assertEqual(len(x), len(tf_idx))
self.assertEqual(len(tf_y), len(np.unique(x)))
# Every input element must map back to itself through idx.
for i in range(len(x)):
self.assertEqual(x[i], tf_y[tf_idx[i]])
# NOTE: the loop variable `count` rebinds the name that held the op's count
# output above; the fetched values being checked are already in tf_count.
for value, count in zip(tf_y, tf_count):
self.assertEqual(count, np.sum(x == value))


if __name__ == '__main__':
test.main()
12 changes: 12 additions & 0 deletions tensorflow/python/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1319,6 +1319,18 @@ def unique(x, out_idx=dtypes.int32, name=None):
unique.__doc__ = gen_array_ops._unique.__doc__


@tf_export("unique_with_counts")
def unique_with_counts(x, out_idx=dtypes.int32, name=None):
# Python wrapper exported as "unique_with_counts"; it forwards its arguments
# unchanged to the generated `_unique_with_counts` op (not the V2 op).
# TODO(yongtang): switch to v2 once the API deprecation
# period (3 weeks) passes.
# TODO(yongtang): The documentation should also
# be updated when switching to v2.
return gen_array_ops._unique_with_counts(x, out_idx, name)


# Reuse the generated op's docstring for the wrapper.
unique_with_counts.__doc__ = gen_array_ops._unique_with_counts.__doc__


@tf_export("split")
def split(value, num_or_size_splits, axis=0, num=None, name="split"):
"""Splits a tensor into sub tensors.
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/python/ops/hidden_ops.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ TileGrad # Exported through array_grad instead of array_ops.
ZerosLike # TODO(josh11b): Use this instead of the Python version.
Unique
UniqueV2
UniqueWithCounts
UniqueWithCountsV2
Unpack

# candidate_sampling_ops
Expand Down