-
Notifications
You must be signed in to change notification settings - Fork 74.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dataset C++ extension documentation is outdated for TF 2.0 & DatasetV2 #27355
Comments
Thanks @vrince. Could you be a little more specific and explain which part of the doc needs to be changed? Thanks! |
Hi! Sorry for the delay... It is a little hard for me to provide a meaningful diff in an issue comment. Can you point me to the source of the doc so I can patch it and send you the difference? |
Basically what needs to be changed is the webpage itself. Here is what I changed: #include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
-namespace myproject
-{
-namespace
-{
-
using ::tensorflow::DT_STRING;
using ::tensorflow::PartialTensorShape;
using ::tensorflow::Status;
-class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
+class MyReaderDatasetOp : public tensorflow::data::DatasetOpKernel
{
public:
- MyReaderDatasetOp(tensorflow::OpKernelConstruction *ctx)
+ explicit MyReaderDatasetOp(tensorflow::OpKernelConstruction *ctx)
: DatasetOpKernel(ctx)
{
// Parse and validate any attrs that define the dataset using
@@ -23,7 +18,7 @@ class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
}
void MakeDataset(tensorflow::OpKernelContext *ctx,
- tensorflow::DatasetBase **output) override
+ tensorflow::data::DatasetBase **output) override
{
// Parse and validate any input tensors that define the dataset using
// `ctx->input()` or the utility function
@@ -35,13 +30,13 @@ class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
}
private:
- class Dataset : public tensorflow::GraphDatasetBase
+ class Dataset : public tensorflow::DatasetBase
{
public:
- Dataset(tensorflow::OpKernelContext *ctx) : GraphDatasetBase(ctx) {}
+ Dataset(tensorflow::OpKernelContext *ctx) : tensorflow::data::DatasetBase(tensorflow::data::DatasetContext(ctx)) {}
std::unique_ptr<tensorflow::IteratorBase> MakeIteratorInternal(
- const string &prefix) const override
+ const std::string &prefix) const
{
return std::unique_ptr<tensorflow::IteratorBase>(new Iterator(
{this, tensorflow::strings::StrCat(prefix, "::MyReader")}));
@@ -57,6 +52,7 @@ class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
static auto *const dtypes = new tensorflow::DataTypeVector({DT_STRING});
return *dtypes;
}
+
const std::vector<PartialTensorShape> &output_shapes() const override
{
static std::vector<PartialTensorShape> *shapes =
@@ -64,15 +60,16 @@ class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
return *shapes;
}
- string DebugString() const override { return "MyReaderDatasetOp::Dataset"; }
+ std::string DebugString() const override { return "MyReaderDatasetOp::Dataset"; }
protected:
// Optional: Implementation of `GraphDef` serialization for this dataset.
//
// Implement this method if you want to be able to save and restore
// instances of this dataset (and any iterators over it).
- Status AsGraphDefInternal(DatasetGraphDefBuilder *b,
- tensorflow::Node **output) const override
+ Status AsGraphDefInternal(tensorflow::SerializationContext *ctx,
+ DatasetGraphDefBuilder *b,
+ tensorflow::Node **output) const
{
// Construct nodes to represent any of the input tensors from this
// object's member variables using `b->AddScalar()` and `b->AddVector()`.
@@ -85,8 +82,8 @@ class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
class Iterator : public tensorflow::DatasetIterator<Dataset>
{
public:
- explicit Iterator(const Params &params)
- : DatasetIterator<Dataset>(params), i_(0) {}
+ explicit Iterator(const Params &params) : DatasetIterator<Dataset>(params),
+ i_(0) {}
// Implementation of the reading logic.
//
@@ -111,7 +108,7 @@ class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
{
// Create a scalar string tensor and add it to the output.
tensorflow::Tensor record_tensor(ctx->allocator({}), DT_STRING, {});
- record_tensor.scalar<string>()() = "MyReader!";
+ record_tensor.scalar<std::string>()() = "MyReader!";
out_tensors->emplace_back(std::move(record_tensor));
++i_;
*end_of_sequence = false;
@@ -145,7 +142,7 @@ class MyReaderDatasetOp : public tensorflow::DatasetOpKernel
private:
tensorflow::mutex mu_;
- int64 i_ GUARDED_BY(mu_);
+ tensorflow::int64 i_ GUARDED_BY(mu_);
};
};
};
@@ -164,6 +161,3 @@ REGISTER_OP("MyReaderDataset")
// Register the kernel implementation for MyReaderDataset.
REGISTER_KERNEL_BUILDER(Name("MyReaderDataset").Device(tensorflow::DEVICE_CPU),
MyReaderDatasetOp);
-
-} // namespace
-} // namespace myproject Here the @@ -1,46 +1,25 @@
-import tensorflow as tf
-# Assumes the file is in the current working directory.
-my_reader_dataset_module = tf.load_op_library("./my_reader_dataset_op.so")
+"""Dataset ops."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tensorflow as tf
+from tensorflow.python.platform import resource_loader
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import structure
+from tensorflow.python.framework import dtypes
-class MyReaderDataset(tf.data.Dataset):
+my_reader_dataset_module = tf.load_op_library(
+ resource_loader.get_path_to_datafile("_dataset_ops.so"))
- def __init__(self):
- super(MyReaderDataset, self).__init__()
- # Create any input attrs or tensors as members of this class.
- def _as_variant_tensor(self):
- # Actually construct the graph node for the dataset op.
- #
- # This method will be invoked when you create an iterator on this dataset
- # or a dataset derived from it.
- return my_reader_dataset_module.my_reader_dataset()
-
- # The following properties define the structure of each element: a scalar
- # <a href="../../api_docs/python/tf#string"><code>tf.string</code></a> tensor. Change these properties to match the `output_dtypes()`
- # and `output_shapes()` methods of `MyReaderDataset::Dataset` if you modify
- # the structure of each element.
- @property
- def output_types(self):
- return tf.string
+class MyReaderDataset(dataset_ops.DatasetSource):
- @property
- def output_shapes(self):
- return tf.TensorShape([])
+ def __init__(self):
+ super(MyReaderDataset, self).__init__(
+ my_reader_dataset_module.my_reader_dataset())
@property
- def output_classes(self):
- return tf.Tensor
-
-
-if __name__ == "__main__":
- # Create a MyReaderDataset and print its elements.
- with tf.Session() as sess:
- iterator = MyReaderDataset().make_one_shot_iterator()
- next_element = iterator.get_next()
- try:
- while True:
- print(sess.run(next_element)) # Prints "MyReader!" ten times.
- except tf.errors.OutOfRangeError:
- pass
+ def _element_structure(self):
+ return structure.TensorStructure(dtypes.string, []) To follow the rest of the documentation, I created two files: one to test the thing and one to build it with Bazel:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow_addons.utils.python import test_utils
from tensorflow_addons.dataset import dataset_ops
class DatasetOpsTest(tf.test.TestCase):
def test_dataset(self):
dataset = dataset_ops.MyReaderDataset()
i = 0
for d in dataset:
self.assertAllEqual(d, tf.constant("MyReader!"))
i += 1
self.assertEquals(i, 10)
if __name__ == "__main__":
tf.test.main()
|
This doc doesn't exist anymore. |
System information
Describe the documentation issue
The C++ code to extend Dataset (especially since DatasetV2) is outdated.
Here is a working version of files needed in the documentation : https://github.com/vrince/tensorflow_addons/tree/master/tensorflow_addons/dataset
NOTE: the only part I am not really sure about is this one: https://github.com/vrince/tensorflow_addons/blob/master/tensorflow_addons/dataset/cc/my_dataset.cpp#L76 ... I basically left it as it was, but I don't see the point of it.
There is also an external test runnable from the Python and Bazel files.
Not sure where or if I even can do a pull request to change the doc.
The text was updated successfully, but these errors were encountered: