Skip to content
Permalink
Browse files Browse the repository at this point in the history
Fix segfault/heap buffer overflow in `{Experimental,}DatasetToTFRecord` where dataset is numeric.

The code assumes only string inputs and therefore interprets numeric values as valid `tstring`s. Then, when trying to compute the CRC of the record, this results in a heap buffer overflow.

PiperOrigin-RevId: 387675909
Change-Id: I7396b9b8afc1ac744112af7c0b1cd7bb41e0f556
  • Loading branch information
mihaimaruseac authored and tensorflower-gardener committed Jul 29, 2021
1 parent b5b9ae9 commit e0b6e58
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion tensorflow/core/kernels/data/experimental/to_tf_record_op.cc
Expand Up @@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/core/framework/function_handle_cache.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/record_writer.h"
Expand Down Expand Up @@ -91,8 +92,20 @@ class ToTFRecordOp : public AsyncOpKernel {
TF_RETURN_IF_ERROR(finalized_dataset->MakeIterator(
&iter_ctx, /*parent=*/nullptr, "ToTFRecordOpIterator", &iterator));

const int num_output_dtypes = finalized_dataset->output_dtypes().size();
if (num_output_dtypes != 1) {
return errors::InvalidArgument(
"ToTFRecordOp currently only support datasets of 1 single column, ",
"but got ", num_output_dtypes);
}
const DataType dt = finalized_dataset->output_dtypes()[0];
if (dt != DT_STRING) {
return errors::InvalidArgument(
"ToTFRecordOp currently only supports DT_STRING dataypes, but got ",
DataTypeString(dt));
}
std::vector<Tensor> components;
components.reserve(finalized_dataset->output_dtypes().size());
components.reserve(num_output_dtypes);
bool end_of_sequence;
do {
TF_RETURN_IF_ERROR(
Expand Down

0 comments on commit e0b6e58

Please sign in to comment.