Skip to content
Permalink
Browse files Browse the repository at this point in the history
Do not allow to read tensor's external_data outside the model directo…
…ry (#4400)

* Not allow to read tensor external_data outside the model directory

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Fix formatting errors

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Disable segfaulty test

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Fix cpp tests

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Fix UB while removing ../

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Fix clang-format

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Check for symlinks only on POSIX systems

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Add specific to Windows external_data test

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Change specific Windows external_data test decorator tofix mypy

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

* Remove unused pathlib

Signed-off-by: jnovikov <johnnovikov0@gmail.com>

Signed-off-by: jnovikov <johnnovikov0@gmail.com>
  • Loading branch information
jnovikov committed Aug 10, 2022
1 parent eb634ad commit f369b0e
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 10 deletions.
27 changes: 26 additions & 1 deletion onnx/checker.cc
Expand Up @@ -127,7 +127,20 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) {
for (const StringStringEntryProto& entry : tensor.external_data()) {
if (entry.has_key() && entry.has_value() && entry.key() == "location") {
has_location = true;
std::string data_path = path_join(ctx.get_model_dir(), entry.value());
std::string relative_path = clean_relative_path(entry.value());
// Check that normalized relative path starts with "../" or "..\" on windows.
if (relative_path.rfind(".." + k_preferred_path_separator, 0) == 0) {
fail_check(
"Data of TensorProto ( tensor name: ",
tensor.name(),
") should be file inside the ",
ctx.get_model_dir(),
", but the '",
entry.value(),
"' points outside the directory");
}

std::string data_path = path_join(ctx.get_model_dir(), relative_path);
// use stat to check whether the file exists
struct stat buffer;
if (stat((data_path).c_str(), &buffer) != 0) {
Expand All @@ -138,6 +151,18 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) {
data_path,
", but it doesn't exist or is not accessible.");
}
#ifdef _WIN32
#else // POSIX
// Do not allow symlinks or directories.
if (!S_ISREG(buffer.st_mode)) {
fail_check(
"Data of TensorProto ( tensor name: ",
tensor.name(),
") should be stored in ",
data_path,
", but it is not regular file.");
}
#endif
}
}
if (!has_location) {
Expand Down
88 changes: 88 additions & 0 deletions onnx/common/path.cc
Expand Up @@ -9,11 +9,99 @@

namespace ONNX_NAMESPACE {

bool is_path_separator(char c) {
// Windows accept / as path separator.
if (k_preferred_path_separator == "\\") {
return c == '\\' || c == '/';
}

return c == k_preferred_path_separator[0];
}

void normalize_separator(std::string& path) {
char preferred_sep = k_preferred_path_separator[0];
if (preferred_sep == '/') {
// Do nothing on linux.
return;
}

for (size_t i = 0; i < path.size(); i++) {
if (is_path_separator(path[i]) && path[i] != preferred_sep) {
path[i] = preferred_sep;
}
}
}

std::string path_join(const std::string& origin, const std::string& append) {
if (origin.find_last_of(k_preferred_path_separator) != origin.length() - k_preferred_path_separator.length()) {
return origin + k_preferred_path_separator + append;
}
return origin + append;
}

std::string clean_relative_path(const std::string& path) {
if (path.empty()) {
return ".";
}

std::string out;

char sep = k_preferred_path_separator[0];
size_t n = path.size();

size_t r = 0;
size_t dotdot = 0;

while (r < n) {
if (is_path_separator(path[r])) {
r++;
continue;
}

if (path[r] == '.' && (r + 1 == n || is_path_separator(path[r + 1]))) {
r++;
continue;
}

if (path[r] == '.' && path[r + 1] == '.' && (r + 2 == n || is_path_separator(path[r + 2]))) {
r += 2;

if (out.size() > dotdot) {
while (out.size() > dotdot && !is_path_separator(out.back())) {
out.pop_back();
}
if (!out.empty())
out.pop_back();
} else {
if (!out.empty()) {
out.push_back(sep);
}

out.push_back('.');
out.push_back('.');
dotdot = out.size();
}

continue;
}

if (!out.empty() && out.back() != sep) {
out.push_back(sep);
}

for (; r < n && !is_path_separator(path[r]); r++) {
out.push_back(path[r]);
}
}

if (out.empty()) {
out.push_back('.');
}

// Use 1 separator in path.
normalize_separator(out);

return out;
}

} // namespace ONNX_NAMESPACE
2 changes: 2 additions & 0 deletions onnx/common/path.h
Expand Up @@ -18,5 +18,7 @@ const std::string k_preferred_path_separator = "/";
#endif

std::string path_join(const std::string& origin, const std::string& append);
void normalize_separator(std::string& path);
std::string clean_relative_path(const std::string& path);

} // namespace ONNX_NAMESPACE
70 changes: 70 additions & 0 deletions onnx/test/cpp/common_path_test.cc
@@ -0,0 +1,70 @@
/*
* SPDX-License-Identifier: Apache-2.0
*/

#include <list>
#include <utility>
#include "gtest/gtest.h"

#include "onnx/common/path.h"

using namespace ONNX_NAMESPACE;

namespace ONNX_NAMESPACE {
namespace Test {
namespace {
std::string fix_sep(std::string path) {
std::string out = path;
normalize_separator(out);
return out;
}
} // namespace

TEST(PathTest, CleanRelativePathTest) {
// Already normal.
EXPECT_EQ(clean_relative_path("abc"), fix_sep("abc"));
EXPECT_EQ(clean_relative_path("abc/def"), fix_sep("abc/def"));
EXPECT_EQ(clean_relative_path("a/b/c"), fix_sep("a/b/c"));
EXPECT_EQ(clean_relative_path("."), fix_sep("."));
EXPECT_EQ(clean_relative_path(".."), fix_sep(".."));
EXPECT_EQ(clean_relative_path("../.."), fix_sep("../.."));
EXPECT_EQ(clean_relative_path("../../abc"), fix_sep("../../abc"));
// Remove leading slash
EXPECT_EQ(clean_relative_path("/abc"), fix_sep("abc"));
EXPECT_EQ(clean_relative_path("/"), fix_sep("."));
// Remove trailing slash
EXPECT_EQ(clean_relative_path("abc/"), fix_sep("abc"));
EXPECT_EQ(clean_relative_path("abc/def/"), fix_sep("abc/def"));
EXPECT_EQ(clean_relative_path("a/b/c/"), fix_sep("a/b/c"));
EXPECT_EQ(clean_relative_path("./"), fix_sep("."));
EXPECT_EQ(clean_relative_path("../"), fix_sep(".."));
EXPECT_EQ(clean_relative_path("../../"), fix_sep("../.."));
EXPECT_EQ(clean_relative_path("/abc/"), fix_sep("abc"));
// Remove doubled slash
EXPECT_EQ(clean_relative_path("abc//def//ghi"), fix_sep("abc/def/ghi"));
EXPECT_EQ(clean_relative_path("//abc"), fix_sep("abc"));
EXPECT_EQ(clean_relative_path("///abc"), fix_sep("abc"));
EXPECT_EQ(clean_relative_path("//abc//"), fix_sep("abc"));
EXPECT_EQ(clean_relative_path("abc//"), fix_sep("abc"));
// Remove . elements
EXPECT_EQ(clean_relative_path("abc/./def"), fix_sep("abc/def"));
EXPECT_EQ(clean_relative_path("/./abc/def"), fix_sep("abc/def"));
EXPECT_EQ(clean_relative_path("abc/."), fix_sep("abc"));
// Remove .. elements
EXPECT_EQ(clean_relative_path("abc/def/ghi/../jkl"), fix_sep("abc/def/jkl"));
EXPECT_EQ(clean_relative_path("abc/def/../ghi/../jkl"), fix_sep("abc/jkl"));
EXPECT_EQ(clean_relative_path("abc/def/.."), fix_sep("abc"));
EXPECT_EQ(clean_relative_path("abc/def/../.."), fix_sep("."));
EXPECT_EQ(clean_relative_path("/abc/def/../.."), fix_sep("."));
EXPECT_EQ(clean_relative_path("abc/def/../../.."), fix_sep(".."));
EXPECT_EQ(clean_relative_path("/abc/def/../../.."), fix_sep(".."));
EXPECT_EQ(clean_relative_path("abc/def/../../../ghi/jkl/../../../mno"), fix_sep("../../mno"));
EXPECT_EQ(clean_relative_path("/../abc"), fix_sep("../abc"));
// Combinations
EXPECT_EQ(clean_relative_path("abc/./../def"), fix_sep("def"));
EXPECT_EQ(clean_relative_path("abc//./../def"), fix_sep("def"));
EXPECT_EQ(clean_relative_path("abc/../../././../def"), fix_sep("../../def"));
}

} // namespace Test
} // namespace ONNX_NAMESPACE
63 changes: 54 additions & 9 deletions onnx/test/test_external_data.py
Expand Up @@ -49,7 +49,6 @@ def create_external_data_tensor(self, value: List[Any], tensor_name: str) -> Ten
return tensor

def create_test_model(self) -> str:

constant_node = onnx.helper.make_node(
'Constant',
inputs=[],
Expand Down Expand Up @@ -226,7 +225,8 @@ def test_convert_model_to_external_data_from_one_file_with_location(self) -> Non
model_file_path = self.get_temp_model_filename()
external_data_file = str(uuid.uuid4())

convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=True, location=external_data_file)
convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=True,
location=external_data_file)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(os.path.join(self.temp_dir, external_data_file)))
Expand Down Expand Up @@ -260,7 +260,8 @@ def test_convert_model_to_external_data_from_one_file_without_location_uses_mode
def test_convert_model_to_external_data_one_file_per_tensor_without_attribute(self) -> None:
model_file_path = self.get_temp_model_filename()

convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=False, convert_attribute=False)
convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=False,
convert_attribute=False)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(model_file_path))
Expand All @@ -270,7 +271,8 @@ def test_convert_model_to_external_data_one_file_per_tensor_without_attribute(se
def test_convert_model_to_external_data_one_file_per_tensor_with_attribute(self) -> None:
model_file_path = self.get_temp_model_filename()

convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=False, convert_attribute=True)
convert_model_to_external_data(self.model, size_threshold=0, all_tensors_to_one_file=False,
convert_attribute=True)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(model_file_path))
Expand All @@ -280,7 +282,8 @@ def test_convert_model_to_external_data_one_file_per_tensor_with_attribute(self)
def test_convert_model_to_external_data_does_not_convert_attribute_values(self) -> None:
model_file_path = self.get_temp_model_filename()

convert_model_to_external_data(self.model, size_threshold=0, convert_attribute=False, all_tensors_to_one_file=False)
convert_model_to_external_data(self.model, size_threshold=0, convert_attribute=False,
all_tensors_to_one_file=False)
onnx.save_model(self.model, model_file_path)

self.assertTrue(Path.isfile(os.path.join(self.temp_dir, "input_value")))
Expand Down Expand Up @@ -399,11 +402,11 @@ def get_temp_model_filename(self) -> str:
def create_test_model(self) -> ModelProto:
X = helper.make_tensor_value_info('X', TensorProto.FLOAT, self.large_data.shape)
input_init = helper.make_tensor(name='X', data_type=TensorProto.FLOAT,
dims=self.large_data.shape, vals=self.large_data.tobytes(), raw=True)
dims=self.large_data.shape, vals=self.large_data.tobytes(), raw=True)

shape_data = np.array(self.small_data, np.int64)
shape_init = helper.make_tensor(name='Shape', data_type=TensorProto.INT64,
dims=shape_data.shape, vals=shape_data.tobytes(), raw=True)
dims=shape_data.shape, vals=shape_data.tobytes(), raw=True)
C = helper.make_tensor_value_info('C', TensorProto.INT64, self.small_data)

reshape = onnx.helper.make_node(
Expand Down Expand Up @@ -432,13 +435,14 @@ def test_check_model(self) -> None:
checker.check_model(self.model)

def test_reshape_inference_with_external_data_fail(self) -> None:
onnx.save_model(self.model, self.model_file_path, save_as_external_data=True, all_tensors_to_one_file=False, size_threshold=0)
onnx.save_model(self.model, self.model_file_path, save_as_external_data=True, all_tensors_to_one_file=False,
size_threshold=0)
model_without_external_data = onnx.load(self.model_file_path, load_external_data=False)
# Shape inference of Reshape uses ParseData
# ParseData cannot handle external data and should throw the error as follows:
# Cannot parse data from external tensors. Please load external data into raw data for tensor: Shape
self.assertRaises(shape_inference.InferenceError, shape_inference.infer_shapes,
model_without_external_data, strict_mode=True)
model_without_external_data, strict_mode=True)

def test_to_array_with_external_data(self) -> None:
onnx.save_model(self.model,
Expand Down Expand Up @@ -492,5 +496,46 @@ def test_save_model_with_external_data_multiple_times(self) -> None:
self.assertTrue(np.allclose(to_array(small_shape_tensor, self.temp_dir), self.small_data))


class TestNotAllowToLoadExternalDataOutsideModelDirectory(TestLoadExternalDataBase):
"""Essential test to check that onnx (validate) C++ code will not allow to load external_data outside the model
directory. """

def create_external_data_tensor(self, value: List[Any], tensor_name: str) -> TensorProto:
tensor = from_array(np.array(value))
tensor.name = tensor_name

set_external_data(tensor, location="../../file.bin")

tensor.ClearField('raw_data')
tensor.data_location = onnx.TensorProto.EXTERNAL
return tensor

def test_check_model(self) -> None:
"""We only test the model validation as onnxruntime uses this to load the model. """
with self.assertRaises(onnx.checker.ValidationError):
checker.check_model(self.model_filename)


@pytest.mark.skipif(os.name != 'nt', reason='Skip Windows test')
class TestNotAllowToLoadExternalDataOutsideModelDirectoryOnWindows(TestLoadExternalDataBase):
"""Essential test to check that onnx (validate) C++ code will not allow to load external_data outside the model
directory. """

def create_external_data_tensor(self, value: List[Any], tensor_name: str) -> TensorProto:
tensor = from_array(np.array(value))
tensor.name = tensor_name

set_external_data(tensor, location="..\\..\\file.bin")

tensor.ClearField('raw_data')
tensor.data_location = onnx.TensorProto.EXTERNAL
return tensor

def test_check_model(self) -> None:
"""We only test the model validation as onnxruntime uses this to load the model. """
with self.assertRaises(onnx.checker.ValidationError):
checker.check_model(self.model_filename)


if __name__ == '__main__':
unittest.main()

0 comments on commit f369b0e

Please sign in to comment.