Skip to content

Commit

Permalink
[jit] Validate statement parsing during class deserialization (pytorc…
Browse files Browse the repository at this point in the history
…h#108417)

Hi!

I've been fuzzing different PyTorch modules with [sydr-fuzz](https://github.com/ispras/oss-sydr-fuzz/tree/master/projects/pytorch), and found a SEGV that occurs during class deserialization in the JIT module.

Docker environment to reproduce the error: [Dockerfile](https://github.com/ispras/oss-sydr-fuzz/tree/master/projects/pytorch).

### PoC:
[crash-bfbab61bf86755aa712bb978e26057ae76d75fe4.txt](https://github.com/pytorch/pytorch/files/12499228/crash-bfbab61bf86755aa712bb978e26057ae76d75fe4.txt)

### ASAN report
```
==1003115==ERROR: AddressSanitizer: SEGV on unknown address (pc 0x00000db61680 bp 0x7fffffff5e30 sp 0x7fffffff5a60 T0)
==1003115==The signal is caused by a READ memory access.
==1003115==Hint: this fault was caused by a dereference of a high value address (see register values below).  Disassemble the provided pc to learn which register was used.
    #0 0xdb61680 in c10::intrusive_ptr<torch::jit::Tree, c10::detail::intrusive_target_default_null_type<torch::jit::Tree> >::retain_() /pytorch/c10/util/intrusive_ptr.h:265:54
    #1 0xdb6721c in c10::intrusive_ptr<torch::jit::Tree, c10::detail::intrusive_target_default_null_type<torch::jit::Tree> >::intrusive_ptr(c10::intrusive_ptr<torch::jit::Tree, c10::detail::intrusive_target_default_null_type<torch::jit::Tree> > const&) /pytorch/c10/util/intrusive_ptr.h:354:5
    #2 0xdb6721c in torch::jit::Expr::Expr(c10::intrusive_ptr<torch::jit::Tree, c10::detail::intrusive_target_default_null_type<torch::jit::Tree> > const&) /pytorch/torch/csrc/jit/frontend/tree_views.h:270:49
    #3 0xdbf73b9 in torch::jit::Maybe<torch::jit::Expr>::get() const /pytorch/torch/csrc/jit/frontend/tree_views.h:212:12
    #4 0xecac171 in torch::jit::SourceImporterImpl::importClass(c10::QualifiedName const&, torch::jit::ClassDef const&, bool) /pytorch/torch/csrc/jit/serialization/import_source.cpp:454:64
    #5 0xeca0ada in torch::jit::SourceImporterImpl::importNamedType(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, torch::jit::ClassDef const&) /pytorch/torch/csrc/jit/serialization/import_source.cpp:288:5
    #6 0xeca7422 in torch::jit::SourceImporterImpl::findNamedType(c10::QualifiedName const&) /pytorch/torch/csrc/jit/serialization/import_source.cpp:140:5
    #7 0xeca295c in torch::jit::SourceImporterImpl::resolveType(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, torch::jit::SourceRange const&) /pytorch/torch/csrc/jit/serialization/import_source.cpp:261:10
    #8 0xdd03bc8 in torch::jit::ScriptTypeParser::parseTypeFromExpr(torch::jit::Expr const&) const /pytorch/torch/csrc/jit/frontend/script_type_parser.cpp:238:24
    #9 0xdcfc9b6 in torch::jit::ScriptTypeParser::parseType(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) /pytorch/torch/csrc/jit/frontend/script_type_parser.cpp:312:10
    #10 0xecbac43 in torch::jit::SourceImporter::loadType(c10::QualifiedName const&) const /pytorch/torch/csrc/jit/serialization/import_source.cpp:786:27
    #11 0xec2b5d3 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0::operator()(c10::QualifiedName const&) const /pytorch/torch/csrc/jit/serialization/import.cpp:146:33
    #12 0xec2b5d3 in c10::StrongTypePtr std::__invoke_impl<c10::StrongTypePtr, torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&>(std::__invoke_other, torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/invoke.h:60:14
    #13 0xec2b4a0 in std::enable_if<is_invocable_r_v<c10::StrongTypePtr, torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&>, c10::StrongTypePtr>::type std::__invoke_r<c10::StrongTypePtr, torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&>(torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0&, c10::QualifiedName const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/invoke.h:113:9
    #14 0xec2b3a0 in std::_Function_handler<c10::StrongTypePtr (c10::QualifiedName const&), torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::$_0>::_M_invoke(std::_Any_data const&, c10::QualifiedName const&) /usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/std_function.h:291:9
    #15 0xec95f7c in std::function<c10::StrongTypePtr (c10::QualifiedName const&)>::operator()(c10::QualifiedName const&) const /usr/bin/../lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/std_function.h:622:14
    #16 0xed78721 in torch::jit::Unpickler::readGlobal(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) /pytorch/torch/csrc/jit/serialization/unpickler.cpp:844:9
    #17 0xed87821 in torch::jit::Unpickler::readInstruction() /pytorch/torch/csrc/jit/serialization/unpickler.cpp:520:7
    #18 0xed85b27 in torch::jit::Unpickler::run() /pytorch/torch/csrc/jit/serialization/unpickler.cpp:253:27
    #19 0xed85781 in torch::jit::Unpickler::parse_ivalue() /pytorch/torch/csrc/jit/serialization/unpickler.cpp:206:3
    #20 0xec9c7be in torch::jit::readArchiveAndTensors(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, c10::optional<std::function<c10::StrongTypePtr (c10::QualifiedName const&)> >, c10::optional<std::function<c10::intrusive_ptr<c10::ivalue::Object, c10::detail::intrusive_target_default_null_type<c10::ivalue::Object> > (c10::StrongTypePtr, c10::IValue)> >, c10::optional<c10::Device>, caffe2::serialize::PyTorchStreamReader&, c10::Type::SingletonOrSharedTypePtr<c10::Type> (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&), std::shared_ptr<torch::jit::DeserializationStorageContext>) /pytorch/torch/csrc/jit/serialization/import_read.cpp:53:20
    #21 0xec2b168 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::readArchive(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) /pytorch/torch/csrc/jit/serialization/import.cpp:184:10
    #22 0xec27235 in torch::jit::(anonymous namespace)::ScriptModuleDeserializer::deserialize(c10::optional<c10::Device>, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >&, bool) /pytorch/torch/csrc/jit/serialization/import.cpp:287:19
    #23 0xec25644 in torch::jit::import_ir_module(std::shared_ptr<torch::jit::CompilationUnit>, std::istream&, c10::optional<c10::Device>, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >&, bool, bool) /pytorch/torch/csrc/jit/serialization/import.cpp:389:25
    #24 0xec2dcbe in torch::jit::import_ir_module(std::shared_ptr<torch::jit::CompilationUnit>, std::istream&, c10::optional<c10::Device>, bool) /pytorch/torch/csrc/jit/serialization/import.cpp:325:10
    #25 0xec30659 in torch::jit::load(std::istream&, c10::optional<c10::Device>, bool) /pytorch/torch/csrc/jit/serialization/import.cpp:485:10
    #26 0x8d8636 in LLVMFuzzerTestOneInput /load.cc:42:14
    #27 0x8d835d in ExecuteFilesOnyByOne /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:255:7
    #28 0x8d8168 in LLVMFuzzerRunDriver /AFLplusplus/utils/aflpp_driver/aflpp_driver.c
    #29 0x8d7d28 in main /AFLplusplus/utils/aflpp_driver/aflpp_driver.c:300:10
    #30 0x7ffff7a37082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 1878e6b475720c7c51969e69ab2d276fae6d1dee)
    #31 0x817add in _start (/load_afl+0x817add)

AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /pytorch/c10/util/intrusive_ptr.h:265:54 in c10::intrusive_ptr<torch::jit::Tree, c10::detail::intrusive_target_default_null_type<torch::jit::Tree> >::retain_()
==1003115==ABORTING

```

Pull Request resolved: pytorch#108417
Approved by: https://github.com/ezyang
  • Loading branch information
apach301 authored and pytorchmergebot committed Sep 5, 2023
1 parent 96d7407 commit e787708
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions torch/csrc/jit/serialization/import_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,12 @@ void SourceImporterImpl::importClass(
switch (statement.kind()) {
case TK_ASSIGN: {
const auto assign = Assign(statement);
// Guard for the special class-body assignments handled below: fails with a
// TORCH_CHECK error that includes `name` when the assignment has no
// right-hand side, so the subsequent `assign.rhs().get()` calls are never
// made on an absent value.
auto check_assign_values = [&assign](const std::string& name) {
TORCH_CHECK(
assign.rhs().present(),
"Malformed assignment statement: missing values to assign in ",
name);
};
switch (assign.lhs().kind()) {
case TK_VAR: {
const auto name = Var(assign.lhs()).name().name();
Expand All @@ -451,6 +457,7 @@ void SourceImporterImpl::importClass(
is_module,
"Assignments in class body only "
"supported on modules right now");
check_assign_values(name);
const auto param_list = ListLiteral(assign.rhs().get()).inputs();
for (const auto& param : param_list) {
parameter_names.insert(StringLiteral(param).text());
Expand All @@ -461,6 +468,7 @@ void SourceImporterImpl::importClass(
} else if (name == "__buffers__") {
TORCH_INTERNAL_ASSERT(
is_module, "Buffers only exist on modules at the moment");
check_assign_values(name);
const auto buffer_list = ListLiteral(assign.rhs().get()).inputs();
for (const auto& buffer : buffer_list) {
buffer_names.insert(StringLiteral(buffer).text());
Expand All @@ -469,6 +477,7 @@ void SourceImporterImpl::importClass(
TORCH_INTERNAL_ASSERT(
is_module,
"Forward pre hooks only exist on modules at the moment");
check_assign_values(name);
const auto pre_hook_list =
ListLiteral(assign.rhs().get()).inputs();
for (const auto& pre_hook : pre_hook_list) {
Expand All @@ -480,6 +489,7 @@ void SourceImporterImpl::importClass(
TORCH_INTERNAL_ASSERT(
is_module,
"Forward hooks only exist on modules at the moment");
check_assign_values(name);
const auto hook_list = ListLiteral(assign.rhs().get()).inputs();
for (const auto& hook : hook_list) {
std::string hook_name = StringLiteral(hook).text();
Expand Down

0 comments on commit e787708

Please sign in to comment.