From 2b3917dc63f7daf197602f3f924a1bb1c0c94802 Mon Sep 17 00:00:00 2001
From: Ilya Sherstyuk
Date: Thu, 17 Aug 2023 22:15:28 +0000
Subject: [PATCH] [ONNX] Fix memory leak when exporting models (#107244)

This commit fixes a memory leak caused by creating a new PyListObject with
PyDict_Items() and never releasing that list. The leaked list often
prevented the entire model from being deallocated, even after all Python
references to it had gone out of scope.
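The relevant CPython contract is that PyDict_Items() returns a new
reference to a freshly created list, while py::reinterpret_borrow assumes
it was handed a borrowed reference and never releases it. Below is a
minimal sketch of the leaking and non-leaking patterns (a standalone
illustration rather than code from this patch; pybind11 and an
already-initialized interpreter are assumed, and the function names are
hypothetical):

```cpp
#include <pybind11/pybind11.h>

namespace py = pybind11;

void leaks(PyObject* dict) {
  // PyDict_Items() returns a NEW reference to a freshly created list.
  // reinterpret_borrow takes one more reference of its own and drops only
  // that one when `items` is destroyed, so the reference returned by
  // PyDict_Items() is never released: the list (and everything it keeps
  // alive) leaks.
  auto items = py::reinterpret_borrow<py::list>(PyDict_Items(dict));
  // ... use items ...
}

void does_not_leak(PyObject* dict) {
  // reinterpret_steal adopts the new reference instead, so the list is
  // released exactly once, when `items` goes out of scope.
  auto items = py::reinterpret_steal<py::list>(PyDict_Items(dict));
  // ... use items ...
}
```

The fix below keeps the borrow but pairs it with an explicit Py_DECREF,
which has the same net effect on the reference count.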
Here is a repro script:

```python
import psutil, torch, transformers, gc, os, sys
import math

# Size in MB
model_size = 512
kB = 1024
MB = kB * kB
precision_size = 4  # bytes per float
activation_size = math.floor(math.sqrt(model_size * MB / precision_size))

class Net(torch.nn.Module):
    def __init__(self, activation_size):
        super(Net, self).__init__()
        self.linear = torch.nn.Linear(activation_size, activation_size)

    def forward(self, x):
        return {"result": self.linear(x)}

def collect_and_report(s):
    gc.collect()
    print(s)
    #print("psutil: ", psutil.virtual_memory().percent)
    print("CPU MB used by this process: ", psutil.Process(os.getpid()).memory_info().rss / 1024 ** 2)
    print("GPU MB allocated by pytorch: ", torch.cuda.memory_allocated(0) / 1024 ** 2)
    print()

def run_test(device_str):
    device = torch.device(device_str)
    dummy_input = torch.zeros(activation_size, requires_grad=True).to(device)
    collect_and_report("Before loading model: ")
    model = Net(activation_size).to(device)
    collect_and_report("After loading model: ")
    torch.onnx.export(model, dummy_input, "dummy.onnx")
    collect_and_report("After exporting model: ")
    del model
    collect_and_report("After deleting model:")

print("Running CPU test: ")
run_test("cpu")
print("Running GPU test: ")
run_test("cuda")
```

Results without this commit:

```
Running CPU test:
Before loading model:
CPU MB used by this process: 346.5
GPU MB allocated by pytorch: 0.0

After loading model:
CPU MB used by this process: 861.078125
GPU MB allocated by pytorch: 0.0

After exporting model:
CPU MB used by this process: 880.12890625
GPU MB allocated by pytorch: 0.0

After deleting model:
CPU MB used by this process: 880.12890625
GPU MB allocated by pytorch: 0.0

Running GPU test:
Before loading model:
CPU MB used by this process: 991.9375
GPU MB allocated by pytorch: 0.04443359375

After loading model:
CPU MB used by this process: 992.19140625
GPU MB allocated by pytorch: 512.0888671875

After exporting model:
CPU MB used by this process: 1026.64453125
GPU MB allocated by pytorch: 520.25830078125

After deleting model:
CPU MB used by this process: 1026.64453125
GPU MB allocated by pytorch: 520.25830078125
```

With this commit:

```
Running CPU test:
Before loading model:
CPU MB used by this process: 372.7734375
GPU MB allocated by pytorch: 0.0

After loading model:
CPU MB used by this process: 887.18359375
GPU MB allocated by pytorch: 0.0

After exporting model:
CPU MB used by this process: 918.96875
GPU MB allocated by pytorch: 0.0

After deleting model:
CPU MB used by this process: 407.3671875
GPU MB allocated by pytorch: 0.0

Running GPU test:
Before loading model:
CPU MB used by this process: 516.6875
GPU MB allocated by pytorch: 0.04443359375

After loading model:
CPU MB used by this process: 516.75390625
GPU MB allocated by pytorch: 512.0888671875

After exporting model:
CPU MB used by this process: 554.25390625
GPU MB allocated by pytorch: 520.2138671875

After deleting model:
CPU MB used by this process: 554.25390625
GPU MB allocated by pytorch: 8.16943359375
```

Fixes #106976

Pull Request resolved: https://github.com/pytorch/pytorch/pull/107244
Approved by: https://github.com/BowenBao, https://github.com/kit1980
---
 torch/csrc/jit/passes/onnx/shape_type_inference.cpp | 5 +++--
 torch/csrc/jit/python/python_arg_flatten.cpp        | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
index 8acdb3ea032e..d8fe34712e6f 100644
--- a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
+++ b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
@@ -2344,8 +2344,8 @@ size_t ONNXAssignOutputShape(
     // Support for dict data type is limited to fixed size dictionaries in
     // ONNX.
     // Dictionary values are unrolled and keys are not preserved.
-    auto unrolled_dict =
-        py::reinterpret_borrow<py::list>(PyDict_Items(output_obj));
+    auto* items = PyDict_Items(output_obj);
+    auto unrolled_dict = py::reinterpret_borrow<py::list>(items);
     TORCH_INTERNAL_ASSERT(PyList_Check(unrolled_dict.ptr()));
     for (const auto i : c10::irange(unrolled_dict.size())) {
       outputs_index = ONNXAssignOutputShape(
@@ -2356,6 +2356,7 @@ size_t ONNXAssignOutputShape(
           is_script,
           opset_version);
     }
+    Py_DECREF(items);
   } else if (THPUtils_checkString(output_obj)) {
     // Ignore string, since they are not supported as output in ONNX.
   } else if (PyNone_Check(output_obj)) {
diff --git a/torch/csrc/jit/python/python_arg_flatten.cpp b/torch/csrc/jit/python/python_arg_flatten.cpp
index 248e64c4d792..0856eb392fb3 100644
--- a/torch/csrc/jit/python/python_arg_flatten.cpp
+++ b/torch/csrc/jit/python/python_arg_flatten.cpp
@@ -55,12 +55,13 @@ void flatten_rec(PyObject* obj, ParsedArgs& args) {
       flatten_rec(item.ptr(), args);
     structure.push_back(D::ListClose);
   } else if (PyDict_Check(obj)) {
-    auto dict_items = PyDict_Items(obj);
+    auto* dict_items = PyDict_Items(obj);
     structure.push_back(D::DictOpen);
     for (auto item : py::reinterpret_borrow<py::list>(dict_items)) {
       flatten_rec(item.ptr(), args);
     }
     structure.push_back(D::DictClose);
+    Py_DECREF(dict_items);
   } else if (THPUtils_checkString(obj)) {
     string str = THPUtils_unpackString(obj);
     args.desc.strings.emplace_back(str);
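
A note on the first hunk: an equivalent way to release the list (a sketch
of an alternative, not the change as merged) is to let pybind11 adopt the
new reference up front, which makes the manual Py_DECREF unnecessary and
also releases the list if anything between the call and the decref throws:

```cpp
// Drop-in alternative for the two added lines in the first hunk above:
// reinterpret_steal adopts the new reference returned by PyDict_Items(),
// so the temporary list is released when unrolled_dict is destroyed,
// even on an exceptional path.
auto unrolled_dict =
    py::reinterpret_steal<py::list>(PyDict_Items(output_obj));
```

In the non-throwing case the borrow-plus-Py_DECREF form used by the patch
has the same net effect on the reference count.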