Merge pull request #34 from GunnarFarneback/highlevel_release
High level release function
jw3126 authored Oct 19, 2023
2 parents 4688a1a + 9260fe2 commit 5ed108b
Showing 5 changed files with 39 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "ONNXRunTime"
uuid = "e034b28e-924e-41b2-b98f-d2bbeb830c6a"
authors = ["Jan Weidner <jw3126@gmail.com> and contributors"]
version = "0.4.0"
version = "0.4.1"

[deps]
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
4 changes: 4 additions & 0 deletions README.md
@@ -49,6 +49,10 @@ julia> import CUDA, cuDNN
julia> ORT.load_inference(path, execution_provider=:cuda)
```

Memory allocated by a model is automatically released once the model
object goes out of scope and is deleted by the garbage collector. It
can also be released immediately with `release(model)`.
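
A minimal usage sketch of `release` (reusing the `increment2x3.onnx` test model referenced in this repository's test suite; `ORT.testdatapath` is the package's test-data helper):

```julia
import ONNXRunTime as ORT

# Load the small test model bundled with the package.
path = ORT.testdatapath("increment2x3.onnx")
model = ORT.load_inference(path, execution_provider=:cpu)

input = randn(Float32, 2, 3)
model((; input))     # run inference while the session is alive
ORT.release(model)   # free the session's memory immediately
# model((; input))   # calling a released session throws an ErrorException
```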

The low level API mirrors the official [C-API](https://github.com/microsoft/onnxruntime/blob/v1.8.1/include/onnxruntime/core/session/onnxruntime_c_api.h#L347). The above example looks like this:
```julia
using ONNXRunTime.CAPI
25 changes: 24 additions & 1 deletion src/highlevel.jl
@@ -12,7 +12,7 @@ end

using .CAPI
using .CAPI: juliatype, EXECUTION_PROVIDERS
export InferenceSession, load_inference
export InferenceSession, load_inference, release

"""
$TYPEDEF
@@ -146,6 +146,7 @@ function (o::InferenceSession)(
inputs,
output_names=nothing
)
isalive(o) || error("Session has been released and can no longer be called.")
if output_names === nothing
output_names = @__MODULE__().output_names(o)
end
@@ -179,3 +180,25 @@ function (o::InferenceSession)(
output_tensors = Run(o.api, o.session, run_options, inp_names, input_tensors, output_names)
make_output(o, inputs, output_names, output_tensors)
end

"""
    release(o::InferenceSession)::Nothing
Release memory allocated to an [`InferenceSession`](@ref). This also
happens automatically when the object has gone out of scope and the
garbage collector deletes it.
However, there is no guarantee when that happens, so it can be useful
to manually release the memory. This is especially true when the model
has allocated GPU memory, which does not put pressure on the garbage
collector to run promptly.
Using the inference session after releasing is an error.
"""
function release(o::InferenceSession)
CAPI.release(o.api, o.session)
CAPI.release(o.api, o.meminfo)
CAPI.release(o.api, o.allocator)
end

isalive(o::InferenceSession) = all(CAPI.isalive, (o.session, o.meminfo, o.allocator))
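
The `release`/`isalive` pair above implements a common handle-lifecycle pattern: free the native resources eagerly, mark the wrapper dead, and guard every later call. A self-contained sketch of the same idea, using a hypothetical `Handle` type that is not part of this package:

```julia
# Hypothetical illustration of the release/isalive pattern:
# the wrapper owns a native resource, `release` frees it eagerly
# and marks it dead, and each call checks `isalive` first.
mutable struct Handle
    ptr::Ptr{Cvoid}   # stands in for a native resource
end

isalive(h::Handle) = h.ptr != C_NULL

function release(h::Handle)
    # free the native resource here, then mark the handle dead
    h.ptr = C_NULL
    return nothing
end

function use(h::Handle)
    isalive(h) || error("Handle has been released and can no longer be used.")
    # ... work with h.ptr ...
    return :ok
end

h = Handle(Ptr{Cvoid}(1))
use(h)       # fine while alive
release(h)
# use(h)     # would now throw an ErrorException
```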
1 change: 1 addition & 0 deletions test/test_cuda_extension.jl
@@ -77,6 +77,7 @@ end
using ONNXRunTime
load_inference("$(onnx_path)", execution_provider = :cpu)
"""
@test success(run(`julia +1.9 --project=$(env) -e "$(test_script)"`))
# CUDA not loaded. Well, cuDNN pulls in CUDA so this passes anyway.
test_script = """
using ONNXRunTime
9 changes: 9 additions & 0 deletions test/test_highlevel.jl
@@ -151,6 +151,15 @@ using ONNXRunTime: juliatype
@test out.x_plus_1 ≈ x .+ 1
@test out.y_plus_2 ≈ y .+ 2
end
@testset "Release session" begin
path = ORT.testdatapath("increment2x3.onnx")
model = ORT.load_inference(path, execution_provider=:cpu)
input = randn(Float32, 2, 3)
y = model((;input))
release(model)
@test_throws ErrorException y = model((;input))
@test_throws "Session has been released and can no longer be called." y = model((;input))
end
end



2 comments on commit 5ed108b

@jw3126

@jw3126 jw3126 commented on 5ed108b Oct 19, 2023


@JuliaRegistrator register()

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/93711

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.1 -m "<description of version>" 5ed108bac71213b530934b5a781bb37341c3d8fd
git push origin v0.4.1
