Skip to content

Commit 07ee41c

Browse files
authored
Adding a way to clear GPU memory (#722)
1 parent 22d16cf commit 07ee41c

File tree

4 files changed

+65
-0
lines changed

4 files changed

+65
-0
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- src/api.rs:599
-- pgml::api::clear_gpu_cache
-- NOTE(review): this function is STRICT, so Postgres returns NULL *without
-- calling it* whenever "memory_usage" IS NULL -- which is the declared
-- DEFAULT. Confirm that the zero-argument call path actually reaches the
-- Rust implementation; otherwise STRICT should be dropped at the generator.
-- NOTE(review): IMMUTABLE is also suspect -- the function has a side effect
-- (frees GPU memory) and returns different results over time; verify intent.
CREATE FUNCTION pgml."clear_gpu_cache"(
	"memory_usage" REAL DEFAULT NULL /* Option<f32> */
) RETURNS bool /* bool */
IMMUTABLE STRICT PARALLEL SAFE
LANGUAGE c /* Rust */
AS 'MODULE_PATHNAME', 'clear_gpu_cache_wrapper';

pgml-extension/src/api.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,29 @@ pub fn embed_batch(
580580
crate::bindings::transformers::embed(transformer, inputs, &kwargs.0)
581581
}
582582

583+
584+
/// Clears the GPU cache.
585+
///
586+
/// # Arguments
587+
///
588+
/// * `memory_usage` - Optional parameter indicating the memory usage percentage (0.0 -> 1.0)
589+
///
590+
/// # Returns
591+
///
592+
/// Returns `true` if the GPU cache was successfully cleared, `false` otherwise.
593+
/// # Example
594+
///
595+
/// ```sql
596+
/// SELECT pgml.clear_gpu_cache(memory_usage => 0.5);
597+
/// ```
598+
#[pg_extern(immutable, parallel_safe, name = "clear_gpu_cache")]
599+
pub fn clear_gpu_cache(
600+
memory_usage: default!(Option<f32>, "NULL")
601+
) -> bool {
602+
let memory_usage: Option<f32> = memory_usage.map(|memory_usage| memory_usage.try_into().unwrap());
603+
crate::bindings::transformers::clear_gpu_cache(memory_usage)
604+
}
605+
583606
#[pg_extern(immutable, parallel_safe)]
584607
pub fn chunk(
585608
splitter: &str,

pgml-extension/src/bindings/transformers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,17 @@ def embed(transformer, inputs, kwargs):
131131

132132
return model.encode(inputs, **kwargs)
133133

134+
def clear_gpu_cache(memory_usage=None):
    """Empty the CUDA caching allocator, optionally gated by a usage threshold.

    Args:
        memory_usage: Optional fraction (0.0 -> 1.0). When provided, the cache
            is cleared only if current GPU memory utilization is at or above
            this threshold. When omitted/None, the cache is always cleared.
            (Was annotated ``memory_usage: None`` -- an annotation, not a
            default -- so the bare Python call ``clear_gpu_cache()`` failed;
            a real default preserves the Rust caller's positional call.)

    Returns:
        True if the cache was cleared, False otherwise.

    Raises:
        PgMLException: if no CUDA device is available.
    """
    if not torch.cuda.is_available():
        # Typo fixed ("availables") and needless f-string prefix dropped.
        raise PgMLException("No GPU available")

    # NOTE(review): torch.cuda.memory_usage() reports the percent of time
    # global memory was being read/written (nvidia-smi utilization), not the
    # fraction of memory capacity in use -- confirm this is the intended
    # gauge for the threshold. `memory_usage` is a fraction, scaled to the
    # 0-100 range reported by torch.
    mem_used = torch.cuda.memory_usage()
    if not memory_usage or mem_used >= int(memory_usage * 100.0):
        torch.cuda.empty_cache()
        return True
    return False
144+
134145

135146
def load_dataset(name, subset, limit: None, kwargs: "{}"):
136147
kwargs = orjson.loads(kwargs)

pgml-extension/src/bindings/transformers.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,3 +311,26 @@ pub fn load_dataset(
311311

312312
num_rows
313313
}
314+
315+
pub fn clear_gpu_cache(
316+
memory_usage: Option<f32>
317+
) -> bool {
318+
319+
Python::with_gil(|py| -> bool {
320+
let clear_gpu_cache: Py<PyAny> = PY_MODULE.getattr(py, "clear_gpu_cache").unwrap().into();
321+
clear_gpu_cache
322+
.call1(
323+
py,
324+
PyTuple::new(
325+
py,
326+
&[
327+
memory_usage.into_py(py),
328+
],
329+
),
330+
)
331+
.unwrap()
332+
.extract(py)
333+
.unwrap()
334+
})
335+
}
336+

0 commit comments

Comments
 (0)