From 64367a06d3b2aabb623228e0f19d4fb23c6cf74e Mon Sep 17 00:00:00 2001
From: Robert Shaw
 <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
Date: Sun, 12 May 2024 20:46:31 -0400
Subject: [PATCH] [CI/Build] Tweak Marlin Nondeterminism Issues (#4713)

---
 tests/models/test_gptq_marlin.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/models/test_gptq_marlin.py b/tests/models/test_gptq_marlin.py
index 891262541cd0..b1c2b88bc99a 100644
--- a/tests/models/test_gptq_marlin.py
+++ b/tests/models/test_gptq_marlin.py
@@ -1,13 +1,11 @@
 """Compares the outputs of gptq vs gptq_marlin 
 Note: GPTQ and Marlin do not have bitwise correctness.
 As a result, in this test, we just confirm that the top selected tokens of the
-Marlin/GPTQ models are in the top 3 selections of each other.
+Marlin/GPTQ models are in the top 5 selections of each other.
 Note: Marlin internally uses locks to synchronize the threads. This can
 result in very slight nondeterminism for Marlin. As a result, we re-run the test
 up to 3 times to see if we pass.
-Note: This test currently fails running with --forked with the following:
-    RuntimeError: Cannot re-initialize CUDA in forked subprocess.
-    To use CUDA with multiprocessing, you must use the 'spawn' start method
+
 Run `pytest tests/models/test_gptq_marlin.py`.
 """
 import os
@@ -49,7 +47,7 @@
 ]
 
 
-@pytest.mark.flaky(reruns=2)
+@pytest.mark.flaky(reruns=3)
 @pytest.mark.skipif(gptq_marlin_not_supported,
                     reason="gptq_marlin is not supported on this GPU type.")
 @pytest.mark.parametrize("model", MODELS)