
Commit

correct job sizes for bootstrapping on large machines
previously, on large machines, the analysis would use just a few
processes for calculating the bootstrapped samples; now we make sure
that there are at least as many jobs in the iterator as there are
cores, or that all job sizes are equal to 1 and there are as many jobs
as there are specified bootstraps
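
As a minimal sketch (not part of the commit; the values are only illustrative, with reps=5000 matching the default in _bootstrap_differences), the new sizing behaves as follows:

import os
from itertools import chain, repeat

reps = 5000                            # default number of bootstraps
job_count = os.cpu_count() * 4         # target number of jobs, so every core stays busy
job_size = max(reps // job_count, 1)   # bootstrap samples computed per job

# same iterator shape as in analysis.py: full-size chunks plus the remainder
chunks = list(chain(repeat(job_size, reps // job_size), [reps % job_size]))

assert sum(chunks) == reps             # every requested bootstrap sample is covered
# either there are at least as many jobs as cores, or every job has size 1
assert len(chunks) - 1 >= os.cpu_count() or job_size == 1
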
tomato42 committed Jul 27, 2022
1 parent 275db63 commit 3f67967
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions tlsfuzzer/analysis.py
@@ -698,7 +698,8 @@ def _bootstrap_differences(self, pair, reps=5000):
         _diffs = data.iloc[:, pair.index2] -\
             data.iloc[:, pair.index1]
 
-        job_size = os.cpu_count() * 10
+        job_count = os.cpu_count() * 4
+        job_size = max(reps // job_count, 1)
 
         keys = ("mean", "median", "trim_mean_05", "trim_mean_25", "trimean")

@@ -708,7 +709,7 @@ def _bootstrap_differences(self, pair, reps=5000):
                 as pool:
             cent_tend = pool.imap_unordered(
                 self._cent_tend_of_random_sample,
-                chain(repeat(job_size, reps//job_size), [reps % job_size]))
+                chain(repeat(job_size, reps // job_size), [reps % job_size]))
 
             for values in cent_tend:
                 # handle reps % job_size == 0
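
For context, the changed line feeds this chunk iterator into the multiprocessing pool. Below is a self-contained sketch of that consumption pattern, assuming a hypothetical stand-in worker sample_stats in place of self._cent_tend_of_random_sample (whose body is not shown in this diff); the stand-in returns one value per requested sample, so the final remainder chunk of size 0 simply yields nothing:

import multiprocessing as mp
import os
import random
import statistics
from itertools import chain, repeat


def sample_stats(sample_count):
    # hypothetical stand-in worker: one central-tendency value per sample,
    # an empty list when the remainder chunk has size 0
    return [statistics.mean(random.random() for _ in range(100))
            for _ in range(sample_count)]


if __name__ == "__main__":
    reps = 5000
    job_count = os.cpu_count() * 4
    job_size = max(reps // job_count, 1)

    with mp.Pool() as pool:
        cent_tend = pool.imap_unordered(
            sample_stats,
            chain(repeat(job_size, reps // job_size), [reps % job_size]))
        values = [v for chunk in cent_tend for v in chunk]

    assert len(values) == reps

That zero-size trailing chunk is also why the loop in the original code carries the "handle reps % job_size == 0" comment: when the division is exact, the last job produces no values.
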
