From 94bde0dba423623c8fb7d5fcb7bb21896d58c81d Mon Sep 17 00:00:00 2001 From: Corey Ostrove Date: Sun, 5 Oct 2025 22:35:59 -0600 Subject: [PATCH] Fix a shared memory bug in lsvec Fixes a shared memory bug in TimeIndependentMDCObjectiveFunction which caused the square root of the objective function terms to be applied multiple times. These changes should hopefully guard those in-place updates by checking that the shared memory leader is the only one making the changes. --- pygsti/objectivefns/objectivefns.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pygsti/objectivefns/objectivefns.py b/pygsti/objectivefns/objectivefns.py index a88975f16..81e40b1ca 100644 --- a/pygsti/objectivefns/objectivefns.py +++ b/pygsti/objectivefns/objectivefns.py @@ -4374,12 +4374,15 @@ def terms(self, paramvec=None, oob_check=False, profiler_str="TERMS OBJECTIVE"): self.model.from_vector(paramvec) terms = self.obj.view() + # Whether this rank is the "leader" of all the processors accessing the same shared self.jac and self.probs mem. + # Only leader processors should modify the contents of the shared memory, so we only apply operations *once* + # `unit_ralloc` is the group of all the procs targeting same destination into self.obj unit_ralloc = self.layout.resource_alloc('atom-processing') shared_mem_leader = unit_ralloc.is_host_leader with self.resource_alloc.temporarily_track_memory(self.nelements): # 'e' (terms) - self.model.sim.bulk_fill_probs(self.probs, self.layout) - self._clip_probs() + self.model.sim.bulk_fill_probs(self.probs, self.layout) # syncs shared mem + self._clip_probs() # clips self.probs in place w/shared mem sync if oob_check: # Only used for termgap cases if not self.model.sim.bulk_test_if_paths_are_sufficient(self.layout, self.probs, verbosity=1): @@ -4412,11 +4415,15 @@ def lsvec(self, paramvec=None, oob_check=False, raw_objfn_lsvec_signs=True): in {bad_locs}. """ raise RuntimeError(msg) - lsvec **= 0.5 + unit_ralloc = self.layout.resource_alloc('atom-processing') + shared_mem_leader = unit_ralloc.is_host_leader + if shared_mem_leader: + lsvec **= 0.5 if raw_objfn_lsvec_signs: - if self.layout.resource_alloc('atom-processing').is_host_leader: + if shared_mem_leader: raw_lsvec = self.raw_objfn.lsvec(self.probs, self.counts, self.total_counts, self.freqs) lsvec[:self.nelements][raw_lsvec < 0] *= -1 + unit_ralloc.host_comm_barrier() return lsvec def dterms(self, paramvec=None):