From 058a2cd1c402be2d00367e24d664871a152455d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=88=E7=AB=A5?= <31961076+tongchen126@users.noreply.github.com>
Date: Mon, 17 Nov 2025 17:45:23 +0800
Subject: [PATCH] Initialize chord dataset after accelerator setup in GRPOTrainer

The get_chord_sft_dataloader() method relies on GRPOTrainer.accelerator, but it
was previously called before the parent class's __init__ (super().__init__) had
finished initializing the accelerator. As a result, get_chord_sft_dataloader()
raised an AttributeError because the GRPOTrainer.accelerator attribute did not
exist yet. This patch moves the CHORD dataset setup into a separate
_prepare_chord_dataset() method and calls it after super().__init__().
---
 swift/trainers/rlhf_trainer/grpo_trainer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py
index 53cc4b5c99..350154a569 100644
--- a/swift/trainers/rlhf_trainer/grpo_trainer.py
+++ b/swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -81,6 +81,7 @@ def __init__(self,
         reward_templates = kwargs.pop('reward_template', None)
         self._prepare_algorithm_params()
         super().__init__(model, ref_model, *_args, **kwargs)
+        self._prepare_chord_dataset()
         self.prepare_rollout()
         self._prepare_rewards(reward_funcs, reward_model, reward_templates)
@@ -1868,6 +1869,7 @@ def _prepare_algorithm_params(self):
         self.advantage_estimator = args.advantage_estimator
         self.kl_in_reward = args.kl_in_reward
 
+    def _prepare_chord_dataset(self):
         # CHORD, https://arxiv.org/abs/2508.11408
         self.chord_sft_iterator = None
         if self.chord_sft_dataset:
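
For context, below is a minimal, self-contained sketch of the initialization-order
pitfall this patch fixes. It is not the actual swift/GRPOTrainer code: the base
class, constructor arguments, and dataset stand-ins are simplified assumptions;
only the method names _prepare_chord_dataset, get_chord_sft_dataloader, and the
accelerator attribute come from the patch above.

    # Sketch only: names other than _prepare_chord_dataset,
    # get_chord_sft_dataloader, and accelerator are hypothetical.

    class BaseTrainer:
        def __init__(self):
            # The parent __init__ is what creates self.accelerator.
            self.accelerator = object()  # stand-in for accelerate.Accelerator()

    class GRPOTrainerSketch(BaseTrainer):
        def __init__(self, chord_sft_dataset=None):
            self.chord_sft_dataset = chord_sft_dataset

            # Buggy order: self.accelerator does not exist yet, so calling
            # self._prepare_chord_dataset() here would raise AttributeError
            # inside get_chord_sft_dataloader().

            super().__init__()

            # Fixed order (what the patch does): call the helper only after
            # super().__init__() has set up the accelerator.
            self._prepare_chord_dataset()

        def _prepare_chord_dataset(self):
            self.chord_sft_iterator = None
            if self.chord_sft_dataset:
                self.chord_sft_iterator = iter(self.get_chord_sft_dataloader())

        def get_chord_sft_dataloader(self):
            # Relies on self.accelerator, hence the ordering requirement.
            _ = self.accelerator
            return iter(self.chord_sft_dataset)

    if __name__ == "__main__":
        trainer = GRPOTrainerSketch(chord_sft_dataset=[{"text": "example"}])
        print(next(trainer.chord_sft_iterator))

The same constraint explains the second hunk: the CHORD block is factored out of
_prepare_algorithm_params() (which runs before super().__init__()) into its own
method so that it can be invoked after the accelerator exists.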