From 058a2cd1c402be2d00367e24d664871a152455d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=88=E7=AB=A5?= <31961076+tongchen126@users.noreply.github.com>
Date: Mon, 17 Nov 2025 17:45:23 +0800
Subject: [PATCH] Initialize chord dataset after accelerator setup in GRPOTrainer

The get_chord_sft_dataloader() method relies on GRPOTrainer.accelerator, but it
was previously called before the parent class's __init__ (super().__init__) had
finished initializing the accelerator. As a result, get_chord_sft_dataloader()
raised an AttributeError because the GRPOTrainer.accelerator attribute did not
exist yet. This patch moves the CHORD dataset setup into a separate
_prepare_chord_dataset() method and calls it after super().__init__().
---
 swift/trainers/rlhf_trainer/grpo_trainer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py
index 53cc4b5c99..350154a569 100644
--- a/swift/trainers/rlhf_trainer/grpo_trainer.py
+++ b/swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -81,6 +81,7 @@ def __init__(self,
         reward_templates = kwargs.pop('reward_template', None)
         self._prepare_algorithm_params()
         super().__init__(model, ref_model, *_args, **kwargs)
+        self._prepare_chord_dataset()
         self.prepare_rollout()
         self._prepare_rewards(reward_funcs, reward_model, reward_templates)
@@ -1868,6 +1869,7 @@ def _prepare_algorithm_params(self):
         self.advantage_estimator = args.advantage_estimator
         self.kl_in_reward = args.kl_in_reward
 
+    def _prepare_chord_dataset(self):
         # CHORD, https://arxiv.org/abs/2508.11408
         self.chord_sft_iterator = None
         if self.chord_sft_dataset:
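
For context, below is a minimal, self-contained sketch of the initialization-order
pitfall this patch fixes. It is not the actual swift/GRPOTrainer code: the base
class, constructor arguments, and dataset stand-ins are simplified assumptions;
only the method names _prepare_chord_dataset, get_chord_sft_dataloader, and the
accelerator attribute come from the patch above.

    # Sketch only: names other than _prepare_chord_dataset,
    # get_chord_sft_dataloader, and accelerator are hypothetical.

    class BaseTrainer:
        def __init__(self):
            # The parent __init__ is what creates self.accelerator.
            self.accelerator = object()  # stand-in for accelerate.Accelerator()

    class GRPOTrainerSketch(BaseTrainer):
        def __init__(self, chord_sft_dataset=None):
            self.chord_sft_dataset = chord_sft_dataset

            # Buggy order: self.accelerator does not exist yet, so calling
            # self._prepare_chord_dataset() here would raise AttributeError
            # inside get_chord_sft_dataloader().

            super().__init__()

            # Fixed order (what the patch does): call the helper only after
            # super().__init__() has set up the accelerator.
            self._prepare_chord_dataset()

        def _prepare_chord_dataset(self):
            self.chord_sft_iterator = None
            if self.chord_sft_dataset:
                self.chord_sft_iterator = iter(self.get_chord_sft_dataloader())

        def get_chord_sft_dataloader(self):
            # Relies on self.accelerator, hence the ordering requirement.
            _ = self.accelerator
            return iter(self.chord_sft_dataset)

    if __name__ == "__main__":
        trainer = GRPOTrainerSketch(chord_sft_dataset=[{"text": "example"}])
        print(next(trainer.chord_sft_iterator))

The same constraint explains the second hunk: the CHORD block is factored out of
_prepare_algorithm_params() (which runs before super().__init__()) into its own
method so that it can be invoked after the accelerator exists.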