def extract_feedback_counts(feedback: str) -> Tuple[int, int]:
    """Return ``(green, yellow)`` counts from the most recent score line.

    A *score line* is a line that, once all whitespace is removed, is made up
    solely of the marker letters ``G``, ``Y`` and ``X`` (for example
    ``"X Y G X X"``). The number of ``G`` markers is reported as the green
    count and the number of ``Y`` markers as the yellow count.

    Args:
        feedback: Raw feedback text, possibly spanning several lines and
            several rounds of guesses.

    Returns:
        ``(green, yellow)`` for the last score line found, or ``(0, 0)`` when
        the text is empty, has fewer than two non-blank lines, or contains no
        score line.
    """
    if not feedback:
        return (0, 0)

    markers = frozenset("GYX")

    # Collapse inner whitespace so "G Y X", "G\tY\tX" and "GYX" all compare
    # equal, then drop lines that were blank to begin with.
    compact_lines = ["".join(raw.split()) for raw in feedback.splitlines()]
    compact_lines = [line for line in compact_lines if line]

    # A lone line is never treated as a score line: feedback is expected to
    # carry at least one other line (e.g. the guess) next to the markers.
    if len(compact_lines) < 2:
        return (0, 0)

    # Walk backwards so the latest round wins when the text holds history;
    # this also meets the score line before the guess line printed above it.
    for line in reversed(compact_lines):
        if markers.issuperset(line):
            return (line.count("G"), line.count("Y"))

    return (0, 0)