In [6]:
import json

# Earlier input source, left disabled for reference:
#input_file = "/home/vlai-gpt-oss/LLaMA-OSS/LLaMA-Factory/data/gsm8k_origin_train_alpaca.jsonl"
# NOTE(review): relative path - it resolves against the kernel's current
# working directory, unlike the absolute output path below.
input_file = "LLaMA-Factory/data/final/grpo_high.jsonl"
# Destination JSONL written by the conversion loop in this cell.
output_file = "/home/vlai-gpt-oss/LLaMA-OSS/grpo_high.jsonl"

def extract_answer(text):
    r"""Return the first non-empty ``\boxed{...}`` expression found in ``text``.

    The closing brace is located by counting brace depth, so nested LaTeX such
    as ``\boxed{\frac{1}{2}}`` is returned whole instead of being cut at the
    first ``}``. Empty (``\boxed{}``) and unterminated occurrences are skipped
    and the search continues with the next ``\boxed{``.

    Args:
        text: String to search.

    Returns:
        The matched expression including its ``\boxed{`` / ``}`` wrapper, or
        ``""`` when no non-empty, balanced ``\boxed{...}`` is present.
    """
    import re

    for opener in re.finditer(r'\\boxed\{', text):
        begin = opener.end()
        depth = 1
        for pos in range(begin, len(text)):
            ch = text[pos]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    break
        else:
            # Ran off the end of the text without closing this \boxed{.
            continue
        if pos > begin:
            return f"\\boxed{{{text[begin:pos]}}}"
    return ""

# Convert every JSONL record into the GRPO layout {"query": ..., "label": ...}.
# UTF-8 is set explicitly on both files: records are serialized with
# ensure_ascii=False, and the platform default encoding may be unable to
# represent (or decode) non-ASCII text.
with open(input_file, 'r', encoding='utf-8') as fin, open(output_file, 'w', encoding='utf-8') as fout:
    for line in fin:
        if not line.strip():
            continue  # tolerate blank lines instead of failing in json.loads

        data = json.loads(line)

        # The reference answer comes from the record's 'label' field and is
        # normalized to a single \boxed{...} expression ("" when none is found).
        label = extract_answer(data.get('label', ''))

        grpo_data = {
            "query": data.get('prompt', ''),
            "label": label  # Ground truth for reward
        }

        fout.write(json.dumps(grpo_data, ensure_ascii=False) + '\n')

print(f"Converted! Output: {output_file}")

Converted! Output: /home/vlai-gpt-oss/LLaMA-OSS/grpo_high.jsonl


In [None]:
# Sample input record (a single JSONL line) kept as a bare string literal for
# reference; nothing in this cell parses or assigns it.
'''{"prompt": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\n\nPlease reason step by step, and put your final answer within \\boxed{}.\nDo NOT include explanations or reasoning in the final answer - only the numeric value in \\boxed{}.", "label": "\\boxed{72}", "dataset": "gsm8k"}
'''

In [2]:
import json

# Input JSONL read line by line by the conversion loop in this cell.
input_file = "/home/vlai-gpt-oss/LLaMA-OSS/LLaMA-Factory/data/competition_math_origin_train_alpaca.jsonl"
# Destination JSONL for the GRPO-formatted records.
output_file = "/home/vlai-gpt-oss/LLaMA-OSS/compmath_grpo.jsonl"

def extract_answer(text):
    r"""Return the first non-empty ``\boxed{...}`` expression found in ``text``.

    The closing brace is located by counting brace depth, so nested LaTeX such
    as ``\boxed{\frac{1}{2}}`` is returned whole instead of being cut at the
    first ``}``. Empty (``\boxed{}``) and unterminated occurrences are skipped
    and the search continues with the next ``\boxed{``.

    Args:
        text: String to search.

    Returns:
        The matched expression including its ``\boxed{`` / ``}`` wrapper, or
        ``""`` when no non-empty, balanced ``\boxed{...}`` is present.
    """
    import re

    for opener in re.finditer(r'\\boxed\{', text):
        begin = opener.end()
        depth = 1
        for pos in range(begin, len(text)):
            ch = text[pos]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    break
        else:
            # Ran off the end of the text without closing this \boxed{.
            continue
        if pos > begin:
            return f"\\boxed{{{text[begin:pos]}}}"
    return ""

# Convert every JSONL record into the GRPO layout
# {"query": ..., "response": ..., "label": ...}.
# UTF-8 is set explicitly on both files: records are serialized with
# ensure_ascii=False, and the platform default encoding may be unable to
# represent (or decode) non-ASCII text.
with open(input_file, 'r', encoding='utf-8') as fin, open(output_file, 'w', encoding='utf-8') as fout:
    for line in fin:
        if not line.strip():
            continue  # tolerate blank lines instead of failing in json.loads

        data = json.loads(line)
        response = data.get('response', '')

        # The label is the \boxed{...} expression found in the reference
        # response ("" when the response contains none).
        label = extract_answer(response)

        grpo_data = {
            "query": data.get('prompt', ''),
            "response": response,  # Optional reference
            "label": label  # Ground truth for reward
        }

        fout.write(json.dumps(grpo_data, ensure_ascii=False) + '\n')

print(f"Converted! Output: {output_file}")

Converted! Output: /home/vlai-gpt-oss/LLaMA-OSS/compmath_grpo.jsonl


In [4]:
# Concatenate the per-dataset GRPO files into one JSONL, dropping blank lines.
# NOTE(review): both inputs are relative paths, and no cell visible here writes
# 'gsm8k_grpo.jsonl' - confirm the working directory and where that file comes from.
files_to_merge = ['compmath_grpo.jsonl', 'gsm8k_grpo.jsonl']
output_file = 'merged_grpo_data.jsonl'

with open(output_file, 'w', encoding='utf-8') as merged:
    for source_name in files_to_merge:
        with open(source_name, 'r', encoding='utf-8') as source:
            # Strip each line, keep only the non-empty ones, and re-terminate
            # them with a single newline.
            stripped_records = (raw.strip() for raw in source)
            merged.writelines(record + '\n' for record in stripped_records if record)

print(f"Successfully merged into {output_file}")

Successfully merged into merged_grpo_data.jsonl


In [8]:
import json
import re

def _first_boxed_content(text):
    r"""Return the stripped content of the first non-empty, balanced ``\boxed{...}``.

    Braces are depth-counted so nested groups (``\boxed{\frac{1}{2}}``) are
    captured whole. Empty or unterminated occurrences are skipped. Returns
    ``None`` when nothing usable is found.
    """
    for opener in re.finditer(r'\\boxed\{', text):
        begin = opener.end()
        depth = 1
        for pos in range(begin, len(text)):
            ch = text[pos]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    content = text[begin:pos].strip()
                    if content:
                        return content
                    break  # empty box: try the next \boxed{ occurrence
    return None


def extract_answer_from_response(response, dataset):
    r"""Extract the final answer string from a model response.

    Lookup order:
      1. LogiQA only: a ``\boxed{}`` whose content is a single option letter
         A-D (case-insensitive).
      2. LogiQA only: heuristics over the ``<think>...</think>`` block - a
         keyword ("answer"/"option"/"choice") followed by a letter, otherwise
         the last standalone letter A-D.
      3. A ``#### <answer>`` marker anywhere in the response.
      4. The first non-empty ``\boxed{...}``, matched with brace counting.

    Args:
        response: Raw response text. Falsy values yield ``None``.
        dataset: Dataset name, compared case-insensitively with ``'logiqa'``.
            ``None`` is treated as "not LogiQA".

    Returns:
        The answer without any ``\boxed{}`` wrapper, or ``None`` when no
        answer could be found.
    """
    if not response:
        return None

    boxed = _first_boxed_content(response)

    # For LogiQA: Look for A, B, C, D
    if (dataset or '').lower() == 'logiqa':
        # An explicitly boxed option letter is the stated final answer, so it
        # takes precedence over anything inferred from the reasoning text
        # (where the first "option A" is often just the first option discussed).
        if boxed and re.fullmatch(r'[A-D]', boxed, re.IGNORECASE):
            return boxed.upper()

        think_match = re.search(r'<think>(.*?)</think>', response, re.DOTALL)
        if think_match:
            thinking = think_match.group(1)
            # NOTE(review): re.search returns the FIRST keyword+letter pair,
            # which may not be the conclusion; kept as the legacy fallback.
            answer_match = re.search(r'(?:answer|option|choice)\s*(?:is|:|=)?\s*([A-D])\b', thinking, re.IGNORECASE)
            if answer_match:
                return answer_match.group(1).upper()

            letters = re.findall(r'\b([A-D])\b', thinking)
            if letters:
                return letters[-1].upper()

    # For math datasets: Look for #### format
    answer_match = re.search(r'####\s*(.+?)(?:\n|</think>|$)', response)
    if answer_match:
        return answer_match.group(1).strip()

    # Fall back to the boxed content (None when there is no usable box).
    return boxed

def convert_to_grpo_format(input_file, output_file):
    r"""Convert a JSONL dataset into GRPO training records.

    Each non-blank input line is parsed as a JSON object. The fields read are
    ``prompt``, ``instruction``, ``response``, ``mode`` and ``dataset`` (each
    falls back to a default when missing). One JSON object per record is
    written to ``output_file`` with the keys:

    * ``query``  - the instruction, a fixed format instruction and the prompt,
      separated by blank lines;
    * ``mode``   - copied from the input (default ``'low'``);
    * ``answer`` - ``\boxed{<extracted answer>}``, or ``\boxed{unknown}`` when
      no answer could be extracted from ``response``.

    Extraction statistics (overall and per dataset) are printed at the end.

    Args:
        input_file: Path of the source JSONL file (read as UTF-8).
        output_file: Path of the JSONL file to create or overwrite (UTF-8).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    format_instruction = "provide your reasoning in <think> tags, then put your final answer in \\boxed{}"
    stats = {
        'total': 0,
        'answer_extracted': 0,
        'no_answer': 0,
        'by_dataset': {}
    }

    with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
        for line in f_in:
            if not line.strip():
                continue  # blank lines are not records; json.loads would fail on them

            item = json.loads(line)
            stats['total'] += 1

            prompt = item.get('prompt', '')
            instruction = item.get('instruction', 'Respond concisely with minimal reasoning.')
            response = item.get('response', '')
            mode = item.get('mode', 'low')
            # A null/empty dataset is bucketed as 'unknown' so the extractor
            # always receives a string.
            dataset = item.get('dataset') or 'unknown'

            dataset_stats = stats['by_dataset'].setdefault(dataset, {'total': 0, 'success': 0})
            dataset_stats['total'] += 1

            # Build query
            query = f"{instruction}\n\n{format_instruction}\n\n{prompt}"

            extracted_answer = extract_answer_from_response(response, dataset)

            if extracted_answer:
                stats['answer_extracted'] += 1
                dataset_stats['success'] += 1

                # Print first few to verify
                if stats['answer_extracted'] <= 5:
                    print(f"  Extracted: '{extracted_answer}'")

                ground_truth = f"\\boxed{{{extracted_answer}}}"
            else:
                stats['no_answer'] += 1
                # NOTE(review): these records are still written, with a
                # placeholder answer; presumably a reward that compares against
                # it can never be satisfied - confirm whether they should be
                # dropped instead.
                ground_truth = "\\boxed{unknown}"

            grpo_item = {
                'query': query,
                'mode': mode,
                'answer': ground_truth
            }

            f_out.write(json.dumps(grpo_item, ensure_ascii=False) + '\n')

    total = stats['total']
    # Guard the percentage so an empty input file reports 0.0% instead of
    # raising ZeroDivisionError.
    extracted_pct = 100 * stats['answer_extracted'] / total if total > 0 else 0.0

    print("\n✅ Conversion complete!")
    print(f"  Total: {total}")
    print(f"  Extracted: {stats['answer_extracted']} ({extracted_pct:.1f}%)")
    print(f"  No answer: {stats['no_answer']}")

    print("\n📊 By dataset:")
    for dataset, counts in stats['by_dataset'].items():
        rate = 100*counts['success']/counts['total'] if counts['total'] > 0 else 0
        print(f"  {dataset}: {counts['success']}/{counts['total']} ({rate:.1f}%)")

# Convert
# Convert the combined training set, then echo the first few written answers
# as a sanity check.
convert_to_grpo_format(
    '/workspace/LLaMA-OSS/LLaMA-Factory/data/combined_grpo_train.jsonl',
    '/workspace/LLaMA-OSS/train_grpo.jsonl'
)

print("\n" + "="*80)
print("Sample outputs:")
print("="*80)

# Read back as UTF-8: the converter writes UTF-8 with ensure_ascii=False, so
# decoding with the platform default encoding could fail on non-ASCII text.
with open('/workspace/LLaMA-OSS/train_grpo.jsonl', 'r', encoding='utf-8') as f:
    for i, line in enumerate(f):
        if i >= 5:
            break
        data = json.loads(line)
        print(f"{i+1}. {data['answer']}")

  Extracted: '72'
  Extracted: '72'
  Extracted: '72'
  Extracted: '10'
  Extracted: '10'

✅ Conversion complete!
  Total: 26106
  Extracted: 26094 (100.0%)
  No answer: 12

📊 By dataset:
  gsm8k: 9132/9132 (100.0%)
  logiqa: 5298/5298 (100.0%)
  compmath: 11664/11676 (99.9%)

Sample outputs:
1. \boxed{72}
2. \boxed{72}
3. \boxed{72}
4. \boxed{10}
5. \boxed{10}


In [2]:
import json
import numpy as np
from collections import defaultdict
import re

def analyze_thinking_lengths(files):
    """Print length statistics of ``<think>`` blocks, per file and per mode.

    Every file is read as UTF-8 JSONL (blank lines are ignored). For each
    record the text between the first ``<think>`` and ``</think>`` in its
    ``response`` field is measured; records without such a block are skipped.
    Length is an approximate token count: number of characters // 4.

    For each of the modes ``low``, ``medium`` and ``high`` present in a file,
    count, mean, population standard deviation, min, max and median are
    printed, followed by a summary table and a ``self.mode_params`` snippet
    with mean/std truncated to integers.

    Args:
        files: Iterable of JSONL file paths. Missing files are reported and
            skipped.

    Returns:
        None. All results are printed.
    """
    report_modes = ['low', 'medium', 'high']
    results = {}

    for file_path in files:
        print(f"\n{'='*80}")
        print(f"Analyzing: {file_path}")
        print(f"{'='*80}")

        # Read data
        data = []
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    if line.strip():  # blank lines would make json.loads fail
                        data.append(json.loads(line))
        except FileNotFoundError:
            print(f"❌ File not found: {file_path}")
            continue

        print(f"Total examples: {len(data)}")

        # Group by mode and extract thinking lengths
        mode_lengths = defaultdict(list)

        for item in data:
            # `or` also covers explicit nulls, which .get() defaults do not.
            mode = str(item.get('mode') or 'unknown').lower()
            response = item.get('response') or ''

            # Extract thinking content
            match = re.search(r'<think>(.*?)</think>', response, re.DOTALL)
            if match:
                thinking = match.group(1).strip()
                # Approximate token count (characters / 4)
                mode_lengths[mode].append(len(thinking) // 4)

        # Calculate statistics for each mode (each statistic computed once)
        file_results = {}
        for mode in report_modes:
            if mode not in mode_lengths:
                continue

            lengths = mode_lengths[mode]
            summary = {
                'count': len(lengths),
                'mean': np.mean(lengths),
                'std': np.std(lengths),
                'min': min(lengths),
                'max': max(lengths),
                'median': np.median(lengths)
            }
            file_results[mode] = summary

            print(f"\n{mode.upper()}:")
            print(f"  Count: {summary['count']}")
            print(f"  Mean: {summary['mean']:.2f} tokens")
            print(f"  Std: {summary['std']:.2f} tokens")
            print(f"  Min: {summary['min']} tokens")
            print(f"  Max: {summary['max']} tokens")
            print(f"  Median: {summary['median']:.0f} tokens")

        results[file_path] = file_results

    # Summary table
    print("\n" + "="*80)
    print("SUMMARY TABLE")
    print("="*80)
    print(f"\n{'File':<40} {'Mode':<10} {'Count':<8} {'Mean':<10} {'Std':<10}")
    print("-" * 80)

    for file_path, modes in results.items():
        filename = file_path.split('/')[-1]
        for mode in report_modes:
            if mode in modes:
                stats = modes[mode]
                print(f"{filename:<40} {mode:<10} {stats['count']:<8} {stats['mean']:<10.1f} {stats['std']:<10.1f}")

    # Code output
    print("\n" + "="*80)
    print("FOR REWARD MODEL CODE:")
    print("="*80)

    for file_path, modes in results.items():
        filename = file_path.split('/')[-1]
        print(f"\n# {filename}")
        print("self.mode_params = {")
        for mode in report_modes:
            if mode in modes:
                # int() truncates toward zero, matching the printed snippet format.
                mean = int(modes[mode]['mean'])
                std = int(modes[mode]['std'])
                print(f"    '{mode}': {{'mean': {mean}, 'std': {std}}},")
        print("}")

# Analyze all three files
# SFT training splits to profile, one file per reasoning mode; the list order
# is the order in which the per-file reports are printed.
files = [
    '/workspace/LLaMA-OSS/LLaMA-Factory/data/combined_sft_train_high.jsonl',
    '/workspace/LLaMA-OSS/LLaMA-Factory/data/combined_sft_train_low.jsonl',
    '/workspace/LLaMA-OSS/LLaMA-Factory/data/combined_sft_train_medium.jsonl',
]

analyze_thinking_lengths(files=files)


Analyzing: /workspace/LLaMA-OSS/LLaMA-Factory/data/combined_sft_train_high.jsonl
Total examples: 5641



HIGH:
  Count: 5641
  Mean: 1514.27 tokens
  Std: 452.97 tokens
  Min: 424 tokens
  Max: 2880 tokens
  Median: 1470 tokens

Analyzing: /workspace/LLaMA-OSS/LLaMA-Factory/data/combined_sft_train_low.jsonl
Total examples: 10388

LOW:
  Count: 10388
  Mean: 104.75 tokens
  Std: 53.86 tokens
  Min: 24 tokens
  Max: 337 tokens
  Median: 94 tokens

Analyzing: /workspace/LLaMA-OSS/LLaMA-Factory/data/combined_sft_train_medium.jsonl
Total examples: 9485

MEDIUM:
  Count: 9485
  Mean: 501.84 tokens
  Std: 236.30 tokens
  Min: 104 tokens
  Max: 1356 tokens
  Median: 455 tokens

SUMMARY TABLE

File                                     Mode       Count    Mean       Std       
--------------------------------------------------------------------------------
combined_sft_train_high.jsonl            high       5641     1514.3     453.0     
combined_sft_train_low.jsonl             low        10388    104.7      53.9      
combined_sft_train_medium.jsonl          medium     9485     501.8      236.3   