In [3]:
import os
import json
import glob
from typing import List, Dict, Any
import pandas as pd

In [4]:

def load_jsonl_files(directory_path: str) -> List[Dict[str, Any]]:
    """
    Load all JSONL files from a directory and combine the data.
    
    Args:
        directory_path: Path to the directory containing JSONL files
        
    Returns:
        A list of all examples from all JSONL files
    """
    # Get all JSONL files in the directory
    jsonl_files = glob.glob(os.path.join(directory_path, "*.jsonl"))
    
    if not jsonl_files:
        print(f"No JSONL files found in {directory_path}")
        return []
    
    # Sort the files to process them in order
    jsonl_files.sort()
    
    all_examples = []
    file_stats = {}
    
    # Process each file
    for file_path in jsonl_files:
        file_name = os.path.basename(file_path)
        print(f"Processing file: {file_name}")
        
        examples = []
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    if line.strip():  # Skip empty lines
                        example = json.loads(line)
                        examples.append(example)
            
            file_stats[file_name] = len(examples)
            all_examples.extend(examples)
            
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON in file {file_name}: {e}")
        except Exception as e:
            print(f"Error processing file {file_name}: {e}")
    
    # Print summary statistics
    print("\nSummary:")
    print(f"Total files processed: {len(jsonl_files)}")
    print(f"Total examples loaded: {len(all_examples)}")
    print("\nExamples per file:")
    for file_name, count in file_stats.items():
        print(f"  {file_name}: {count} examples")
    
    return all_examples

In [None]:
directory_path = "results" 

all_data = load_jsonl_files(directory_path)

Processing file: 0_15000.jsonl
Processing file: 105000_120000.jsonl
Processing file: 120000_135000.jsonl
Processing file: 135000_150000.jsonl
Processing file: 150000_165000.jsonl
Processing file: 15000_30000.jsonl
Processing file: 165000_180000.jsonl
Processing file: 180000_195000.jsonl
Processing file: 195000_210000.jsonl
Processing file: 210000_225000.jsonl
Processing file: 225000_240000.jsonl
Processing file: 240000_255000.jsonl
Processing file: 255000_270000.jsonl
Processing file: 270000_285000.jsonl
Processing file: 285000_300000.jsonl
Processing file: 300000_315000.jsonl
Processing file: 30000_45000.jsonl
Processing file: 315000_330000.jsonl
Processing file: 45000_60000.jsonl
Processing file: 60000_75000.jsonl
Processing file: 75000_90000.jsonl
Processing file: 90000_105000.jsonl

Summary:
Total files processed: 22
Total examples loaded: 327680

Examples per file:
  0_15000.jsonl: 15000 examples
  105000_120000.jsonl: 15000 examples
  120000_135000.jsonl: 15000 examples
  135000_

In [34]:
len(all_data)

327680

In [10]:
import pandas as pd

In [11]:
data = pd.read_parquet("train.parquet")

In [None]:
"""<|im_start|>system\nYou are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.<|im_end|>\n<|im_start|>user\n Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.<|im_end|>\n<|im_start|>assistant\nLet me solve this step by step.\n<think>"""

In [None]:
message = [{"role": "system", "content": "You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer."},
           {"role": "user", "content": "Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>."}
           ]

In [19]:
system_prompt = "You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer."
user_prompt = "Using the numbers {numbers}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>."

In [22]:
data.head()

Unnamed: 0,target,nums,data_source,prompt,ability,reward_model,extra_info
0,98,"[44, 19, 35]",countdown,[{'content': '<|im_start|>system You are a hel...,math,"{'ground_truth': {'numbers': [44, 19, 35], 'ta...","{'index': 0, 'split': 'train'}"
1,64,"[63, 95, 96]",countdown,[{'content': '<|im_start|>system You are a hel...,math,"{'ground_truth': {'numbers': [63, 95, 96], 'ta...","{'index': 1, 'split': 'train'}"
2,28,"[95, 11, 56]",countdown,[{'content': '<|im_start|>system You are a hel...,math,"{'ground_truth': {'numbers': [95, 11, 56], 'ta...","{'index': 2, 'split': 'train'}"
3,48,"[19, 74, 45]",countdown,[{'content': '<|im_start|>system You are a hel...,math,"{'ground_truth': {'numbers': [19, 74, 45], 'ta...","{'index': 3, 'split': 'train'}"
4,17,"[49, 41, 73]",countdown,[{'content': '<|im_start|>system You are a hel...,math,"{'ground_truth': {'numbers': [49, 41, 73], 'ta...","{'index': 4, 'split': 'train'}"


In [26]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [30]:
dataset = load_dataset("parquet", data_files="train.parquet")

def get_sample(numbers, target):
    message = [{"role": "system", "content": system_prompt},
               {"role": "user", "content": user_prompt.format(numbers=numbers, target=target)}]
    return {"messages": message}

dataset = dataset.map(lambda x: get_sample(x["nums"], x["target"]))



Map: 100%|██████████| 327680/327680 [00:49<00:00, 6600.02 examples/s]


In [38]:
next(iter(dataset["train"]))

{'target': 98,
 'nums': [44, 19, 35],
 'data_source': 'countdown',
 'prompt': [{'content': '<|im_start|>system\nYou are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.<|im_end|>\n<|im_start|>user\n Using the numbers [44, 19, 35], create an equation that equals 98. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.<|im_end|>\n<|im_start|>assistant\nLet me solve this step by step.\n<think>',
   'role': 'user'}],
 'ability': 'math',
 'reward_model': {'ground_truth': {'numbers': [44, 19, 35], 'target': 98},
  'style': 'rule'},
 'extra_info': {'index': 0, 'split': 'train'},
 'messages': [{'content': 'You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.',
   'role'

In [46]:
import countdown_verifier
from tqdm import tqdm

def get_correct_samples(data, model_responses):
    correct_samples = []
    for data_sample, model_response in tqdm(zip(data, model_responses),total=len(data)):
        model_response = model_response[1]['content']
        ground_truth = {'target': data_sample["target"], 'numbers': data_sample["nums"]}
        score = countdown_verifier.compute_score(
                solution_str=model_response,
                ground_truth=ground_truth,
                method='strict',
                format_score=0.1,
                score=1.
            )
        if score == 1.0:
            messages = data_sample["messages"]
            messages.append({"role": "assistant", "content": "Let me solve this step by step.\n<think>" + model_response})
        
            correct_samples.append(messages)
        
    return correct_samples

correct_samples = get_correct_samples(dataset['train'], all_data)

In [49]:
len(correct_samples)

6645

In [51]:
with open("correct_samples.jsonl", "w") as f:

    for sample in correct_samples:
        sample = {"messages": sample}
        f.write(json.dumps(sample) + "\n")

In [None]:
from tqdm import tqdm

all_messages = []

dataset = 
for i, row in tqdm(data.iterrows(), total=len(data)):
    numbers = row["nums"]
    target = row["target"]
    
    message = [{"role": "system", "content": system_prompt},
               {"role": "user", "content": user_prompt.format(numbers=numbers, target=target)}]
    all_messages.append(message)

100%|██████████| 327680/327680 [00:53<00:00, 6078.46it/s]


In [42]:
next(iter(dataset["train"]))

{'target': 98,
 'nums': [44, 19, 35],
 'data_source': 'countdown',
 'prompt': [{'content': '<|im_start|>system\nYou are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.<|im_end|>\n<|im_start|>user\n Using the numbers [44, 19, 35], create an equation that equals 98. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. Show your work in <think> </think> tags. And return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>.<|im_end|>\n<|im_start|>assistant\nLet me solve this step by step.\n<think>',
   'role': 'user'}],
 'ability': 'math',
 'reward_model': {'ground_truth': {'numbers': [44, 19, 35], 'target': 98},
  'style': 'rule'},
 'extra_info': {'index': 0, 'split': 'train'},
 'messages': [{'content': 'You are a helpful assistant. You first thinks about the reasoning process in the mind and then provides the user with the answer.',
   'role'