In [None]:
# Fetch the TinyZero sources and work from the repository root.
git clone https://github.com/Jiayi-Pan/TinyZero.git
cd TinyZero

# Isolated Python 3.10 environment for this run.
conda create -n tinyzero python=3.10 -y
conda activate tinyzero

# Qwen2-architecture checkpoints need transformers >= 4.37.0; older releases
# fail with KeyError: 'qwen2'. torch and vllm are pinned as a matching pair
# (vllm 0.6.3 is built against torch 2.4.0).
# NOTE(review): confirm these pins against the CUDA version on the target machine.
pip install torch==2.4.0 "transformers>=4.37.0" vllm==0.6.3 huggingface_hub datasets

In [None]:
# Directory that holds the self-generated rollout files; create it if missing.
DATA_DIR="./data/synthetic_rollouts"
export DATA_DIR
mkdir -p "${DATA_DIR}"

In [None]:
# Download the TAU-bench archive into $DATA_DIR and unpack it there.
# Stop early if DATA_DIR is unset/empty instead of building paths from an empty prefix.
: "${DATA_DIR:?DATA_DIR must be set before downloading TAU-bench}"

# Each step runs only if the previous one succeeded, so TAU_DATA is exported
# only when the archive was actually downloaded and extracted.
wget https://tau-bench.s3.amazonaws.com/tau_bench_v1.0.tar.gz -P "$DATA_DIR" \
  && tar -xzvf "$DATA_DIR/tau_bench_v1.0.tar.gz" -C "$DATA_DIR" \
  && export TAU_DATA="$DATA_DIR/tau_bench_v1.0"

In [None]:
# Model Configs
# Number of GPUs available to the run.
export N_GPUS=2
# HuggingFace model ID or local path. Kept in sync with EXPERIMENT_NAME below
# (Qwen2.5, 3B, instruct variant).
export BASE_MODEL="Qwen/Qwen2.5-3B-Instruct"
# Keep an already-exported DATA_DIR; otherwise default to the self-generated data.
export DATA_DIR="${DATA_DIR:-./data/synthetic_rollouts}"
# Tensor-parallel size for rollouts; exported for the training command.
export ROLLOUT_TP_SIZE=2
# Names the checkpoint directory under ./checkpoints/.
export EXPERIMENT_NAME="countdown-qwen2.5-3b-instruct"
# Attention backend selection read by vLLM.
export VLLM_ATTENTION_BACKEND="XFORMERS"



In [None]:
# Launch training with the settings exported in the config cell.
#
# Keep comments off the continued lines: a backslash only continues the line
# when it is the very last character, so "\  # comment" ends the command early
# and the remaining flags are run as a separate (failing) command.
#
#   --tau_data_path  only used if TAU-bench is provided; empty when TAU_DATA is unset
#   --report_to      optional
python train.py \
  --model_name_or_path "$BASE_MODEL" \
  --data_path "$DATA_DIR" \
  --tau_data_path "${TAU_DATA:-}" \
  --output_dir "./checkpoints/$EXPERIMENT_NAME" \
  --num_train_epochs 3 \
  --per_device_train_batch_size 4 \
  --gradient_accumulation_steps 8 \
  --learning_rate 5e-5 \
  --lr_scheduler_type "cosine" \
  --logging_steps 10 \
  --save_steps 500 \
  --bf16 True \
  --tf32 True \
  --use_xformers True \
  --rollout_tp_size "$ROLLOUT_TP_SIZE" \
  --report_to "wandb"

In [None]:
chmod +x ./scripts/train_tiny_zero.sh

# Pick ONE training mode per run; the two modes are alternatives, not steps.
#   USE_TAU_BENCH=0 (default) -> self-generated data only
#   USE_TAU_BENCH=1           -> TAU-bench hybrid training
USE_TAU_BENCH="${USE_TAU_BENCH:-0}"

if [ "$USE_TAU_BENCH" = "1" ]; then
  # Hybrid mode: point the script at the TAU-bench directory under DATA_DIR.
  export TAU_DATA="${DATA_DIR:-./data/synthetic_rollouts}/tau_bench_v1.0"
else
  # Self-generated mode: drop any TAU_DATA left over from an earlier cell so
  # the run really uses the synthetic rollouts only.
  unset TAU_DATA
fi

bash ./scripts/train_tiny_zero.sh

In [None]:
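./data/
└── synthetic_rollouts/            # $DATA_DIR: self-generated rollouts (synthetic data generation)
    ├── rollout_001.jsonl          # Format: {"input": "...", "output": "...", "reward": float}
    ├── ...
    ├── tau_bench_v1.0.tar.gz      # Archive downloaded into $DATA_DIR
    └── tau_bench_v1.0/            # $TAU_DATA: extracted inside $DATA_DIR by the download step
        ├── train.jsonl
        └── test.jsonl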

./checkpoints/
└── countdown-qwen2.5-3b-instruct/
    ├── pytorch_model.bin
    └── config.json

In [None]:
@misc{tau2023,
  title={TAU-bench: A Benchmark for Real-World Reasoning},
  author={TAU Team},
  year={2023},
  url={https://tau-bench.github.io}
}