From 4cdfb78e68c2ca74fad677aee4791468fec2d3a9 Mon Sep 17 00:00:00 2001 From: Allen Wang <9057208+allenwang28@users.noreply.github.com> Date: Mon, 13 Oct 2025 12:10:36 -0700 Subject: [PATCH 1/3] initial commit --- {apps => .meta}/mast/README.md | 7 +- {apps => .meta}/mast/__init__.py | 0 {apps => .meta}/mast/env_setup.sh | 80 +++++++++++++++++++++-- {apps => .meta}/mast/main.py | 0 {apps => .meta}/mast/qwen3_14b_mast.yaml | 2 +- {apps => .meta}/mast/qwen3_1_7b_mast.yaml | 2 +- {apps => .meta}/mast/qwen3_32b_mast.yaml | 2 +- {apps => .meta}/mast/qwen3_4b_mast.yaml | 2 +- {apps => .meta}/mast/qwen3_8b_mast.yaml | 2 +- 9 files changed, 83 insertions(+), 14 deletions(-) rename {apps => .meta}/mast/README.md (77%) rename {apps => .meta}/mast/__init__.py (100%) rename {apps => .meta}/mast/env_setup.sh (71%) rename {apps => .meta}/mast/main.py (100%) rename {apps => .meta}/mast/qwen3_14b_mast.yaml (98%) rename {apps => .meta}/mast/qwen3_1_7b_mast.yaml (98%) rename {apps => .meta}/mast/qwen3_32b_mast.yaml (98%) rename {apps => .meta}/mast/qwen3_4b_mast.yaml (98%) rename {apps => .meta}/mast/qwen3_8b_mast.yaml (98%) diff --git a/apps/mast/README.md b/.meta/mast/README.md similarity index 77% rename from apps/mast/README.md rename to .meta/mast/README.md index 60a9b4146..e7c05c5b3 100644 --- a/apps/mast/README.md +++ b/.meta/mast/README.md @@ -1,6 +1,7 @@ # Forge MAST Environment Setup A simple setup script to automatically configure your environment for running Forge with MAST jobs. +This only applies to Meta internal users. ## Quick Start @@ -9,7 +10,7 @@ A simple setup script to automatically configure your environment for running Fo ### 1. 
Run the Setup Script The `env_setup.sh` script will automatically: -- ✅ Activate the required conda environment (`forge-8448524`) +- ✅ Activate and configure the required conda environment - ✅ Clone/update the Forge repository - ✅ Install Forge package dependencies - ✅ Mount the required oilfs workspace to `/mnt/wsfuse` @@ -20,14 +21,14 @@ The `env_setup.sh` script will automatically: chmod +x env_setup.sh # Run the setup -./apps/mast/env_setup.sh +./.meta/mast/env_setup.sh ``` ### 2. Submit MAST job ``` -pip install --force-reinstall --no-deps . && python -m apps.mast.main --config apps/mast/qwen3_1_7b_mast.yaml +pip install --force-reinstall --no-deps . && python -m .meta.mast.main --config .meta/mast/qwen3_1_7b_mast.yaml ``` ⚠️ Important Note: `pip install --force-reinstall --no-deps .` is required every time you make a change to the local codebase. This ensures your latest changes are installed before job submission. diff --git a/apps/mast/__init__.py b/.meta/mast/__init__.py similarity index 100% rename from apps/mast/__init__.py rename to .meta/mast/__init__.py diff --git a/apps/mast/env_setup.sh b/.meta/mast/env_setup.sh similarity index 71% rename from apps/mast/env_setup.sh rename to .meta/mast/env_setup.sh index 8d14371ac..feef663b7 100755 --- a/apps/mast/env_setup.sh +++ b/.meta/mast/env_setup.sh @@ -9,6 +9,9 @@ # setup_forge_env.sh - Setup conda environment and install forge with mounting set -e # Exit on any error +# Configuration +CONDA_ENV_NAME="forge:stable" + # Colors for output RED='\033[0;31m' GREEN='\033[0;32m' @@ -45,6 +48,7 @@ mount_workspace() { log_info "Creating mount directory: $mount_dir" sudo mkdir -p "$mount_dir" || { log_error "Failed to create mount directory (may need sudo privileges)" + log_error "You could alternatively try to unmount with 'sudo umount /mnt/wsfuse'" return 1 } fi @@ -130,10 +134,10 @@ if [ ! 
-f "$CONDA_SCRIPT_PATH" ]; then fi log_info "Sourcing conda script: $CONDA_SCRIPT_PATH" -source "$CONDA_SCRIPT_PATH" activate forge:e146614 +source "$CONDA_SCRIPT_PATH" activate "$CONDA_ENV_NAME" if [ $? -ne 0 ]; then - log_error "Failed to activate conda environment forge-e146614" + log_error "Failed to activate conda environment $CONDA_ENV_NAME" exit 1 fi @@ -191,8 +195,72 @@ fi log_info "Current directory: $(pwd)" -# Step 5: Install forge package -log_info "Step 5: Installing forge package..." +# Step 5: Install torchtitan +log_info "Step 5: Installing torchtitan..." + +# Source versions.sh to get the pinned commit +VERSIONS_FILE="$FORGE_REPO_DIR/assets/versions.sh" +if [ -f "$VERSIONS_FILE" ]; then + log_info "Sourcing version information from: $VERSIONS_FILE" + source "$VERSIONS_FILE" + + if [ -n "$TORCHTITAN_COMMIT" ]; then + log_info "Installing torchtitan from commit: $TORCHTITAN_COMMIT" + pip uninstall -y torchtitan + pip install "git+https://github.com/pytorch/torchtitan.git@$TORCHTITAN_COMMIT" + + if [ $? -eq 0 ]; then + log_info "Torchtitan installed successfully" + else + log_error "Failed to install torchtitan" + exit 1 + fi + else + log_error "TORCHTITAN_COMMIT not found in versions.sh" + exit 1 + fi +else + log_error "versions.sh not found at: $VERSIONS_FILE" + log_error "Cannot proceed without version information" + exit 1 +fi + +# Step 5.5: Apply monarch torch import hack +log_info "Step 5.5: Applying monarch torch import hack..." 
+ +MONARCH_INIT="$CONDA_PREFIX/lib/python3.10/site-packages/monarch/__init__.py" +if [ -f "$MONARCH_INIT" ]; then + # Check if we already applied the hack + if grep -q "^import torch # Injected by forge setup" "$MONARCH_INIT"; then + log_info "Monarch torch import hack already applied, skipping" + else + log_info "Injecting 'import torch' into monarch/__init__.py" + + # Create a backup + cp "$MONARCH_INIT" "$MONARCH_INIT.bak" + + # Use sed to inject 'import torch' before the "# Import before monarch" comment + # We add it right after "from typing import TYPE_CHECKING" and before the comment + sed -i '/^from typing import TYPE_CHECKING$/a\ +\ +# Torch must be imported before monarch (injected by forge setup)\ +import torch # Injected by forge setup' "$MONARCH_INIT" + + if [ $? -eq 0 ]; then + log_info "Successfully injected torch import into monarch/__init__.py" + else + log_error "Failed to inject torch import, restoring backup" + mv "$MONARCH_INIT.bak" "$MONARCH_INIT" + exit 1 + fi + fi +else + log_warn "monarch/__init__.py not found at: $MONARCH_INIT" + log_warn "Skipping monarch torch import hack (monarch may not be installed yet)" +fi + +# Step 6: Install forge package +log_info "Step 6: Installing forge package..." pip install --no-deps --force-reinstall . if [ $? -ne 0 ]; then log_error "Failed to install forge package" @@ -234,5 +302,5 @@ log_info "Mounted workspace available at: /mnt/wsfuse" echo "" log_info "Installation completed successfully!" 
echo "" -log_info "Re-activate the conda environment to make the changes take effect:" -log_info "conda deactivate && conda activate forge-e146614" +log_info "Test that this is working locally with:" +log_info "python -m apps.grpo.main --config=apps/grpo/qwen3_1_7b.yaml" diff --git a/apps/mast/main.py b/.meta/mast/main.py similarity index 100% rename from apps/mast/main.py rename to .meta/mast/main.py diff --git a/apps/mast/qwen3_14b_mast.yaml b/.meta/mast/qwen3_14b_mast.yaml similarity index 98% rename from apps/mast/qwen3_14b_mast.yaml rename to .meta/mast/qwen3_14b_mast.yaml index d9e9d7edd..abd0b15ed 100644 --- a/apps/mast/qwen3_14b_mast.yaml +++ b/.meta/mast/qwen3_14b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml +# >>> python -m .meta.mast.main --config .meta/mast/qwen3_14b_mast.yaml # Global configuration group_size: 8 diff --git a/apps/mast/qwen3_1_7b_mast.yaml b/.meta/mast/qwen3_1_7b_mast.yaml similarity index 98% rename from apps/mast/qwen3_1_7b_mast.yaml rename to .meta/mast/qwen3_1_7b_mast.yaml index 5c1033db2..dda3e7046 100644 --- a/apps/mast/qwen3_1_7b_mast.yaml +++ b/.meta/mast/qwen3_1_7b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml +# >>> python -m .meta.mast.main --config .meta/mast/qwen3_1_7b_mast.yaml # Global configuration group_size: 8 diff --git a/apps/mast/qwen3_32b_mast.yaml b/.meta/mast/qwen3_32b_mast.yaml similarity index 98% rename from apps/mast/qwen3_32b_mast.yaml rename to .meta/mast/qwen3_32b_mast.yaml index f0e57edac..10becc9bc 100644 --- a/apps/mast/qwen3_32b_mast.yaml +++ b/.meta/mast/qwen3_32b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m apps.mast.main --config apps/mast/qwen3_1_7b_mast.yaml +# >>> python -m .meta.mast.main --config .meta/mast/qwen3_32b_mast.yaml # Global configuration group_size: 8 diff 
--git a/apps/mast/qwen3_4b_mast.yaml b/.meta/mast/qwen3_4b_mast.yaml similarity index 98% rename from apps/mast/qwen3_4b_mast.yaml rename to .meta/mast/qwen3_4b_mast.yaml index 2a8d2b864..8cf7596d9 100644 --- a/apps/mast/qwen3_4b_mast.yaml +++ b/.meta/mast/qwen3_4b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml +# >>> python -m .meta.mast.main --config .meta/mast/qwen3_4b_mast.yaml # Global configuration group_size: 8 diff --git a/apps/mast/qwen3_8b_mast.yaml b/.meta/mast/qwen3_8b_mast.yaml similarity index 98% rename from apps/mast/qwen3_8b_mast.yaml rename to .meta/mast/qwen3_8b_mast.yaml index 81c1f75dd..f28abf02d 100644 --- a/apps/mast/qwen3_8b_mast.yaml +++ b/.meta/mast/qwen3_8b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml +# >>> python -m .meta.mast.main --config .meta/mast/qwen3_8b_mast.yaml # Global configuration group_size: 8 From e102bd84a520b885b33912418d1510db52677198 Mon Sep 17 00:00:00 2001 From: Allen Wang <9057208+allenwang28@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:28:14 -0400 Subject: [PATCH 2/3] Update .meta/mast/README.md Co-authored-by: Danielle Pintz <38207072+daniellepintz@users.noreply.github.com> --- .meta/mast/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.meta/mast/README.md b/.meta/mast/README.md index e7c05c5b3..7eb4efc00 100644 --- a/.meta/mast/README.md +++ b/.meta/mast/README.md @@ -18,7 +18,7 @@ The `env_setup.sh` script will automatically: ```bash # Make the script executable -chmod +x env_setup.sh +chmod +x .meta/mast/env_setup.sh # Run the setup ./.meta/mast/env_setup.sh From a8a0c2b3dd7d93738f22d53963e9d0863f1401de Mon Sep 17 00:00:00 2001 From: Allen Wang <9057208+allenwang28@users.noreply.github.com> Date: Mon, 13 Oct 2025 12:40:08 -0700 Subject: [PATCH 3/3] need to test launcher script --- 
.meta/mast/README.md | 18 +++++++-- .meta/mast/launch.sh | 68 +++++++++++++++++++++++++++++++++ .meta/mast/qwen3_14b_mast.yaml | 2 +- .meta/mast/qwen3_1_7b_mast.yaml | 2 +- .meta/mast/qwen3_32b_mast.yaml | 2 +- .meta/mast/qwen3_4b_mast.yaml | 2 +- .meta/mast/qwen3_8b_mast.yaml | 2 +- 7 files changed, 88 insertions(+), 8 deletions(-) create mode 100755 .meta/mast/launch.sh diff --git a/.meta/mast/README.md b/.meta/mast/README.md index 7eb4efc00..e6f64d739 100644 --- a/.meta/mast/README.md +++ b/.meta/mast/README.md @@ -27,8 +27,20 @@ chmod +x .meta/mast/env_setup.sh ### 2. Submit MAST job +Use the launch script to submit a MAST job: + +```bash +# Make the launch script executable (first time only) +chmod +x .meta/mast/launch.sh + +# Launch a job with your desired config +./.meta/mast/launch.sh .meta/mast/qwen3_1_7b_mast.yaml ``` -pip install --force-reinstall --no-deps . && python -m .meta.mast.main --config .meta/mast/qwen3_1_7b_mast.yaml -``` -⚠️ Important Note: `pip install --force-reinstall --no-deps .` is required every time you make a change to the local codebase. This ensures your latest changes are installed before job submission. +The launch script will automatically: +- Navigate to the forge root directory +- Reinstall the forge package with your latest changes +- Set the correct PYTHONPATH +- Launch the MAST job with the specified config + +You can run it from any directory: the script locates the forge root from its own location, and the config path you pass is resolved relative to that root. diff --git a/.meta/mast/launch.sh b/.meta/mast/launch.sh new file mode 100755 index 000000000..46da56d12 --- /dev/null +++ b/.meta/mast/launch.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +# launch.sh - Launch MAST jobs with Forge +set -e # Exit on any error + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Logging functions +log_info() { +    echo -e "${GREEN}[INFO]${NC} $1" +} + +log_error() { +    echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if config file is provided +if [ $# -eq 0 ]; then +    log_error "No config file provided" +    echo "Usage: $0 <config_file>" +    echo "Example: $0 .meta/mast/qwen3_1_7b_mast.yaml" +    exit 1 +fi + +CONFIG_FILE="$1" + +# Get the directory where this script is located +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Navigate to forge root (two levels up from .meta/mast/) +FORGE_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )" + +log_info "Forge root directory: $FORGE_ROOT" +log_info "Config file: $CONFIG_FILE" + +# Check if config file exists +if [ ! -f "$FORGE_ROOT/$CONFIG_FILE" ]; then +    log_error "Config file not found: $FORGE_ROOT/$CONFIG_FILE" +    exit 1 +fi + +# Navigate to forge root +cd "$FORGE_ROOT" +log_info "Changed to directory: $(pwd)" + +# Reinstall forge package +log_info "Reinstalling forge package..." +pip install --force-reinstall --no-deps . +if [ $? -ne 0 ]; then +    log_error "Failed to reinstall forge package" +    exit 1 +fi + +log_info "Successfully reinstalled forge package" + +# Launch the job +log_info "Launching MAST job..." +PYTHONPATH=. 
python .meta/mast/main.py --config "$CONFIG_FILE" diff --git a/.meta/mast/qwen3_14b_mast.yaml b/.meta/mast/qwen3_14b_mast.yaml index abd0b15ed..f1f05825f 100644 --- a/.meta/mast/qwen3_14b_mast.yaml +++ b/.meta/mast/qwen3_14b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m .meta.mast.main --config .meta/mast/qwen3_14b_mast.yaml +# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_14b_mast.yaml # Global configuration group_size: 8 diff --git a/.meta/mast/qwen3_1_7b_mast.yaml b/.meta/mast/qwen3_1_7b_mast.yaml index dda3e7046..39aaf01ba 100644 --- a/.meta/mast/qwen3_1_7b_mast.yaml +++ b/.meta/mast/qwen3_1_7b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m .meta.mast.main --config .meta/mast/qwen3_1_7b_mast.yaml +# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_1_7b_mast.yaml # Global configuration group_size: 8 diff --git a/.meta/mast/qwen3_32b_mast.yaml b/.meta/mast/qwen3_32b_mast.yaml index 10becc9bc..2dc25509d 100644 --- a/.meta/mast/qwen3_32b_mast.yaml +++ b/.meta/mast/qwen3_32b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m .meta.mast.main --config .meta/mast/qwen3_32b_mast.yaml +# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_32b_mast.yaml # Global configuration group_size: 8 diff --git a/.meta/mast/qwen3_4b_mast.yaml b/.meta/mast/qwen3_4b_mast.yaml index 8cf7596d9..5e74f4b2a 100644 --- a/.meta/mast/qwen3_4b_mast.yaml +++ b/.meta/mast/qwen3_4b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m .meta.mast.main --config .meta/mast/qwen3_4b_mast.yaml +# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_4b_mast.yaml # Global configuration group_size: 8 diff --git a/.meta/mast/qwen3_8b_mast.yaml b/.meta/mast/qwen3_8b_mast.yaml index f28abf02d..7f5b49af6 100644 --- a/.meta/mast/qwen3_8b_mast.yaml +++ b/.meta/mast/qwen3_8b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m .meta.mast.main 
--config .meta/mast/qwen3_8b_mast.yaml +# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_8b_mast.yaml # Global configuration group_size: 8