diff --git a/.meta/mast/README.md b/.meta/mast/README.md new file mode 100644 index 000000000..e6f64d739 --- /dev/null +++ b/.meta/mast/README.md @@ -0,0 +1,46 @@ +# Forge MAST Environment Setup + +A simple setup script to automatically configure your environment for running Forge with MAST jobs. +This only applies to Meta internal users. + +## Quick Start + +⚠️ Important Note: the setup script will clone the forge repository under "/data/users/$USER". + +### 1. Run the Setup Script + +The `env_setup.sh` script will automatically: +- ✅ Activate and configure the required conda environment +- ✅ Clone/update the Forge repository +- ✅ Install Forge package dependencies +- ✅ Mount the required oilfs workspace to `/mnt/wsfuse` +- ✅ Configure your environment for MAST job submission + +```bash +# Make the script executable +chmod +x .meta/mast/env_setup.sh + +# Run the setup +./.meta/mast/env_setup.sh + +``` + +### 2. Submit MAST job + +Use the launch script to submit a MAST job: + +```bash +# Make the launch script executable (first time only) +chmod +x .meta/mast/launch.sh + +# Launch a job with your desired config +./.meta/mast/launch.sh .meta/mast/qwen3_1_7b_mast.yaml +``` + +The launch script will automatically: +- Navigate to the forge root directory +- Reinstall the forge package with your latest changes +- Set the correct PYTHONPATH +- Launch the MAST job with the specified config + +You can run it from any directory: the script locates the forge root from its own location, and the config path you pass is resolved relative to the forge root (as in the example above).
diff --git a/apps/mast/__init__.py b/.meta/mast/__init__.py
similarity index 100%
rename from apps/mast/__init__.py
rename to .meta/mast/__init__.py
diff --git a/apps/mast/env_setup.sh b/.meta/mast/env_setup.sh
similarity index 71%
rename from apps/mast/env_setup.sh
rename to .meta/mast/env_setup.sh
index 8d14371ac..feef663b7 100755
--- a/apps/mast/env_setup.sh
+++ b/.meta/mast/env_setup.sh
@@ -9,6 +9,9 @@
 # setup_forge_env.sh - Setup conda environment and install forge with mounting
 set -e  # Exit on any error
 
+# Configuration: name of the conda environment activated by this script
+CONDA_ENV_NAME="forge:stable"
+
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@@ -45,6 +48,7 @@ mount_workspace() {
         log_info "Creating mount directory: $mount_dir"
         sudo mkdir -p "$mount_dir" || {
             log_error "Failed to create mount directory (may need sudo privileges)"
+            log_error "You could alternatively try to unmount with \`sudo umount /mnt/wsfuse\`"
             return 1
         }
     fi
@@ -130,10 +134,10 @@ if [ ! -f "$CONDA_SCRIPT_PATH" ]; then
 fi
 
 log_info "Sourcing conda script: $CONDA_SCRIPT_PATH"
-source "$CONDA_SCRIPT_PATH" activate forge:e146614
+source "$CONDA_SCRIPT_PATH" activate "$CONDA_ENV_NAME"
 
 if [ $? -ne 0 ]; then
-    log_error "Failed to activate conda environment forge-e146614"
+    log_error "Failed to activate conda environment $CONDA_ENV_NAME"
     exit 1
 fi
 
@@ -191,8 +195,72 @@ fi
 
 log_info "Current directory: $(pwd)"
 
-# Step 5: Install forge package
-log_info "Step 5: Installing forge package..."
+# Step 5: Install torchtitan
+log_info "Step 5: Installing torchtitan..."
+
+# Source versions.sh to get the pinned commit
+VERSIONS_FILE="$FORGE_REPO_DIR/assets/versions.sh"
+if [ -f "$VERSIONS_FILE" ]; then
+    log_info "Sourcing version information from: $VERSIONS_FILE"
+    source "$VERSIONS_FILE"
+
+    if [ -n "$TORCHTITAN_COMMIT" ]; then
+        log_info "Installing torchtitan from commit: $TORCHTITAN_COMMIT"
+        pip uninstall -y torchtitan
+
+        # Test pip directly: under `set -e` a separate `$?` check is never reached on failure
+        if pip install "git+https://github.com/pytorch/torchtitan.git@$TORCHTITAN_COMMIT"; then
+            log_info "Torchtitan installed successfully"
+        else
+            log_error "Failed to install torchtitan"
+            exit 1
+        fi
+    else
+        log_error "TORCHTITAN_COMMIT not found in versions.sh"
+        exit 1
+    fi
+else
+    log_error "versions.sh not found at: $VERSIONS_FILE"
+    log_error "Cannot proceed without version information"
+    exit 1
+fi
+
+# Step 5.5: Apply monarch torch import hack
+log_info "Step 5.5: Applying monarch torch import hack..."
+
+MONARCH_INIT="$CONDA_PREFIX/lib/python3.10/site-packages/monarch/__init__.py"
+if [ -f "$MONARCH_INIT" ]; then
+    # Check if we already applied the hack
+    if grep -q "^import torch # Injected by forge setup" "$MONARCH_INIT"; then
+        log_info "Monarch torch import hack already applied, skipping"
+    else
+        log_info "Injecting 'import torch' into monarch/__init__.py"
+
+        # Create a backup
+        cp "$MONARCH_INIT" "$MONARCH_INIT.bak"
+
+        # Use sed to inject 'import torch' before the "# Import before monarch" comment
+        # We add it right after "from typing import TYPE_CHECKING" and before the comment
+        # sed is the `if` condition so a failure reaches the restore branch despite `set -e`
+        if sed -i '/^from typing import TYPE_CHECKING$/a\
+\
+# Torch must be imported before monarch (injected by forge setup)\
+import torch # Injected by forge setup' "$MONARCH_INIT"
+        then
+            log_info "Successfully injected torch import into monarch/__init__.py"
+        else
+            log_error "Failed to inject torch import, restoring backup"
+            mv "$MONARCH_INIT.bak" "$MONARCH_INIT"
+            exit 1
+        fi
+    fi
+else
+    log_warn "monarch/__init__.py not found at: $MONARCH_INIT"
+    log_warn "Skipping monarch torch import hack (monarch may not be installed yet)"
+fi
+
+# Step 6: Install forge package
+log_info "Step 6: Installing forge package..."
 pip install --no-deps --force-reinstall .
 if [ $? -ne 0 ]; then
     log_error "Failed to install forge package"
@@ -234,5 +302,5 @@ log_info "Mounted workspace available at: /mnt/wsfuse"
 echo ""
 log_info "Installation completed successfully!"
 echo ""
-log_info "Re-activate the conda environment to make the changes take effect:"
-log_info "conda deactivate && conda activate forge-e146614"
+log_info "Test that this is working locally with:"
+log_info "python -m apps.grpo.main --config=apps/grpo/qwen3_1_7b.yaml"
diff --git a/.meta/mast/launch.sh b/.meta/mast/launch.sh
new file mode 100755
index 000000000..46da56d12
--- /dev/null
+++ b/.meta/mast/launch.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# launch.sh - Launch MAST jobs with Forge. Usage: launch.sh <config path relative to forge root>
+set -e  # Exit on any error
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Logging functions
+log_info() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Check if config file is provided
+if [ $# -eq 0 ]; then
+    log_error "No config file provided"
+    echo "Usage: $0 <config_file>"
+    echo "Example: $0 .meta/mast/qwen3_1_7b_mast.yaml"
+    exit 1
+fi
+
+CONFIG_FILE="$1"
+
+# Get the directory where this script is located
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# Navigate to forge root (two levels up from .meta/mast/)
+FORGE_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"
+
+log_info "Forge root directory: $FORGE_ROOT"
+log_info "Config file: $CONFIG_FILE"
+
+# Check if config file exists
+if [ ! -f "$FORGE_ROOT/$CONFIG_FILE" ]; then
+    log_error "Config file not found: $FORGE_ROOT/$CONFIG_FILE"
+    exit 1
+fi
+
+# Navigate to forge root
+cd "$FORGE_ROOT"
+log_info "Changed to directory: $(pwd)"
+
+# Reinstall forge package
+log_info "Reinstalling forge package..."
+# Test pip directly: under `set -e` a separate `$?` check is never reached on failure
+if ! pip install --force-reinstall --no-deps .; then
+    log_error "Failed to reinstall forge package"
+    exit 1
+fi
+
+log_info "Successfully reinstalled forge package"
+
+# Launch the job
+log_info "Launching MAST job..."
+PYTHONPATH=. python .meta/mast/main.py --config "$CONFIG_FILE"
diff --git a/apps/mast/main.py b/.meta/mast/main.py
similarity index 100%
rename from apps/mast/main.py
rename to .meta/mast/main.py
diff --git a/apps/mast/qwen3_14b_mast.yaml b/.meta/mast/qwen3_14b_mast.yaml
similarity index 98%
rename from apps/mast/qwen3_14b_mast.yaml
rename to .meta/mast/qwen3_14b_mast.yaml
index d9e9d7edd..f1f05825f 100644
--- a/apps/mast/qwen3_14b_mast.yaml
+++ b/.meta/mast/qwen3_14b_mast.yaml
@@ -1,5 +1,5 @@
 # Grouped Relative Policy Optimization (GRPO)
-# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
+# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_14b_mast.yaml
 
 # Global configuration
 group_size: 8
diff --git a/apps/mast/qwen3_1_7b_mast.yaml b/.meta/mast/qwen3_1_7b_mast.yaml
similarity index 98%
rename from apps/mast/qwen3_1_7b_mast.yaml
rename to .meta/mast/qwen3_1_7b_mast.yaml
index 5c1033db2..39aaf01ba 100644
--- a/apps/mast/qwen3_1_7b_mast.yaml
+++ b/.meta/mast/qwen3_1_7b_mast.yaml
@@ -1,5 +1,5 @@
 # Grouped Relative Policy Optimization (GRPO)
-# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
+# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_1_7b_mast.yaml
 
 # Global configuration
 group_size: 8
diff --git a/apps/mast/qwen3_32b_mast.yaml b/.meta/mast/qwen3_32b_mast.yaml
similarity index 98%
rename from apps/mast/qwen3_32b_mast.yaml
rename to .meta/mast/qwen3_32b_mast.yaml
index f0e57edac..2dc25509d 100644
--- a/apps/mast/qwen3_32b_mast.yaml
+++ b/.meta/mast/qwen3_32b_mast.yaml
@@ -1,5 +1,5 @@
 # Grouped Relative Policy Optimization (GRPO)
-# >>> python -m apps.mast.main --config apps/mast/qwen3_1_7b_mast.yaml
+# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_32b_mast.yaml
 
 # Global configuration
 group_size: 8
diff --git a/apps/mast/qwen3_4b_mast.yaml
b/.meta/mast/qwen3_4b_mast.yaml similarity index 98% rename from apps/mast/qwen3_4b_mast.yaml rename to .meta/mast/qwen3_4b_mast.yaml index 2a8d2b864..5e74f4b2a 100644 --- a/apps/mast/qwen3_4b_mast.yaml +++ b/.meta/mast/qwen3_4b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml +# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_4b_mast.yaml # Global configuration group_size: 8 diff --git a/apps/mast/qwen3_8b_mast.yaml b/.meta/mast/qwen3_8b_mast.yaml similarity index 98% rename from apps/mast/qwen3_8b_mast.yaml rename to .meta/mast/qwen3_8b_mast.yaml index 81c1f75dd..7f5b49af6 100644 --- a/apps/mast/qwen3_8b_mast.yaml +++ b/.meta/mast/qwen3_8b_mast.yaml @@ -1,5 +1,5 @@ # Grouped Relative Policy Optimization (GRPO) -# >>> python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml +# >>> ./.meta/mast/launch.sh .meta/mast/qwen3_8b_mast.yaml # Global configuration group_size: 8 diff --git a/apps/mast/README.md b/apps/mast/README.md deleted file mode 100644 index 60a9b4146..000000000 --- a/apps/mast/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Forge MAST Environment Setup - -A simple setup script to automatically configure your environment for running Forge with MAST jobs. - -## Quick Start - -⚠️ Important Note: the setup script will clone the forge repository under "/data/users/$USER". - -### 1. Run the Setup Script - -The `env_setup.sh` script will automatically: -- ✅ Activate the required conda environment (`forge-8448524`) -- ✅ Clone/update the Forge repository -- ✅ Install Forge package dependencies -- ✅ Mount the required oilfs workspace to `/mnt/wsfuse` -- ✅ Configure your environment for MAST job submission - -```bash -# Make the script executable -chmod +x env_setup.sh - -# Run the setup -./apps/mast/env_setup.sh - -``` - -### 2. Submit MAST job - -``` -pip install --force-reinstall --no-deps . 
&& python -m apps.mast.main --config apps/mast/qwen3_1_7b_mast.yaml -``` - -⚠️ Important Note: `pip install --force-reinstall --no-deps .` is required every time you make a change to the local codebase. This ensures your latest changes are installed before job submission.