From b56a6e791f2fed0c69428855107df5cece2816e8 Mon Sep 17 00:00:00 2001
From: "promptless[bot]" <promptless[bot]@users.noreply.github.com>
Date: Wed, 22 Apr 2026 16:55:58 +0000
Subject: [PATCH 1/3] Rename flash run to flash dev and document implicit
 endpoint resolution

- Rename flash/cli/run.mdx to flash/cli/dev.mdx
- Update all references from flash run to flash dev across docs
- Document the new implicit endpoint resolution feature, which is controlled by the FLASH_APP and FLASH_ENV environment variables
- Add a note that `flash run` remains available as a hidden alias for `flash dev` for backward compatibility
---
 docs.json                                     |  2 +-
 flash/apps/build-app.mdx                      | 10 +--
 flash/apps/customize-app.mdx                  |  8 +-
 flash/apps/deploy-apps.mdx                    | 88 +++++++++++++++++++
 flash/apps/initialize-project.mdx             |  8 +-
 flash/apps/local-testing.mdx                  | 32 +++----
 flash/apps/overview.mdx                       |  4 +-
 flash/cli/build.mdx                           |  2 +-
 flash/cli/deploy.mdx                          |  6 +-
 flash/cli/{run.mdx => dev.mdx}                | 26 +++---
 flash/cli/init.mdx                            |  8 +-
 flash/cli/login.mdx                           |  4 +-
 flash/cli/overview.mdx                        |  6 +-
 flash/cli/undeploy.mdx                        |  8 +-
 flash/cli/update.mdx                          |  2 +-
 flash/configuration/best-practices.mdx        |  4 +-
 flash/execution-model.mdx                     |  2 +-
 flash/troubleshooting.mdx                     |  2 +-
 release-notes.mdx                             |  2 +-
 .../build-rest-api-with-load-balancer.mdx     | 14 +--
 20 files changed, 165 insertions(+), 73 deletions(-)
 rename flash/cli/{run.mdx => dev.mdx} (93%)

diff --git a/docs.json b/docs.json
index 114375d8..d464abda 100644
--- a/docs.json
+++ b/docs.json
@@ -391,7 +391,7 @@
               "flash/cli/overview",
               "flash/cli/init",
               "flash/cli/login",
-              "flash/cli/run",
+              "flash/cli/dev",
               "flash/cli/build",
               "flash/cli/deploy",
               "flash/cli/env",
diff --git a/flash/apps/build-app.mdx b/flash/apps/build-app.mdx
index eb54ca15..4e9529f4 100644
--- a/flash/apps/build-app.mdx
+++ b/flash/apps/build-app.mdx
@@ -80,10 +80,10 @@ uv pip install -r requirements.txt
 
 ## Step 4: Start the local API server
 
-Use `flash run` to start the API server:
+Use `flash dev` to start the API server:
 
 ```bash
-uv run flash run
+uv run flash dev
 ```
 
 Open a new terminal tab or window and test your endpoints using cURL:
@@ -100,21 +100,21 @@ curl -X POST http://localhost:8888/lb_worker/process \
     -d '{"input_data": {"message": "Hello from Flash"}}'
 ```
 
-If you switch back to the terminal tab where you used `flash run`, you'll see the details of the job's progress.
+If you switch back to the terminal tab where you used `flash dev`, you'll see the details of the job's progress.
 
 ### Faster testing with auto-provisioning
 
 For development with multiple endpoints, use `--auto-provision` to deploy all resources before testing:
 
 ```bash
-uv run flash run --auto-provision
+uv run flash dev --auto-provision
 ```
 
 This eliminates cold-start delays by provisioning all serverless endpoints upfront. Endpoints are cached and reused across server restarts, making subsequent runs faster. Resources are identified by name, so the same endpoint won't be re-deployed if the configuration hasn't changed.
 
 ## Step 5: Open the API explorer
 
-Besides starting the API server, `flash run` also starts an interactive API explorer. Point your web browser at [http://localhost:8888/docs](http://localhost:8888/docs) to explore the API.
+Besides starting the API server, `flash dev` also starts an interactive API explorer. Point your web browser at [http://localhost:8888/docs](http://localhost:8888/docs) to explore the API.
 
 To run endpoint functions in the explorer:
 
diff --git a/flash/apps/customize-app.mdx b/flash/apps/customize-app.mdx
index ba0bad44..b213d73c 100644
--- a/flash/apps/customize-app.mdx
+++ b/flash/apps/customize-app.mdx
@@ -145,13 +145,13 @@ For details, see:
 
 ## Test your customizations
 
-After customizing your app, test locally with `flash run`:
+After customizing your app, test locally with `flash dev`:
 
 ```bash
-flash run
+flash dev
 
 # If using uv:
-uv run flash run
+uv run flash dev
 ```
 
 This starts a development server at http://localhost:8888 with:
@@ -169,7 +169,7 @@ Make sure to test:
 
 <CardGroup cols={2}>
   <Card title="Test locally" href="/flash/apps/local-testing" icon="flask" horizontal>
-    Use `flash run` for local development and testing.
+    Use `flash dev` for local development and testing.
   </Card>
   <Card title="Deploy to Runpod" href="/flash/apps/deploy-apps" icon="rocket" horizontal>
     Deploy your application to production with `flash deploy`.
diff --git a/flash/apps/deploy-apps.mdx b/flash/apps/deploy-apps.mdx
index ed1bae34..73eef3e8 100644
--- a/flash/apps/deploy-apps.mdx
+++ b/flash/apps/deploy-apps.mdx
@@ -369,6 +369,94 @@ async def classify(text: str) -> dict:
     return {"classification": result}
 ```
 
+## Call deployed endpoints from scripts
+
+After deploying your Flash app, you can call your `@Endpoint` functions directly from Python scripts by setting the `FLASH_APP` and `FLASH_ENV` environment variables. This enables implicit endpoint resolution, where Flash automatically routes function calls to the deployed endpoints in the specified app and environment.
+
+### Set the environment context
+
+Export the environment variables before running your script:
+
+```bash
+export FLASH_APP="my-app"
+export FLASH_ENV="production"
+
+python my_script.py
+```
+
+Or set them inline:
+
+```bash
+FLASH_APP=my-app FLASH_ENV=production python my_script.py
+```
+
+### How it works
+
+When you call an `@Endpoint` function and both `FLASH_APP` and `FLASH_ENV` are set, Flash:
+
+1. Looks up the deployed endpoint by name within the specified app and environment.
+2. Routes the request to that endpoint using Flash's sentinel service.
+3. Returns the result to your script.
+
+This lets you reuse the same `@Endpoint` function definitions to interact with deployed endpoints without modifying your code.
+
+### Example
+
+Given a deployed endpoint:
+
+```python
+# gpu_worker.py
+from runpod_flash import Endpoint, GpuType
+
+@Endpoint(
+    name="inference",
+    gpu=GpuType.NVIDIA_GEFORCE_RTX_4090,
+    dependencies=["torch"]
+)
+async def run_inference(data: dict) -> dict:
+    import torch
+    # Inference logic
+    return {"result": "processed"}
+```
+
+You can call it from a separate script:
+
+```python
+# call_inference.py
+import asyncio
+from gpu_worker import run_inference
+
+async def main():
+    # With FLASH_APP and FLASH_ENV set, this calls the deployed endpoint
+    result = await run_inference({"input": "data"})
+    print(result)
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+Run the script with the environment variables:
+
+```bash
+FLASH_APP=my-app FLASH_ENV=production python call_inference.py
+```
+
+### Error without context
+
+If you run a script with `@Endpoint` calls without setting `FLASH_APP` and `FLASH_ENV`, and without using `flash dev`, Flash raises an error:
+
+```text
+RuntimeError: no flash context for endpoint 'inference'. either:
+  - use 'flash dev' for local development
+  - set FLASH_APP and FLASH_ENV to target a deployed environment
+```
+
+This explicit error prevents accidental provisioning of new endpoints when you intend to use existing deployed ones.
+
+### Automatic context in deployed workers
+
+When Flash deploys your app, it automatically sets `FLASH_APP` and `FLASH_ENV` environment variables on each worker. This enables cross-endpoint communication within your deployed application without additional configuration.
+
 ## Troubleshooting
 
 ### No @Endpoint functions found
diff --git a/flash/apps/initialize-project.mdx b/flash/apps/initialize-project.mdx
index 5e018ec7..2e7058f2 100644
--- a/flash/apps/initialize-project.mdx
+++ b/flash/apps/initialize-project.mdx
@@ -8,7 +8,7 @@ import { LoadBalancingEndpointsTooltip, QueueBasedEndpointsTooltip } from "/snip
 
 The `flash init` command creates a new Flash project with a complete project structure, including example <LoadBalancingEndpointsTooltip /> and <QueueBasedEndpointsTooltip />, and configuration files. This gives you a working starting point for building Flash applications.
 
-Use `flash init` whenever you want to start a new Flash project, fully configured for you to run `flash run` and `flash deploy`.
+Use `flash init` whenever you want to start a new Flash project, fully configured for you to run `flash dev` and `flash deploy`.
 
 ## Create a new project
 
@@ -105,13 +105,13 @@ Once your project is set up:
 
 ```bash
 # Start the development server
-flash run
+flash dev
 
 # Open the API explorer
 # http://localhost:8888/docs
 
 # If using uv:
-uv run flash run
+uv run flash dev
 ```
 
 Make changes to your worker files, and the server reloads automatically. When you're ready, deploy with:
@@ -126,6 +126,6 @@ uv run flash deploy
 ## Next steps
 
 - [Customize your app](/flash/apps/customize-app) to add endpoints and modify configurations.
-- [Test locally](/flash/apps/local-testing) with `flash run`.
+- [Test locally](/flash/apps/local-testing) with `flash dev`.
 - [Deploy to production](/flash/apps/deploy-apps) with `flash deploy`.
 - [View the flash init reference](/flash/cli/init) for all options.
diff --git a/flash/apps/local-testing.mdx b/flash/apps/local-testing.mdx
index 257db090..e2653b84 100644
--- a/flash/apps/local-testing.mdx
+++ b/flash/apps/local-testing.mdx
@@ -1,10 +1,10 @@
 ---
 title: "Test Flash apps locally"
 sidebarTitle: "Test locally"
-description: "Use flash run to test your Flash application locally before deploying."
+description: "Use flash dev to test your Flash application locally before deploying."
 ---
 
-The `flash run` command starts a local development server that lets you test your Flash application before deploying to production. The development server runs locally and updates automatically as you edit files. 
+The `flash dev` command starts a local development server that lets you test your Flash application before deploying to production. The development server runs locally and updates automatically as you edit files.
 
 When you call a `@Endpoint` function, Flash sends the latest function code to Serverless workers on Runpod, so your changes are reflected immediately.
 
@@ -13,10 +13,10 @@ When you call a `@Endpoint` function, Flash sends the latest function code to Se
 From inside your [project directory](/flash/apps/initialize-project), run:
 
 ```bash
-flash run
+flash dev
 
 # If using uv:
-uv run flash run
+uv run flash dev
 ```
 
 The server starts at `http://localhost:8888` by default. Your endpoints are available immediately for testing, and `@Endpoint` functions provision Serverless endpoints on first call.
@@ -25,14 +25,14 @@ The server starts at `http://localhost:8888` by default. Your endpoints are avai
 
 ```bash
 # Change port
-flash run --port 3000
+flash dev --port 3000
 
 # Make accessible on network
-flash run --host 0.0.0.0
+flash dev --host 0.0.0.0
 
 # If using uv:
-uv run flash run --port 3000
-uv run flash run --host 0.0.0.0
+uv run flash dev --port 3000
+uv run flash dev --host 0.0.0.0
 ```
 
 ## Test your endpoints
@@ -96,17 +96,17 @@ print(response.json())
 The first call to a `@Endpoint` function provisions a Serverless endpoint, which takes 30-60 seconds. Use `--auto-provision` to provision all endpoints at startup:
 
 ```bash
-flash run --auto-provision
+flash dev --auto-provision
 
 # If using uv:
-uv run flash run --auto-provision
+uv run flash dev --auto-provision
 ```
 
 This scans your project for `@Endpoint` functions and deploys them before the server starts accepting requests. Endpoints are cached in `.flash/resources.pkl` and reused across server restarts.
 
 ## How it works
 
-With `flash run`, Flash starts a local development server alongside remote Serverless endpoints:
+With `flash dev`, Flash starts a local development server alongside remote Serverless endpoints:
 
 ```mermaid
 %%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#9289FE','primaryTextColor':'#fff','primaryBorderColor':'#9289FE','lineColor':'#5F4CFE','secondaryColor':'#AE6DFF','tertiaryColor':'#FCB1FF','edgeLabelBackground':'#5F4CFE', 'fontSize':'14px','fontFamily':'font-inter'}}}%%
@@ -146,11 +146,11 @@ flowchart TB
 | `@Endpoint` function code | Runpod Serverless |
 | Endpoint storage | Runpod Serverless |
 
-Your code updates automatically as you edit files. Endpoints created by `flash run` are prefixed with `live-` to distinguish them from production endpoints.
+Your code updates automatically as you edit files. Endpoints created by `flash dev` are prefixed with `live-` to distinguish them from production endpoints.
 
 ## Clean up after testing
 
-Endpoints created by `flash run` persist until you delete them. To clean up:
+Endpoints created by `flash dev` persist until you delete them. To clean up:
 
 ```bash
 # List all endpoints
@@ -179,10 +179,10 @@ Flash automatically selects the next available port if your specified port is in
 Use `--auto-provision` to eliminate cold-start delays:
 
 ```bash
-flash run --auto-provision
+flash dev --auto-provision
 
 # If using uv:
-uv run flash run --auto-provision
+uv run flash dev --auto-provision
 ```
 
 **Authentication errors**
@@ -210,4 +210,4 @@ Values in your `.env` file are only available locally for CLI commands. They are
 
 - [Deploy to production](/flash/apps/deploy-apps) when your app is ready.
 - [Clean up endpoints](/flash/cli/undeploy) after testing.
-- [View the flash run reference](/flash/cli/run) for all options.
+- [View the flash dev reference](/flash/cli/dev) for all options.
diff --git a/flash/apps/overview.mdx b/flash/apps/overview.mdx
index f85f7c10..fc42fe6f 100644
--- a/flash/apps/overview.mdx
+++ b/flash/apps/overview.mdx
@@ -59,7 +59,7 @@ Building a Flash application follows a clear progression from initialization to
     Start a local development server to test your application:
 
     ```bash
-    flash run
+    flash dev
     ```
 
     Your app runs locally and updates automatically. When you call an `@Endpoint` function, Flash sends the latest code to Runpod workers. [Learn more about local testing](/flash/apps/local-testing).
@@ -102,7 +102,7 @@ Flash uses a two-level organizational structure: **apps** (project containers) a
     Create boilerplate code for a new Flash project with `flash init`.
   </Card>
   <Card title="Test locally" href="/flash/apps/local-testing" icon="flask" horizontal>
-    Use `flash run` for local development and testing.
+    Use `flash dev` for local development and testing.
   </Card>
   <Card title="Deploy to Runpod" href="/flash/apps/deploy-apps" icon="rocket" horizontal>
     Deploy your application to production with `flash deploy`.
diff --git a/flash/cli/build.mdx b/flash/cli/build.mdx
index f9fa4330..e3c32501 100644
--- a/flash/cli/build.mdx
+++ b/flash/cli/build.mdx
@@ -164,7 +164,7 @@ ls .flash/.build/
 ## Related commands
 
 - [`flash deploy`](/flash/cli/deploy) - Build and deploy in one step (includes `--preview` option for local testing)
-- [`flash run`](/flash/cli/run) - Start development server
+- [`flash dev`](/flash/cli/dev) - Start development server
 - [`flash env`](/flash/cli/env) - Manage environments
 
 <Note>
diff --git a/flash/cli/deploy.mdx b/flash/cli/deploy.mdx
index 41c87d86..644adf84 100644
--- a/flash/cli/deploy.mdx
+++ b/flash/cli/deploy.mdx
@@ -214,9 +214,9 @@ flash deploy --exclude scipy,pandas
 
 See [`flash build` - Managing deployment size](/flash/cli/build#managing-deployment-size) for more details.
 
-## flash run vs flash deploy
+## flash dev vs flash deploy
 
-See [`flash run`](/flash/cli/run#flash-run-vs-flash-deploy) for a detailed comparison of local development vs production deployment.
+See [`flash dev`](/flash/cli/dev#flash-dev-vs-flash-deploy) for a detailed comparison of local development vs production deployment.
 
 ## Troubleshooting
 
@@ -252,7 +252,7 @@ export RUNPOD_API_KEY="your_key_here"
 ## Related commands
 
 - [`flash build`](/flash/cli/build) - Build without deploying
-- [`flash run`](/flash/cli/run) - Local development server
+- [`flash dev`](/flash/cli/dev) - Local development server
 - [`flash env`](/flash/cli/env) - Manage environments
 - [`flash app`](/flash/cli/app) - Manage applications
 - [`flash undeploy`](/flash/cli/undeploy) - Remove endpoints
diff --git a/flash/cli/run.mdx b/flash/cli/dev.mdx
similarity index 93%
rename from flash/cli/run.mdx
rename to flash/cli/dev.mdx
index 3a902078..4ff0d630 100644
--- a/flash/cli/run.mdx
+++ b/flash/cli/dev.mdx
@@ -1,32 +1,36 @@
 ---
-title: "run"
-sidebarTitle: "run"
+title: "dev"
+sidebarTitle: "dev"
 ---
 
 Start the Flash development server for local testing with automatic updates. A local development server provides a unified interface for testing while `@Endpoint` functions execute on Runpod Serverless.
 
 ```bash
-flash run [OPTIONS]
+flash dev [OPTIONS]
 ```
 
+<Note>
+`flash run` is a hidden alias for `flash dev` and works identically. New projects should use `flash dev`.
+</Note>
+
 ## Example
 
 Start the development server with defaults:
 
 ```bash
-flash run
+flash dev
 ```
 
 Start with auto-provisioning to eliminate cold-start delays:
 
 ```bash
-flash run --auto-provision
+flash dev --auto-provision
 ```
 
 Start on a custom port:
 
 ```bash
-flash run --port 3000
+flash dev --port 3000
 ```
 
 ## Flags
@@ -64,11 +68,11 @@ def analyze_text(text: str) -> dict:
     return {"sentiment": "positive"}
 ```
 
-When you run `flash run`, the startup table displays "Analyze text and return sentiment scores" as the description for this endpoint, and the same text appears in the Swagger UI summary.
+When you run `flash dev`, the startup table displays "Analyze text and return sentiment scores" as the description for this endpoint, and the same text appears in the Swagger UI summary.
 
 ## Architecture
 
-With `flash run`, Flash starts a local development server alongside remote Serverless endpoints:
+With `flash dev`, Flash starts a local development server alongside remote Serverless endpoints:
 
 ```mermaid
 %%{init: {'theme':'base', 'themeVariables': { 'primaryColor':'#9289FE','primaryTextColor':'#fff','primaryBorderColor':'#9289FE','lineColor':'#5F4CFE','secondaryColor':'#AE6DFF','tertiaryColor':'#FCB1FF','edgeLabelBackground':'#5F4CFE', 'fontSize':'14px','fontFamily':'font-inter'}}}%%
@@ -114,7 +118,7 @@ This differs from `flash deploy`, where all endpoints run on Runpod without a lo
 By default, endpoints are provisioned lazily on first `@Endpoint` function call. Use `--auto-provision` to provision all endpoints at server startup:
 
 ```bash
-flash run --auto-provision
+flash dev --auto-provision
 ```
 
 ### How it works
@@ -174,9 +178,9 @@ Open http://localhost:8888/docs for the interactive API explorer.
 - `RUNPOD_API_KEY` must be set in your `.env` file or environment.
 - A valid Flash project structure (created by `flash init` or manually).
 
-## flash run vs flash deploy
+## flash dev vs flash deploy
 
-| Aspect | `flash run` | `flash deploy` |
+| Aspect | `flash dev` | `flash deploy` |
 |--------|-------------|----------------|
 | Local development server | Yes (http://localhost:8888) | No |
 | `@Endpoint` functions run on | Runpod Serverless | Runpod Serverless |
diff --git a/flash/cli/init.mdx b/flash/cli/init.mdx
index a2c24351..f5085402 100644
--- a/flash/cli/init.mdx
+++ b/flash/cli/init.mdx
@@ -17,7 +17,7 @@ Create a new project directory:
 flash init PROJECT_NAME
 cd PROJECT_NAME
 pip install -r requirements.txt
-flash run
+flash dev
 ```
 
 Initialize in the current directory:
@@ -68,18 +68,18 @@ After initialization:
 
 1. Copy `.env.example` to `.env` (if needed) and add your `RUNPOD_API_KEY`.
 2. Install dependencies: `pip install -r requirements.txt`
-3. Start the development server: `flash run`
+3. Start the development server: `flash dev`
 4. Open http://localhost:8888/docs to explore the API.
 5. Customize the workers for your use case.
 6. Deploy with `flash deploy` when ready.
 
 <Note>
 
-This command only creates local files. It doesn't interact with Runpod or create any cloud resources. Cloud resources are created when you run `flash run` or `flash deploy`.
+This command only creates local files. It doesn't interact with Runpod or create any cloud resources. Cloud resources are created when you run `flash dev` or `flash deploy`.
 
 </Note>
 
 ## Related commands
 
-- [`flash run`](/flash/cli/run) - Start the development server
+- [`flash dev`](/flash/cli/dev) - Start the development server
 - [`flash deploy`](/flash/cli/deploy) - Build and deploy to Runpod
diff --git a/flash/cli/login.mdx b/flash/cli/login.mdx
index a0d652ce..3dddc586 100644
--- a/flash/cli/login.mdx
+++ b/flash/cli/login.mdx
@@ -44,7 +44,7 @@ Force re-authentication even if credentials already exist. By default, `flash lo
 
 After successful login, your API key is saved to `~/.runpod/config.toml`. This file is used by:
 
-- All Flash CLI commands (`flash run`, `flash deploy`, etc.)
+- All Flash CLI commands (`flash dev`, `flash deploy`, etc.)
 - Standalone Python scripts using `@Endpoint` functions
 - Any code using the Flash SDK
 
@@ -91,5 +91,5 @@ Your Runpod API key needs **All** access permissions.
 ## Related commands
 
 - [`flash init`](/flash/cli/init) - Create a new Flash project
-- [`flash run`](/flash/cli/run) - Start the development server
+- [`flash dev`](/flash/cli/dev) - Start the development server
 - [`flash deploy`](/flash/cli/deploy) - Build and deploy to Runpod
diff --git a/flash/cli/overview.mdx b/flash/cli/overview.mdx
index cf52b8b9..f9afb0d7 100644
--- a/flash/cli/overview.mdx
+++ b/flash/cli/overview.mdx
@@ -15,7 +15,7 @@ Before using the CLI, make sure you've [installed Flash](/flash/overview#install
 |---------|-------------|
 | [`flash init`](/flash/cli/init) | Create a new Flash project with a template structure |
 | [`flash login`](/flash/cli/login) | Authenticate with Runpod using your API key |
-| [`flash run`](/flash/cli/run) | Start the local development server with automatic updates |
+| [`flash dev`](/flash/cli/dev) | Start the local development server with automatic updates |
 | [`flash build`](/flash/cli/build) | Build a deployment artifact without deploying |
 | [`flash deploy`](/flash/cli/deploy) | Build and deploy your application to Runpod |
 | [`flash env`](/flash/cli/env) | Manage deployment environments |
@@ -55,7 +55,7 @@ If you installed Flash with [uv](https://docs.astral.sh/uv/), prefix all Flash c
 
 ```bash
 uv run flash login
-uv run flash run
+uv run flash dev
 uv run flash deploy
 ```
 
@@ -73,7 +73,7 @@ pip install -r requirements.txt
 
 # Add your API key to .env
 # Start the development server
-flash run
+flash dev
 ```
 
 ### Deploy to production
diff --git a/flash/cli/undeploy.mdx b/flash/cli/undeploy.mdx
index f1ab6d09..51b59743 100644
--- a/flash/cli/undeploy.mdx
+++ b/flash/cli/undeploy.mdx
@@ -3,7 +3,7 @@ title: "undeploy"
 sidebarTitle: "undeploy"
 ---
 
-Manage and delete Runpod Serverless endpoints deployed via Flash. Use this command to clean up endpoints created during local development with `flash run`.
+Manage and delete Runpod Serverless endpoints deployed via Flash. Use this command to clean up endpoints created during local development with `flash dev`.
 
 ```bash
 flash undeploy [NAME|list] [OPTIONS]
@@ -139,8 +139,8 @@ For production deployments, use `flash env delete` to remove entire environments
 
 Flash tracks deployed endpoints in `.flash/resources.pkl`. Endpoints are added when you:
 
-- Run `flash run --auto-provision`
-- Run `flash run` and call `@Endpoint` functions
+- Run `flash dev --auto-provision`
+- Run `flash dev` and call `@Endpoint` functions
 - Run `flash deploy`
 
 The tracking file is in `.gitignore` and should never be committed. It contains local deployment state.
@@ -208,6 +208,6 @@ flash undeploy list
 
 ## Related commands
 
-- [`flash run`](/flash/cli/run) - Development server (creates endpoints)
+- [`flash dev`](/flash/cli/dev) - Development server (creates endpoints)
 - [`flash deploy`](/flash/cli/deploy) - Deploy to Runpod
 - [`flash env delete`](/flash/cli/env) - Delete entire environment
diff --git a/flash/cli/update.mdx b/flash/cli/update.mdx
index 1b353159..fd70b0f6 100644
--- a/flash/cli/update.mdx
+++ b/flash/cli/update.mdx
@@ -44,7 +44,7 @@ This check runs at most once every 24 hours and is cached locally to `~/.config/
 
 The background check does not run for:
 
-- `flash run` - Long-running development server where the notice would appear at an unpredictable time.
+- `flash dev` - Long-running development server where the notice would appear at an unpredictable time.
 - `flash update` - Already managing versions directly.
 
 ### Disabling update checks
diff --git a/flash/configuration/best-practices.mdx b/flash/configuration/best-practices.mdx
index b4b6ebc1..022b1690 100644
--- a/flash/configuration/best-practices.mdx
+++ b/flash/configuration/best-practices.mdx
@@ -82,7 +82,7 @@ Here are some best practices for development and testing environments prioritizi
 - **Set `workers=(0, n)`** to minimize costs when not actively testing.
 - **Keep max workers low** (1-3) to control development expenses.
 - **Use short `idle_timeout`** (300 seconds / 5 minutes) to scale down quickly between test runs.
-- **Test locally** with `flash run` before deploying to production.
+- **Test locally** with `flash dev` before deploying to production.
 
 ### Example configuration
 
@@ -167,4 +167,4 @@ Before deploying to production, verify:
 - **Storage**: Network volume attached if using large models or datasets
 - **Environment variables**: All configuration and secrets passed via `env` parameter
 - **Monitoring**: Health check routes implemented (load balancers)
-- **Testing**: Tested locally with `flash run` before production deployment
\ No newline at end of file
+- **Testing**: Tested locally with `flash dev` before production deployment
\ No newline at end of file
diff --git a/flash/execution-model.mdx b/flash/execution-model.mdx
index dbba8906..b6c6fd98 100644
--- a/flash/execution-model.mdx
+++ b/flash/execution-model.mdx
@@ -41,7 +41,7 @@ if __name__ == "__main__":
 
 When you build a [Flash app](/flash/apps/overview):
 
-**Development (`flash run`)**:
+**Development (`flash dev`)**:
 - FastAPI server runs **locally**.
 - `@Endpoint` functions run on **Runpod workers**.
 
diff --git a/flash/troubleshooting.mdx b/flash/troubleshooting.mdx
index 6e4bbd9d..4d40866f 100644
--- a/flash/troubleshooting.mdx
+++ b/flash/troubleshooting.mdx
@@ -383,7 +383,7 @@ HTTP error from endpoint [name]: 500 - Internal Server Error
 
 1. **Check logs**: View worker logs in the [Serverless console](https://www.runpod.io/console/serverless) for detailed error messages.
 
-2. **Test locally**: Use `flash run` to test your function locally before deploying.
+2. **Test locally**: Use `flash dev` to test your function locally before deploying.
 
 3. **Add error handling**: Wrap your function logic in try/except to provide better error messages:
    ```python
diff --git a/release-notes.mdx b/release-notes.mdx
index d30d4296..2a345628 100644
--- a/release-notes.mdx
+++ b/release-notes.mdx
@@ -33,7 +33,7 @@ print("Done!") # This runs locally
 - **Auto-scaling**: Workers scale from 0 to N based on demand.
 - **Dependency management**: Packages install automatically on remote workers.
 - **Two patterns**: Queue-based endpoints for batch work, load-balanced endpoints for REST APIs
-- **Flash apps**: Build production-ready APIs with `flash init`, `flash run`, and `flash deploy`
+- **Flash apps**: Build production-ready APIs with `flash init`, `flash dev`, and `flash deploy`
 
 **Get started:**
 
diff --git a/tutorials/flash/build-rest-api-with-load-balancer.mdx b/tutorials/flash/build-rest-api-with-load-balancer.mdx
index 5abc959c..6347ef62 100644
--- a/tutorials/flash/build-rest-api-with-load-balancer.mdx
+++ b/tutorials/flash/build-rest-api-with-load-balancer.mdx
@@ -256,7 +256,7 @@ Testing Text Analysis API
 The first three endpoints will run locally. The sentiment endpoint will be skipped unless you install transformers and torch locally, but it will work when deployed to Flash.
 
 <Note>
-**Local Testing Limitations**: The GPU sentiment endpoint requires `transformers` and `torch` to be installed locally for testing. For full testing of all endpoints including GPU routes, use `flash run` (covered in Step 9) instead of direct Python execution.
+**Local Testing Limitations**: The GPU sentiment endpoint requires `transformers` and `torch` to be installed locally for testing. For full testing of all endpoints including GPU routes, use `flash dev` (covered in Step 9) instead of direct Python execution.
 </Note>
 
 ## Step 8: Build a Flash app for production
@@ -386,10 +386,10 @@ Replace `YOUR_API_KEY` with your actual Runpod API key.
 Start the Flash development server:
 
 ```bash
-flash run
+flash dev
 
 # If using uv:
-uv run flash run
+uv run flash dev
 ```
 
 You'll see output showing all available endpoints:
@@ -409,7 +409,7 @@ Flash Dev Server  localhost:8888
 ```
 
 <Note>
-**Development Server Path Prefixes**: The `flash run` dev server adds worker file prefixes to routes (e.g., `/lb_worker/health`, `/gpu_worker/sentiment`). When deployed to production, endpoints use the paths as defined in the route decorators (e.g., `/health`, `/sentiment`) without the prefixes.
+**Development Server Path Prefixes**: The `flash dev` server adds worker file prefixes to routes (e.g., `/lb_worker/health`, `/gpu_worker/sentiment`). When deployed to production, endpoints use the paths as defined in the route decorators (e.g., `/health`, `/sentiment`) without the prefixes.
 </Note>
 
 Open http://localhost:8888/docs in your browser to see the interactive API documentation. You can test all your routes directly in the Swagger UI.
@@ -530,7 +530,7 @@ Expected response:
 ```
 
 <Note>
-**Production Path Note**: In production, the endpoints use the exact paths defined in your route decorators (e.g., `/health`, `/sentiment`), without the worker file prefixes used in `flash run`.
+**Production Path Note**: In production, the endpoints use the exact paths defined in your route decorators (e.g., `/health`, `/sentiment`), without the worker file prefixes used in `flash dev`.
 </Note>
 
 ## Understanding the deployment architecture
@@ -597,12 +597,12 @@ flowchart TB
 
 ### Port already in use
 
-**Issue**: `ERROR: [Errno 48] Address already in use` when running `flash run`
+**Issue**: `ERROR: [Errno 48] Address already in use` when running `flash dev`
 
 **Solutions**:
 ```bash
 # Use a different port
-flash run --port 8889
+flash dev --port 8889
 
 # Or kill the process using port 8888
 lsof -ti:8888 | xargs kill -9

From c35223ddc181a4d9b5713ac930d65e60b296f540 Mon Sep 17 00:00:00 2001
From: "promptless[bot]" <promptless[bot]@users.noreply.github.com>
Date: Sun, 26 Apr 2026 20:23:00 +0000
Subject: [PATCH 2/3] docs: Clarify env vars as override mechanism for endpoint
 resolution

Updates the "Call deployed endpoints from scripts" section to explain that
Flash automatically resolves app context from project structure. FLASH_APP
and FLASH_ENV are now documented as override mechanisms for special cases,
such as when a script is moved to a different directory.

Addresses reviewer feedback from PR #619.
---
 flash/apps/deploy-apps.mdx | 54 ++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/flash/apps/deploy-apps.mdx b/flash/apps/deploy-apps.mdx
index 73eef3e8..f594dc0a 100644
--- a/flash/apps/deploy-apps.mdx
+++ b/flash/apps/deploy-apps.mdx
@@ -371,32 +371,16 @@ async def classify(text: str) -> dict:
 
 ## Call deployed endpoints from scripts
 
-After deploying your Flash app, you can call your `@Endpoint` functions directly from Python scripts by setting the `FLASH_APP` and `FLASH_ENV` environment variables. This enables implicit endpoint resolution, where Flash automatically routes function calls to the deployed endpoints in the specified app and environment.
-
-### Set the environment context
-
-Export the environment variables before running your script:
-
-```bash
-export FLASH_APP="my-app"
-export FLASH_ENV="production"
-
-python my_script.py
-```
-
-Or set them inline:
-
-```bash
-FLASH_APP=my-app FLASH_ENV=production python my_script.py
-```
+After deploying your Flash app, you can call your `@Endpoint` functions directly from Python scripts. Flash automatically resolves the app context from your project structure, so in most cases you can run scripts without any additional configuration.
 
 ### How it works
 
-When you call an `@Endpoint` function and both `FLASH_APP` and `FLASH_ENV` are set, Flash:
+When you run a script that calls an `@Endpoint` function, Flash:
 
-1. Looks up the deployed endpoint by name within the specified app and environment.
-2. Routes the request to that endpoint using Flash's sentinel service.
-3. Returns the result to your script.
+1. Detects the app context from the project directory structure.
+2. Looks up the deployed endpoint by name within the resolved app and environment.
+3. Routes the request to that endpoint using Flash's sentinel service.
+4. Returns the result to your script.
 
 This lets you reuse the same `@Endpoint` function definitions to interact with deployed endpoints without modifying your code.
 
@@ -419,7 +403,7 @@ async def run_inference(data: dict) -> dict:
     return {"result": "processed"}
 ```
 
-You can call it from a separate script:
+You can call it from a separate script in the same project:
 
 ```python
 # call_inference.py
@@ -427,7 +411,7 @@ import asyncio
 from gpu_worker import run_inference
 
 async def main():
-    # With FLASH_APP and FLASH_ENV set, this calls the deployed endpoint
+    # Flash resolves the app context automatically
     result = await run_inference({"input": "data"})
     print(result)
 
@@ -435,7 +419,23 @@ if __name__ == "__main__":
     asyncio.run(main())
 ```
 
-Run the script with the environment variables:
+Run the script:
+
+```bash
+python call_inference.py
+```
+
+### Override the resolved context
+
+Flash resolves the app name from your project's directory structure. Use the `FLASH_APP` and `FLASH_ENV` environment variables to override this automatic resolution when needed.
+
+A common use case is when you move a script to a different directory. Since the resolved app name depends on the directory location, moving the script changes the resolved context. To continue targeting the original app, set `FLASH_APP` explicitly:
+
+```bash
+FLASH_APP=my-app python call_inference.py
+```
+
+You can also override the environment:
 
 ```bash
 FLASH_APP=my-app FLASH_ENV=production python call_inference.py
@@ -443,7 +443,7 @@ FLASH_APP=my-app FLASH_ENV=production python call_inference.py
 
 ### Error without context
 
-If you run a script with `@Endpoint` calls without setting `FLASH_APP` and `FLASH_ENV`, and without using `flash dev`, Flash raises an error:
+If Flash cannot resolve the app context and you haven't set the `FLASH_APP` and `FLASH_ENV` environment variables, it raises an error:
 
 ```text
 RuntimeError: no flash context for endpoint 'inference'. either:
@@ -451,8 +451,6 @@ RuntimeError: no flash context for endpoint 'inference'. either:
   - set FLASH_APP and FLASH_ENV to target a deployed environment
 ```
 
-This explicit error prevents accidental provisioning of new endpoints when you intend to use existing deployed ones.
-
 ### Automatic context in deployed workers
 
 When Flash deploys your app, it automatically sets `FLASH_APP` and `FLASH_ENV` environment variables on each worker. This enables cross-endpoint communication within your deployed application without additional configuration.

From 60a5dddc189badf6874937cc492bf89b704a9e70 Mon Sep 17 00:00:00 2001
From: "promptless[bot]" <promptless[bot]@users.noreply.github.com>
Date: Sun, 26 Apr 2026 21:01:05 +0000
Subject: [PATCH 3/3] docs: Restructure endpoint calling examples per reviewer
 feedback

Show calling the endpoint within the same script first, then importing
from another script as an alternative pattern.
---
 flash/apps/deploy-apps.mdx | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/flash/apps/deploy-apps.mdx b/flash/apps/deploy-apps.mdx
index f594dc0a..09c6ee7e 100644
--- a/flash/apps/deploy-apps.mdx
+++ b/flash/apps/deploy-apps.mdx
@@ -384,12 +384,13 @@ When you run a script that calls an `@Endpoint` function, Flash:
 
 This lets you reuse the same `@Endpoint` function definitions to interact with deployed endpoints without modifying your code.
 
-### Example
+### Example: calling within the same script
 
-Given a deployed endpoint:
+The simplest approach is to call the endpoint directly in the same file where it's defined:
 
 ```python
 # gpu_worker.py
+import asyncio
 from runpod_flash import Endpoint, GpuType
 
 @Endpoint(
@@ -401,9 +402,24 @@ async def run_inference(data: dict) -> dict:
     import torch
     # Inference logic
     return {"result": "processed"}
+
+async def main():
+    result = await run_inference({"input": "data"})
+    print(result)
+
+if __name__ == "__main__":
+    asyncio.run(main())
 ```
 
-You can call it from a separate script in the same project:
+Run the script:
+
+```bash
+python gpu_worker.py
+```
+
+### Example: importing from another script
+
+You can also import and call endpoints from a separate script in the same project:
 
 ```python
 # call_inference.py