diff --git a/docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md b/docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md
index fd7c0cf6b..ae74df101 100644
--- a/docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md
+++ b/docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md
@@ -88,7 +88,7 @@ graph LR
subgraph Services["TorchForge Services (Real Classes)"]
direction TB
S1["DatasetActor"]
- S2["Policy"]
+ S2["Generator"]
S3["RewardActor"]
S4["ReferenceModel"]
S5["ReplayBuffer"]
@@ -290,7 +290,7 @@ TorchForge handles behind the scenes:
### Independent Scaling
```python
-from forge.actors.policy import Policy
+from forge.actors.generator import Generator as Policy
from forge.actors.replay_buffer import ReplayBuffer
from forge.actors.reference_model import ReferenceModel
from forge.actors.trainer import RLTrainer
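
The alias import is what keeps the rest of this section unchanged: `Generator` is the renamed class, and binding it back to `Policy` means every later snippet still reads the same. A minimal sketch of the equivalence (assuming the rename is a pure re-export, which is all the diff relies on):

```python
from forge.actors.generator import Generator
from forge.actors.generator import Generator as Policy

# Both names refer to the same class object, so code written against
# `Policy` keeps working while new code can adopt `Generator`.
assert Policy is Generator
```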
diff --git a/docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md b/docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md
index 9c8f89bc2..335c5fc5a 100644
--- a/docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md
+++ b/docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md
@@ -73,7 +73,7 @@ The service creation automatically handles:
- Message routing and serialization
```python
-from forge.actors.policy import Policy
+from forge.actors.generator import Generator as Policy
model = "Qwen/Qwen3-1.7B"
@@ -560,7 +560,7 @@ Now let's see how services coordinate in a real training loop:
import asyncio
import torch
-from forge.actors.policy import Policy
+from forge.actors.generator import Generator as Policy
from forge.actors.reference_model import ReferenceModel
from forge.actors.replay_buffer import ReplayBuffer
from forge.actors.trainer import RLTrainer
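
These four imports back the full training loop shown further down in that file. Compressed into a single iteration, with every method name hedged (they follow the `.route()` patterns these tutorials use elsewhere, not verified signatures):

```python
# One RL iteration, compressed. Each component is a service handle;
# .route() sends the request to one healthy replica. All endpoint
# names and payload shapes here are assumptions for illustration.
async def rl_step(dataloader, policy, reward_actor, replay_buffer, trainer):
    sample = await dataloader.sample.route()
    response = await policy.generate.route(sample["question"])
    reward = await reward_actor.evaluate_response.route(
        prompt=sample["question"], response=response, target=sample["answer"]
    )
    await replay_buffer.add.route({"response": response, "reward": reward})
    batch = await replay_buffer.sample.route(curr_policy_version=0)
    if batch is not None:
        await trainer.train_step.route(batch)
```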
diff --git a/docs/source/tutorial_sources/zero-to-forge/3_Monarch_101.md b/docs/source/tutorial_sources/zero-to-forge/3_Monarch_101.md
index a5a28c7a6..8a53566c0 100644
--- a/docs/source/tutorial_sources/zero-to-forge/3_Monarch_101.md
+++ b/docs/source/tutorial_sources/zero-to-forge/3_Monarch_101.md
@@ -18,15 +18,15 @@ graph TD
end
subgraph MonarchLayer["3. Monarch Actor Layer"]
- ActorMesh["ActorMesh PolicyActor: 4 instances, Different GPUs, Message passing"]
+ ActorMesh["ActorMesh Policy Actor: 4 instances, Different GPUs, Message passing"]
ProcMesh["ProcMesh: 4 processes, GPU topology 0,1,2,3, Network interconnect"]
end
subgraph Hardware["4. Physical Hardware"]
- GPU0["GPU 0: PolicyActor #1, vLLM Engine, Model Weights"]
- GPU1["GPU 1: PolicyActor #2, vLLM Engine, Model Weights"]
- GPU2["GPU 2: PolicyActor #3, vLLM Engine, Model Weights"]
- GPU3["GPU 3: PolicyActor #4, vLLM Engine, Model Weights"]
+ GPU0["GPU 0: Policy Actor #1, vLLM Engine, Model Weights"]
+ GPU1["GPU 1: Policy Actor #2, vLLM Engine, Model Weights"]
+ GPU2["GPU 2: Policy Actor #3, vLLM Engine, Model Weights"]
+ GPU3["GPU 3: Policy Actor #4, vLLM Engine, Model Weights"]
end
Call --> ServiceInterface
@@ -154,7 +154,7 @@ await procs.stop()
**ActorMesh** is created when you spawn actors across a ProcMesh. Key points:
-- **One actor instance per process**: `mesh.spawn("policy", PolicyActor)` creates one PolicyActor in each process
+- **One actor instance per process**: `mesh.spawn("policy", Policy)` creates one Policy Actor in each process
- **Same constructor arguments**: All instances get the same initialization parameters
- **Independent state**: Each actor instance maintains its own state and memory
- **Message routing**: You can send messages to one actor or all actors using different methods
@@ -162,9 +162,9 @@ await procs.stop()
```python
# Simple example:
procs = spawn_procs(per_host={"gpus": 4}) # 4 processes
-policy_actors = procs.spawn("policy", PolicyActor, model="Qwen/Qwen3-7B")
+policy_actors = procs.spawn("policy", Policy, model="Qwen/Qwen3-7B")
-# Now you have 4 PolicyActor instances, one per GPU
+# Now you have 4 Policy Actor instances, one per GPU
# All initialized with the same model parameter
```
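
Continuing the sketch: the earlier bullet on message routing is the point of holding a mesh handle. If `Policy` exposes a `generate` endpoint, Monarch's endpoint adverbs let one call address a single instance or the whole mesh (the `generate` endpoint is a stand-in; the adverbs follow Monarch's API):

```python
# Address one instance vs. all four. choose() picks a single actor;
# call() fans out and gathers a result from every instance.
one_answer = await policy_actors.generate.choose("What is 2+2?")
all_answers = await policy_actors.generate.call("What is 2+2?")
```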
@@ -177,29 +177,29 @@ Now the key insight: **TorchForge services are ServiceActors that manage ActorMe
```mermaid
graph TD
subgraph ServiceCreation["Service Creation Process"]
- Call["await PolicyActor.options(num_replicas=4, procs=1).as_service(model='Qwen')"]
+ Call["await Policy.options(num_replicas=4, procs=1).as_service(model='Qwen')"]
ServiceActor["ServiceActor: Manages 4 replicas, Health checks, Routes calls"]
subgraph Replicas["4 Independent Replicas"]
subgraph R0["Replica 0"]
PM0["ProcMesh: 1 process, GPU 0"]
- AM0["ActorMesh
1 PolicyActor"]
+ AM0["ActorMesh
1 Policy Actor"]
end
subgraph R1["Replica 1"]
PM1["ProcMesh: 1 process, GPU 1"]
- AM1["ActorMesh
1 PolicyActor"]
+ AM1["ActorMesh
1 Policy Actor"]
end
subgraph R2["Replica 2"]
PM2["ProcMesh: 1 process, GPU 2"]
- AM2["ActorMesh
1 PolicyActor"]
+ AM2["ActorMesh
1 Policy Actor"]
end
subgraph R3["Replica 3"]
PM3["ProcMesh: 1 process, GPU 3"]
- AM3["ActorMesh
1 PolicyActor"]
+ AM3["ActorMesh
1 Policy Actor"]
end
end
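
The whole diagram corresponds to the single call at its top: one `as_service` line produces the ServiceActor plus four one-process replicas, each wrapping one Policy actor on its own GPU:

```python
# Verbatim shape from the diagram above: 4 replicas, 1 process each,
# managed by one ServiceActor that routes calls and tracks health.
policy = await Policy.options(num_replicas=4, procs=1).as_service(model="Qwen/Qwen3-1.7B")
```

Because each replica owns its own ProcMesh, one replica can fail and be replaced without disturbing the other three.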
@@ -232,9 +232,9 @@ graph TD
ServiceActor["ServiceActor: Selects healthy replica, Load balancing, Failure handling"]
- SelectedReplica["Selected Replica #2: ProcMesh 1 process, ActorMesh 1 PolicyActor"]
+ SelectedReplica["Selected Replica #2: ProcMesh 1 process, ActorMesh 1 Policy Actor"]
- PolicyActor["PolicyActor Instance: Loads model, Runs vLLM inference"]
+ PolicyActor["Policy Actor Instance: Loads model, Runs vLLM inference"]
GPU["GPU 2: vLLM engine, Model weights, KV cache, CUDA kernels"]