perstack-ai · FL4TLiN3 · Feb 17, 2026 · Feb 17, 2026 · Feb 17, 2026
diff --git a/.changeset/create-expert-delegation.md b/.changeset/create-expert-delegation.md
@@ -0,0 +1,8 @@
+---
+"@perstack/base": patch
+"create-expert": patch
+"@perstack/runtime": patch
+"@perstack/skill-manager": patch
+---
+
+Add createExpert tool and replace runExpert workflow with in-process delegation
diff --git a/apps/base/src/tools/skill-management.test.ts b/apps/base/src/tools/skill-management.test.ts
@@ -3,6 +3,7 @@ import type { SkillManagementCallbacks } from "./skill-management.js"
 import {
   registerAddDelegate,
   registerAddSkill,
+  registerCreateExpert,
   registerRemoveDelegate,
   registerRemoveSkill,
 } from "./skill-management.js"
@@ -13,6 +14,7 @@ function createMockCallbacks(): SkillManagementCallbacks {
     removeSkill: vi.fn().mockResolvedValue(undefined),
     addDelegate: vi.fn().mockResolvedValue({ delegateToolName: "delegate-tool" }),
     removeDelegate: vi.fn().mockResolvedValue(undefined),
+    createExpert: vi.fn().mockResolvedValue({ expertKey: "my-expert" }),
   }
 }
 
@@ -199,4 +201,53 @@ describe("skill-management tools", () => {
       })
     })
   })
+
+  describe("createExpert", () => { // covers tool registration, the success path and the Error path
+    it("registers tool with correct metadata", () => {
+      const server = createMockServer()
+      const callbacks = createMockCallbacks()
+      registerCreateExpert(server as never, callbacks) // `as never` lets the mock be passed where McpServer is expected
+      expect(server.registerTool).toHaveBeenCalledWith(
+        "createExpert",
+        expect.objectContaining({ title: "Create expert" }),
+        expect.any(Function), // the tool handler
+      )
+    })
+
+    it("calls callback with correct input and returns expert key", async () => {
+      const server = createMockServer()
+      const callbacks = createMockCallbacks()
+      registerCreateExpert(server as never, callbacks)
+      const handler = getHandler(server) // presumably the handler given to registerTool — confirm against getHandler
+      const input = {
+        key: "test-expert",
+        instruction: "Test instruction",
+        description: "A test expert",
+      }
+      const result = await handler(input)
+      expect(callbacks.createExpert).toHaveBeenCalledWith(input) // input must reach the callback unchanged
+      expect(result).toStrictEqual({
+        content: [{ type: "text", text: JSON.stringify({ expertKey: "my-expert" }) }], // mock's resolved value as JSON text
+      })
+    })
+
+    it("returns errorToolResult when callback throws", async () => {
+      const server = createMockServer()
+      const callbacks = createMockCallbacks()
+      ;(callbacks.createExpert as ReturnType<typeof vi.fn>).mockRejectedValue( // leading `;` stops this parsing as a call on the previous line
+        new Error("invalid expert"),
+      )
+      registerCreateExpert(server as never, callbacks)
+      const handler = getHandler(server)
+      const result = await handler({ key: "bad", instruction: "x" }) // the rejection must not escape the handler
+      expect(result).toStrictEqual({
+        content: [
+          {
+            type: "text",
+            text: JSON.stringify({ error: "Error", message: "invalid expert" }), // presumably the Error's name and message — confirm against errorToolResult
+          },
+        ],
+      })
+    })
+  })
 })
diff --git a/apps/base/src/tools/skill-management.ts b/apps/base/src/tools/skill-management.ts
@@ -19,6 +19,31 @@ export interface SkillManagementCallbacks {
   removeSkill(skillName: string): Promise<void>
   addDelegate(expertKey: string): Promise<{ delegateToolName: string }>
   removeDelegate(expertName: string): Promise<void>
+  createExpert(input: { // callback invoked by the `createExpert` tool handler with the tool input
+    key: string // unique expert key (kebab-case per the tool schema)
+    instruction: string // system instruction for the expert
+    description?: string // human-readable description
+    version?: string // semantic version; the tool schema documents a 1.0.0 default
+    skills?: Record< // skills map; the tool schema documents @perstack/base as the default
+      string,
+      {
+        type: "mcpStdioSkill" | "mcpSseSkill" // skill transport type
+        command?: string // command to execute (stdio skills)
+        packageName?: string // package name for npx/uvx (stdio skills)
+        args?: string[] // additional command arguments
+        requiredEnv?: string[] // required environment variable names
+        endpoint?: string // SSE endpoint URL (SSE skills)
+        description?: string // human-readable description
+        rule?: string // usage rules for the LLM
+        pick?: string[] // tool names to include
+        omit?: string[] // tool names to exclude
+        lazyInit?: boolean // lazy initialization
+      }
+    >
+    delegates?: string[] // expert keys to delegate to
+    tags?: string[] // tags for categorization
+    providerTools?: string[] // provider-specific tool names
+  }): Promise<{ expertKey: string }> // resolves with the expert key that the tool returns to the caller
 }
 
 export function registerAddSkill(server: McpServer, callbacks: SkillManagementCallbacks) {
@@ -131,6 +156,83 @@ export function registerRemoveDelegate(server: McpServer, callbacks: SkillManage
   )
 }
 
+/**
+ * Registers the `createExpert` tool on the given MCP server.
+ *
+ * The tool handler forwards its input unchanged to `callbacks.createExpert` and wraps the
+ * resolved value with `successToolResult`. If the callback throws an `Error`, the handler
+ * returns `errorToolResult(e)` instead; any other thrown value is rethrown.
+ *
+ * Per the tool description, the returned expert key is intended to be used afterwards to add
+ * the newly created expert as a delegate.
+ *
+ * @param server - MCP server to register the tool on.
+ * @param callbacks - Skill-management callbacks; only `createExpert` is used here.
+ * @returns Nothing; the tool is registered as a side effect on `server`.
+ */
+export function registerCreateExpert(server: McpServer, callbacks: SkillManagementCallbacks) {
+  server.registerTool(
+    "createExpert",
+    {
+      title: "Create expert",
+      description:
+        "Dynamically create an expert definition in memory. Returns the expert key so you can add it as a delegate.",
+      inputSchema: {
+        // Only `key` and `instruction` are required; every other field is optional.
+        key: z.string().describe("Unique expert key (kebab-case)"),
+        instruction: z.string().describe("System instruction for the expert"),
+        description: z.string().optional().describe("Human-readable description"),
+        version: z.string().optional().describe("Semantic version (defaults to 1.0.0)"),
+        // Optional map of skill definitions; each entry has the same shape as the `skills`
+        // value type declared on `SkillManagementCallbacks.createExpert`.
+        skills: z
+          .record(
+            z.string(),
+            z.object({
+              type: z.enum(["mcpStdioSkill", "mcpSseSkill"]).describe("Skill transport type"),
+              // `command`/`packageName` are described as stdio fields and `endpoint` as the
+              // SSE field, but the schema does not enforce that pairing with `type`.
+              command: z.string().optional().describe("Command to execute (for stdio skills)"),
+              packageName: z
+                .string()
+                .optional()
+                .describe("Package name for npx/uvx (for stdio skills)"),
+              args: z.array(z.string()).optional().describe("Additional command arguments"),
+              requiredEnv: z
+                .array(z.string())
+                .optional()
+                .describe("Required environment variable names"),
+              endpoint: z.string().optional().describe("SSE endpoint URL (for SSE skills)"),
+              description: z.string().optional().describe("Human-readable description"),
+              rule: z.string().optional().describe("Usage rules for the LLM"),
+              pick: z.array(z.string()).optional().describe("Tool names to include (whitelist)"),
+              omit: z.array(z.string()).optional().describe("Tool names to exclude (blacklist)"),
+              lazyInit: z.boolean().optional().describe("Lazy initialization"),
+            }),
+          )
+          .optional()
+          .describe("Skills map (defaults to @perstack/base)"),
+        delegates: z.array(z.string()).optional().describe("Expert keys to delegate to"),
+        tags: z.array(z.string()).optional().describe("Tags for categorization"),
+        providerTools: z.array(z.string()).optional().describe("Provider-specific tool names"),
+      },
+    },
+    // The input type is derived from the callback signature instead of being redeclared
+    // inline, so the handler and `SkillManagementCallbacks.createExpert` cannot drift apart.
+    async (input: Parameters<SkillManagementCallbacks["createExpert"]>[0]) => {
+      // Await inside the try block so a rejected callback promise reaches the catch below.
+      try {
+        return successToolResult(await callbacks.createExpert(input))
+      } catch (e) {
+        // Only `Error` instances are reported as a tool result; other thrown values
+        // are rethrown untouched.
+        if (e instanceof Error) return errorToolResult(e)
+        throw e
+      }
+    },
+  )
+}
+
 export function registerSkillManagementTools(
   server: McpServer,
   callbacks: SkillManagementCallbacks,
@@ -139,4 +241,5 @@ export function registerSkillManagementTools(
   registerRemoveSkill(server, callbacks)
   registerAddDelegate(server, callbacks)
   registerRemoveDelegate(server, callbacks)
+  registerCreateExpert(server, callbacks)
 }
diff --git a/apps/create-expert/perstack.toml b/apps/create-expert/perstack.toml
@@ -65,22 +65,26 @@ pick = ["readTextFile", "writeTextFile", "listDirectory", "think", "attemptCompl
 
 1. First, check if a `perstack.toml` already exists in the current directory using `readTextFile`
 2. If it exists, read and understand the current configuration
-3. Based on the user's request, create or modify the expert definition
-4. Write the updated perstack.toml using `writeTextFile`
-5. Preserve all existing content when modifying (do not remove existing experts unless asked)
-6. After writing, test-run the expert using `runExpert` to verify it works
-7. Review the activities: check that expected tools were called and the completion text is reasonable
-8. If the test run shows errors or unexpected behavior, fix the perstack.toml and re-test
-9. Use `attemptCompletion` when the expert is created and verified
-
-## Testing with runExpert
-
-After writing a perstack.toml file, always test-run the expert you created:
-- Use the absolute path to the perstack.toml you just wrote as `configPath` (use the current working directory path)
-- Use the expert key you defined as `expertKey`
-- Choose a simple, realistic query that exercises the expert's core functionality
-- Review the activities: check that expected tools were called and the completion text is reasonable
-- If the run fails or produces errors, fix the perstack.toml and re-test
+3. Based on the user's request, draft the expert definition
+4. Create the expert in memory using `createExpert` to validate the definition
+5. Add it as a delegate using `addDelegate` so you can test it
+6. Test the expert by calling the delegate tool with a simple, realistic query
+7. Review the result: check that the expert behaves as expected
+8. If the test shows errors or unexpected behavior:
+   - Use `removeDelegate` to remove the current delegate
+   - Modify the definition and call `createExpert` again with the same key
+   - Add it as a delegate again with `addDelegate` and re-test
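+9. Once the expert works correctly, write the final `perstack.toml` using `writeTextFile`, preserving all existing content when modifying an existing file (do not remove existing experts unless asked)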
+10. Use `attemptCompletion` when the expert is created and verified
+
+## Testing with createExpert + addDelegate
+
+After drafting an expert definition, always test it in memory before writing perstack.toml:
+- Use `createExpert` with the expert key, instruction, description, skills, and other fields
+- Use `addDelegate` with the expert key to make it callable
+- Call the delegate tool with a simple query that exercises the expert's core functionality
+- Review the result to verify correctness
+- If issues arise, iterate: `removeDelegate` -> fix -> `createExpert` -> `addDelegate` -> re-test
 
 ## Important Rules
 
@@ -106,12 +110,7 @@ pick = [
   "getFileInfo",
   "think",
   "attemptCompletion",
+  "createExpert",
+  "addDelegate",
+  "removeDelegate",
 ]
-
-[experts."expert".skills."create-expert-skill"]
-type = "mcpStdioSkill"
-description = "Test-run expert definitions to verify they work correctly"
-command = "npx"
-packageName = "@perstack/create-expert-skill"
-requiredEnv = ["PROVIDER_API_KEY"]
-rule = "After creating or modifying an expert in perstack.toml, use runExpert to test it with a simple query. Review the activities to verify correctness."
diff --git a/e2e/README.md b/e2e/README.md
@@ -25,6 +25,8 @@ pnpm test:e2e -- --testNamePattern "delegate"
 
 ```
 e2e/
+├── create-expert/                       # Create expert tests
+│   └── create-expert.test.ts            # Expert creation and modification
 ├── perstack-cli/                        # perstack CLI tests
 │   ├── bundled-base.test.ts             # Bundled base skill
 │   ├── continue.test.ts                 # Continue job, resume from checkpoint
@@ -59,6 +61,15 @@ e2e/
 
 ## Functional Test Categories
 
+### create-expert/
+
+#### Create Expert (`create-expert.test.ts`)
+
+| Test                                      | Purpose                                   |
+| ----------------------------------------- | ----------------------------------------- |
+| `should create a new perstack.toml`       | Verify new expert creation via delegation |
+| `should modify an existing perstack.toml` | Verify existing experts are preserved     |
+
 ### perstack-cli/
 
 #### Continue Job (`continue.test.ts`)