Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .changeset/create-expert-delegation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
"@perstack/base": patch
"create-expert": patch
"@perstack/runtime": patch
"@perstack/skill-manager": patch
---

Add createExpert tool and replace runExpert workflow with in-process delegation
51 changes: 51 additions & 0 deletions apps/base/src/tools/skill-management.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type { SkillManagementCallbacks } from "./skill-management.js"
import {
registerAddDelegate,
registerAddSkill,
registerCreateExpert,
registerRemoveDelegate,
registerRemoveSkill,
} from "./skill-management.js"
Expand All @@ -13,6 +14,7 @@ function createMockCallbacks(): SkillManagementCallbacks {
removeSkill: vi.fn().mockResolvedValue(undefined),
addDelegate: vi.fn().mockResolvedValue({ delegateToolName: "delegate-tool" }),
removeDelegate: vi.fn().mockResolvedValue(undefined),
createExpert: vi.fn().mockResolvedValue({ expertKey: "my-expert" }),
}
}

Expand Down Expand Up @@ -199,4 +201,53 @@ describe("skill-management tools", () => {
})
})
})

// Exercises the createExpert tool: registration metadata, the success path,
// and conversion of a rejected callback into an error tool result.
describe("createExpert", () => {
  it("registers tool with correct metadata", () => {
    const mockServer = createMockServer()
    const mockCallbacks = createMockCallbacks()

    registerCreateExpert(mockServer as never, mockCallbacks)

    // Only the title is pinned; the rest of the config is matched loosely.
    const expectedConfig = expect.objectContaining({ title: "Create expert" })
    expect(mockServer.registerTool).toHaveBeenCalledWith(
      "createExpert",
      expectedConfig,
      expect.any(Function),
    )
  })

  it("calls callback with correct input and returns expert key", async () => {
    const mockServer = createMockServer()
    const mockCallbacks = createMockCallbacks()
    registerCreateExpert(mockServer as never, mockCallbacks)

    const expertInput = {
      key: "test-expert",
      instruction: "Test instruction",
      description: "A test expert",
    }
    const invokeTool = getHandler(mockServer)
    const toolResult = await invokeTool(expertInput)

    // The input must reach the callback untouched.
    expect(mockCallbacks.createExpert).toHaveBeenCalledWith(expertInput)

    // The mocked callback resolves with { expertKey: "my-expert" }.
    const expectedText = JSON.stringify({ expertKey: "my-expert" })
    expect(toolResult).toStrictEqual({
      content: [{ type: "text", text: expectedText }],
    })
  })

  it("returns errorToolResult when callback throws", async () => {
    const mockServer = createMockServer()
    const mockCallbacks = createMockCallbacks()
    const createExpertMock = mockCallbacks.createExpert as ReturnType<typeof vi.fn>
    createExpertMock.mockRejectedValue(new Error("invalid expert"))
    registerCreateExpert(mockServer as never, mockCallbacks)

    const invokeTool = getHandler(mockServer)
    const toolResult = await invokeTool({ key: "bad", instruction: "x" })

    // An Error rejection is reported as a text payload instead of being thrown.
    const expectedText = JSON.stringify({ error: "Error", message: "invalid expert" })
    expect(toolResult).toStrictEqual({
      content: [
        {
          type: "text",
          text: expectedText,
        },
      ],
    })
  })
})
})
103 changes: 103 additions & 0 deletions apps/base/src/tools/skill-management.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,31 @@ export interface SkillManagementCallbacks {
removeSkill(skillName: string): Promise<void>
addDelegate(expertKey: string): Promise<{ delegateToolName: string }>
removeDelegate(expertName: string): Promise<void>
/**
 * Creates an expert definition from the given input and resolves with its key.
 *
 * The `createExpert` tool registered in this file forwards its validated
 * input to this callback unchanged and returns the resolved value to the
 * caller; a rejection with an `Error` is reported as an error tool result.
 *
 * @param input - Expert definition. Only `key` and `instruction` are required.
 * @returns A promise resolving to an object carrying the created expert's key.
 */
createExpert(input: {
// Unique expert key (the tool schema describes it as kebab-case).
key: string
// System instruction for the expert.
instruction: string
// Human-readable description.
description?: string
// Semantic version; the tool schema documents a default of 1.0.0.
version?: string
// Skills map keyed by skill name; the tool schema documents a default of @perstack/base.
skills?: Record<
string,
{
// Skill transport type.
type: "mcpStdioSkill" | "mcpSseSkill"
// Command to execute (for stdio skills).
command?: string
// Package name for npx/uvx (for stdio skills).
packageName?: string
// Additional command arguments.
args?: string[]
// Required environment variable names.
requiredEnv?: string[]
// SSE endpoint URL (for SSE skills).
endpoint?: string
// Human-readable description of the skill.
description?: string
// Usage rules for the LLM.
rule?: string
// Tool names to include (whitelist).
pick?: string[]
// Tool names to exclude (blacklist).
omit?: string[]
// Whether the skill is lazily initialized.
lazyInit?: boolean
}
>
// Expert keys this expert may delegate to.
delegates?: string[]
// Tags for categorization.
tags?: string[]
// Provider-specific tool names.
providerTools?: string[]
}): Promise<{ expertKey: string }>
}

export function registerAddSkill(server: McpServer, callbacks: SkillManagementCallbacks) {
Expand Down Expand Up @@ -131,6 +156,83 @@ export function registerRemoveDelegate(server: McpServer, callbacks: SkillManage
)
}

/**
 * Registers the `createExpert` tool on the given MCP server.
 *
 * The tool accepts an expert definition (`key` and `instruction` are required;
 * `description`, `version`, `skills`, `delegates`, `tags` and `providerTools`
 * are optional) and forwards it unchanged to `callbacks.createExpert`.
 *
 * - On success the callback's resolved value is wrapped with `successToolResult`.
 * - If the callback rejects with an `Error`, it is wrapped with `errorToolResult`.
 * - Any non-`Error` rejection is rethrown to the caller.
 *
 * @param server - MCP server on which the tool is registered.
 * @param callbacks - Skill-management callbacks; only `createExpert` is used here.
 */
export function registerCreateExpert(server: McpServer, callbacks: SkillManagementCallbacks) {
  server.registerTool(
    "createExpert",
    {
      title: "Create expert",
      description:
        "Dynamically create an expert definition in memory. Returns the expert key so you can add it as a delegate.",
      inputSchema: {
        key: z.string().describe("Unique expert key (kebab-case)"),
        instruction: z.string().describe("System instruction for the expert"),
        description: z.string().optional().describe("Human-readable description"),
        version: z.string().optional().describe("Semantic version (defaults to 1.0.0)"),
        skills: z
          .record(
            z.string(),
            z.object({
              type: z.enum(["mcpStdioSkill", "mcpSseSkill"]).describe("Skill transport type"),
              command: z.string().optional().describe("Command to execute (for stdio skills)"),
              packageName: z
                .string()
                .optional()
                .describe("Package name for npx/uvx (for stdio skills)"),
              args: z.array(z.string()).optional().describe("Additional command arguments"),
              requiredEnv: z
                .array(z.string())
                .optional()
                .describe("Required environment variable names"),
              endpoint: z.string().optional().describe("SSE endpoint URL (for SSE skills)"),
              description: z.string().optional().describe("Human-readable description"),
              rule: z.string().optional().describe("Usage rules for the LLM"),
              pick: z.array(z.string()).optional().describe("Tool names to include (whitelist)"),
              omit: z.array(z.string()).optional().describe("Tool names to exclude (blacklist)"),
              lazyInit: z.boolean().optional().describe("Lazy initialization"),
            }),
          )
          .optional()
          .describe("Skills map (defaults to @perstack/base)"),
        delegates: z.array(z.string()).optional().describe("Expert keys to delegate to"),
        tags: z.array(z.string()).optional().describe("Tags for categorization"),
        providerTools: z.array(z.string()).optional().describe("Provider-specific tool names"),
      },
    },
    // The input type is derived from the callback signature rather than
    // redeclared, so the handler cannot drift from SkillManagementCallbacks.
    async (input: Parameters<SkillManagementCallbacks["createExpert"]>[0]) => {
      try {
        return successToolResult(await callbacks.createExpert(input))
      } catch (e) {
        // Only Error instances are converted; anything else propagates as-is.
        if (e instanceof Error) return errorToolResult(e)
        throw e
      }
    },
  )
}

export function registerSkillManagementTools(
server: McpServer,
callbacks: SkillManagementCallbacks,
Expand All @@ -139,4 +241,5 @@ export function registerSkillManagementTools(
registerRemoveSkill(server, callbacks)
registerAddDelegate(server, callbacks)
registerRemoveDelegate(server, callbacks)
registerCreateExpert(server, callbacks)
}
47 changes: 23 additions & 24 deletions apps/create-expert/perstack.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,26 @@ pick = ["readTextFile", "writeTextFile", "listDirectory", "think", "attemptCompl

1. First, check if a `perstack.toml` already exists in the current directory using `readTextFile`
2. If it exists, read and understand the current configuration
3. Based on the user's request, create or modify the expert definition
4. Write the updated perstack.toml using `writeTextFile`
5. Preserve all existing content when modifying (do not remove existing experts unless asked)
6. After writing, test-run the expert using `runExpert` to verify it works
7. Review the activities: check that expected tools were called and the completion text is reasonable
8. If the test run shows errors or unexpected behavior, fix the perstack.toml and re-test
9. Use `attemptCompletion` when the expert is created and verified

## Testing with runExpert

After writing a perstack.toml file, always test-run the expert you created:
- Use the absolute path to the perstack.toml you just wrote as `configPath` (use the current working directory path)
- Use the expert key you defined as `expertKey`
- Choose a simple, realistic query that exercises the expert's core functionality
- Review the activities: check that expected tools were called and the completion text is reasonable
- If the run fails or produces errors, fix the perstack.toml and re-test
3. Based on the user's request, draft the expert definition
4. Create the expert in memory using `createExpert` to validate the definition
5. Add it as a delegate using `addDelegate` so you can test it
6. Test the expert by calling the delegate tool with a simple, realistic query
7. Review the result: check that the expert behaves as expected
8. If the test shows errors or unexpected behavior:
- Use `removeDelegate` to remove the current delegate
- Modify the definition and call `createExpert` again with the same key
- Add it as a delegate again with `addDelegate` and re-test
9. Once the expert works correctly, write the final `perstack.toml` using `writeTextFile`
10. Use `attemptCompletion` when the expert is created and verified

## Testing with createExpert + addDelegate

After drafting an expert definition, always test it in memory before writing perstack.toml:
- Use `createExpert` with the expert key, instruction, description, skills, and other fields
- Use `addDelegate` with the expert key to make it callable
- Call the delegate tool with a simple query that exercises the expert's core functionality
- Review the result to verify correctness
- If issues arise, iterate: `removeDelegate` -> fix -> `createExpert` -> `addDelegate` -> re-test

## Important Rules

Expand All @@ -106,12 +110,7 @@ pick = [
"getFileInfo",
"think",
"attemptCompletion",
"createExpert",
"addDelegate",
"removeDelegate",
]

[experts."expert".skills."create-expert-skill"]
type = "mcpStdioSkill"
description = "Test-run expert definitions to verify they work correctly"
command = "npx"
packageName = "@perstack/create-expert-skill"
requiredEnv = ["PROVIDER_API_KEY"]
rule = "After creating or modifying an expert in perstack.toml, use runExpert to test it with a simple query. Review the activities to verify correctness."
11 changes: 11 additions & 0 deletions e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ pnpm test:e2e -- --testNamePattern "delegate"

```
e2e/
├── create-expert/ # Create expert tests
│ └── create-expert.test.ts # Expert creation and modification
├── perstack-cli/ # perstack CLI tests
│ ├── bundled-base.test.ts # Bundled base skill
│ ├── continue.test.ts # Continue job, resume from checkpoint
Expand Down Expand Up @@ -59,6 +61,15 @@ e2e/

## Functional Test Categories

### create-expert/

#### Create Expert (`create-expert.test.ts`)

| Test | Purpose |
| ----------------------------------------- | ----------------------------------------- |
| `should create a new perstack.toml` | Verify new expert creation via delegation |
| `should modify an existing perstack.toml` | Verify existing experts are preserved |

### perstack-cli/

#### Continue Job (`continue.test.ts`)
Expand Down
Loading