diff --git a/.gitignore b/.gitignore
index e9d9434a..2fdd7d0a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ dist/
 .vscode/
 .idea/
 .claude/settings.local.json
+.sdk-under-test/
diff --git a/AGENTS.md b/AGENTS.md
index fc864ba5..c91484eb 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -52,6 +52,7 @@ Keep scenarios separate when they're genuinely independent features or when they
 - **Same `id` for SUCCESS and FAIL.** A check should use one slug and flip `status` + `errorMessage`, not branch into `foo-success` vs `foo-failure` slugs.
 - **Optimize for Ctrl+F on the slug.** Repetitive check blocks are fine — easier to find the failing one than to unwind a clever helper.
 - Reuse `ConformanceCheck` and other types from `src/types.ts` rather than defining parallel shapes.
+- **Don't reimplement the runner.** New subcommands that need to "select scenarios → run them → print summary → compute exit code" must go through the existing `client` / `server` commands (subprocess via `process.execPath` like `tier-check` and `sdk` do) or call shared helpers — never a parallel suite-map / summary loop.
 - Include `specReferences` pointing to the relevant spec section.
 - **Severity follows the spec keyword:** MUST / MUST NOT → `FAILURE`; SHOULD / SHOULD NOT → `WARNING`. (CI treats WARNING as a failure, so Tier-1 SDKs still need to satisfy SHOULDs — see #245.)
 
diff --git a/README.md b/README.md
index b2b5f0e4..3dad40aa 100644
--- a/README.md
+++ b/README.md
@@ -210,6 +210,40 @@ Run `npx @modelcontextprotocol/conformance list --server` to see all available s
 - **resources-\*** - Resource management scenarios
 - **prompts-\*** - Prompt management scenarios
 
+## Running Against an SDK at a Specific Ref
+
+The `sdk` subcommand clones an SDK repository at a given ref, builds it, and runs the **local** conformance build against it. This is the inner-loop tool for scenario authors and the basis for cross-SDK CI. Examples below use `npm start --` so they run from source — no `npm run build` between edits.
+
+```bash
+# Clone and run everything against typescript-sdk@main
+npm start -- sdk typescript-sdk@main
+
+# Against a specific tag, SHA, or branch
+npm start -- sdk typescript-sdk@v1.29.0
+npm start -- sdk typescript-sdk@abc123f
+npm start -- sdk python-sdk@some-feature-branch
+
+# Use an existing local checkout (no clone, no fetch)
+npm start -- sdk --path ../typescript-sdk --skip-build
+
+# Narrow to one mode / scenario / suite
+npm start -- sdk --path ../typescript-sdk --mode server --scenario server-initialize
+npm start -- sdk typescript-sdk@main --mode client --suite auth
+```
+
+Build/run commands for each official SDK are looked up by name from [`src/sdk-runner/known-sdks.ts`](src/sdk-runner/known-sdks.ts) — no config file is required in the SDK repo. Resolution order is **CLI flag > `conformance.config.yaml` in the SDK checkout (optional override) > built-in entry**, so any field can be overridden on the command line for refs that diverge from the built-in:
+
+```bash
+npm start -- sdk owner/go-sdk@some-branch \
+  --mode client \
+  --build-cmd 'go build -tags mcp_go_client_oauth -o ./.conformance-client ./conformance/everything-client' \
+  --client-cmd './.conformance-client'
+```
+
+To add a new SDK to the matrix, add an entry to `KNOWN_SDKS`.
+
+Clones are cached under `.sdk-under-test/` and reused (fetched) on subsequent runs.
+
 ## SDK Tier Assessment
 
 The `tier-check` subcommand evaluates an MCP SDK repository against [SEP-1730](https://github.com/modelcontextprotocol/modelcontextprotocol/issues/1730) (the SDK Tiering System):
diff --git a/src/index.ts b/src/index.ts
index 013f44c4..a3431cd3 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -46,6 +46,7 @@ import {
 } from './expected-failures';
 import { createTierCheckCommand } from './tier-check';
 import { createNewSepCommand } from './new-sep';
+import { createSdkCommand } from './sdk-runner';
 import packageJson from '../package.json';
 
 // Note on naming: `command` refers to which CLI command is calling this.
@@ -544,6 +545,9 @@ program.addCommand(createTierCheckCommand());
 // New SEP scaffolding command
 program.addCommand(createNewSepCommand());
 
+// SDK command - run local conformance against an SDK at a specific ref
+program.addCommand(createSdkCommand());
+
 // List scenarios command
 program
   .command('list')
diff --git a/src/sdk-runner/checkout.ts b/src/sdk-runner/checkout.ts
new file mode 100644
index 00000000..1b77b9f6
--- /dev/null
+++ b/src/sdk-runner/checkout.ts
@@ -0,0 +1,120 @@
+import { spawn } from 'child_process';
+import { promises as fs } from 'fs';
+import path from 'path';
+
+export interface SdkSpec {
+  name: string;
+  ref: string;
+}
+
+const DEFAULT_ORG = 'modelcontextprotocol';
+const DEFAULT_REF = 'main';
+
+/**
+ * Split an `<name>[@<ref>]` spec on its last `@`.
+ * A missing or empty ref (`sdk`, `sdk@`) defaults to `main`; a leading `@` is
+ * treated as part of the name rather than as a separator.
+ */
+export function parseSdkSpec(spec: string): SdkSpec {
+  const at = spec.lastIndexOf('@');
+  if (at <= 0) {
+    return { name: spec, ref: DEFAULT_REF };
+  }
+  // `sdk@` would otherwise yield an empty ref that can never resolve.
+  return { name: spec.slice(0, at), ref: spec.slice(at + 1) || DEFAULT_REF };
+}
+
+/** GitHub clone URL; bare names are resolved under DEFAULT_ORG. */
+function repoUrl(name: string): string {
+  if (name.includes('/')) {
+    return `https://github.com/${name}.git`;
+  }
+  return `https://github.com/${DEFAULT_ORG}/${name}.git`;
+}
+
+/** Run git in cwd, capturing output; rejects with the output on non-zero exit. */
+async function git(
+  args: string[],
+  cwd: string
+): Promise<{ stdout: string; stderr: string }> {
+  const cmd = 'git';
+  return new Promise((resolve, reject) => {
+    const child = spawn(cmd, args, { cwd, stdio: ['ignore', 'pipe', 'pipe'] });
+    let stdout = '';
+    let stderr = '';
+    child.stdout.on('data', (d) => (stdout += d.toString()));
+    child.stderr.on('data', (d) => (stderr += d.toString()));
+    child.on('error', reject);
+    child.on('close', (code) => {
+      if (code === 0) {
+        resolve({ stdout, stderr });
+      } else {
+        reject(
+          new Error(
+            `${cmd} ${args.join(' ')} exited with ${code}\n${stderr || stdout}`
+          )
+        );
+      }
+    });
+  });
+}
+
+/** True when dir exists and is a directory; any stat failure counts as false. */
+async function dirExists(dir: string): Promise<boolean> {
+  try {
+    const stat = await fs.stat(dir);
+    return stat.isDirectory();
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Ensure an SDK is checked out at the requested ref under cacheDir.
+ * Clones on first use; on subsequent calls fetches and checks the ref out as a
+ * detached HEAD. Returns the absolute path to the checkout.
+ */
+export async function ensureCheckout(
+  spec: SdkSpec,
+  cacheDir: string
+): Promise<string> {
+  await fs.mkdir(cacheDir, { recursive: true });
+  // Replace every '/' so the cache entry is always a single path segment.
+  const safeName = spec.name.replace(/\//g, '__');
+  const dir = path.resolve(cacheDir, safeName);
+
+  if (await dirExists(path.join(dir, '.git'))) {
+    console.error(`[sdk] Fetching ${spec.name} (cached at ${dir})`);
+    await git(['fetch', '--tags', 'origin'], dir);
+  } else {
+    console.error(`[sdk] Cloning ${repoUrl(spec.name)} -> ${dir}`);
+    await git(['clone', repoUrl(spec.name), dir], cacheDir);
+  }
+
+  // Try the ref as a remote branch first, then fall back to a local-resolvable
+  // ref (tag or SHA).
+  const candidates = [`origin/${spec.ref}`, spec.ref];
+  let resolved: string | undefined;
+  for (const candidate of candidates) {
+    try {
+      await git(['rev-parse', '--verify', `${candidate}^{commit}`], dir);
+      resolved = candidate;
+      break;
+    } catch {
+      // rev-parse failure means this candidate doesn't exist; try the next form
+    }
+  }
+  if (!resolved) {
+    throw new Error(
+      `Ref '${spec.ref}' not found in ${spec.name} (tried ${candidates.join(', ')})`
+    );
+  }
+
+  console.error(`[sdk] Checking out ${spec.name}@${spec.ref} (${resolved})`);
+  await git(['checkout', '--detach', resolved], dir);
+
+  const { stdout } = await git(['rev-parse', '--short', 'HEAD'], dir);
+  console.error(`[sdk] HEAD is ${stdout.trim()}`);
+
+  return dir;
+}
diff --git a/src/sdk-runner/config.ts b/src/sdk-runner/config.ts
new file mode 100644
index 00000000..35d87259
--- /dev/null
+++ b/src/sdk-runner/config.ts
@@ -0,0 +1,53 @@
+import { promises as fs } from 'fs';
+import path from 'path';
+import { parse as parseYaml } from 'yaml';
+import { z } from 'zod';
+
+export const SdkConfigSchema = z.object({
+  build: z.string().optional(),
+  client: z
+    .object({
+      command: z.string()
+    })
+    .optional(),
+  server: z
+    .object({
+      command: z.string(),
+      url: z.string().url(),
+      readyTimeoutMs: z.number().int().positive().optional()
+    })
+    .optional(),
+  expectedFailures: z.string().optional()
+});
+
+export type SdkConfig = z.infer<typeof SdkConfigSchema>;
+
+const CONFIG_FILENAMES = [
+  'conformance.config.yaml',
+  'conformance.config.yml',
+  'conformance.config.json'
+];
+
+/**
+ * Load the first conformance config file found in dir, trying the names in
+ * CONFIG_FILENAMES in order. Returns null when none can be read; throws (a
+ * ZodError or a parse error) when a file exists but is malformed.
+ */
+export async function loadSdkConfig(dir: string): Promise<SdkConfig | null> {
+  for (const name of CONFIG_FILENAMES) {
+    const filePath = path.join(dir, name);
+    let raw: string;
+    try {
+      raw = await fs.readFile(filePath, 'utf-8');
+    } catch {
+      // Unreadable or missing: fall through to the next candidate name.
+      continue;
+    }
+    const parsed: unknown = name.endsWith('.json')
+      ? JSON.parse(raw)
+      : parseYaml(raw);
+    // An empty YAML document parses to null; treat it as an empty config.
+    return SdkConfigSchema.parse(parsed ?? {});
+  }
+  return null;
+}
diff --git a/src/sdk-runner/index.ts b/src/sdk-runner/index.ts
new file mode 100644
index 00000000..f0f7def5
--- /dev/null
+++ b/src/sdk-runner/index.ts
@@ -0,0 +1,309 @@
+import { spawn, ChildProcess } from 'child_process';
+import path from 'path';
+import { Command, Option } from 'commander';
+import { ZodError } from 'zod';
+import { loadSdkConfig, SdkConfig } from './config';
+import { parseSdkSpec, ensureCheckout } from './checkout';
+import { lookupBuiltinConfig, knownSdkNames } from './known-sdks';
+
+type Mode = 'client' | 'server' | 'both';
+
+/** Run command through the shell in cwd; rejects on a non-zero exit. */
+function execShell(command: string, cwd: string): Promise<void> {
+  return new Promise((resolve, reject) => {
+    const child = spawn(command, { shell: true, cwd, stdio: 'inherit' });
+    child.on('error', reject);
+    child.on('close', (code) => {
+      if (code === 0) resolve();
+      else reject(new Error(`Command failed (exit ${code}): ${command}`));
+    });
+  });
+}
+
+/**
+ * Re-invoke this CLI as a subprocess so scenario selection / reporting stay in
+ * one place (same approach tier-check uses). Preserves execArgv so tsx/loader
+ * hooks carry over when running from source.
+ */
+function selfInvoke(args: string[], cwd: string): Promise<number> {
+  return new Promise((resolve, reject) => {
+    const child = spawn(
+      process.execPath,
+      [...process.execArgv, process.argv[1], ...args],
+      { cwd, stdio: 'inherit' }
+    );
+    child.on('error', reject);
+    child.on('close', (code) => resolve(code ?? 1));
+  });
+}
+
+/**
+ * Poll url until any HTTP response arrives (the status code is not inspected).
+ * Throws once timeoutMs elapses without a successful connection.
+ */
+async function waitForReady(url: string, timeoutMs: number): Promise<void> {
+  const deadline = Date.now() + timeoutMs;
+  let lastErr: unknown;
+  while (Date.now() < deadline) {
+    try {
+      await fetch(url, { method: 'GET' });
+      return;
+    } catch (err) {
+      lastErr = err;
+      await new Promise((r) => setTimeout(r, 250));
+    }
+  }
+  throw new Error(
+    `Server at ${url} did not become ready within ${timeoutMs}ms: ${lastErr}`
+  );
+}
+
+/**
+ * Start command as a managed server, wait until url responds, run fn, and
+ * always stop the server afterwards. Rejects if the server exits before fn
+ * settles. On non-Windows platforms the server is spawned detached so the
+ * whole process group can be signalled.
+ */
+async function withManagedServer<T>(
+  command: string,
+  cwd: string,
+  url: string,
+  readyTimeoutMs: number,
+  fn: () => Promise<T>
+): Promise<T> {
+  console.error(`[sdk] Starting server: ${command}`);
+  const child: ChildProcess = spawn(command, {
+    shell: true,
+    cwd,
+    stdio: ['ignore', 'pipe', 'pipe'],
+    detached: process.platform !== 'win32'
+  });
+
+  let stderr = '';
+  child.stdout?.on('data', (d) => process.stderr.write(`[server] ${d}`));
+  child.stderr?.on('data', (d) => {
+    stderr += d.toString();
+    process.stderr.write(`[server] ${d}`);
+  });
+
+  let stopping = false;
+  const exited = new Promise<never>((_, reject) => {
+    child.on('exit', (code) => {
+      if (stopping) return;
+      reject(
+        new Error(
+          `Server exited with code ${code} before tests completed\n${stderr}`
+        )
+      );
+    });
+    child.on('error', reject);
+  });
+  // Mark as handled; the races below still observe the rejection.
+  exited.catch(() => {});
+
+  try {
+    await Promise.race([waitForReady(url, readyTimeoutMs), exited]);
+    console.error(`[sdk] Server ready at ${url}`);
+    return await Promise.race([fn(), exited]);
+  } finally {
+    stopping = true;
+    console.error(`[sdk] Stopping server`);
+    if (process.platform !== 'win32' && child.pid) {
+      try {
+        // A negative pid signals the whole process group.
+        process.kill(-child.pid, 'SIGTERM');
+      } catch {
+        child.kill('SIGTERM');
+      }
+    } else {
+      child.kill('SIGTERM');
+    }
+  }
+}
+
+/** Translate sdk-command options into flags for the client/server commands. */
+function passThrough(options: {
+  scenario?: string;
+  suite?: string;
+  timeout?: string;
+  verbose?: boolean;
+  output?: string;
+}): string[] {
+  const args: string[] = [];
+  // --scenario takes precedence over --suite.
+  if (options.scenario) args.push('--scenario', options.scenario);
+  else if (options.suite) args.push('--suite', options.suite);
+  if (options.timeout) args.push('--timeout', options.timeout);
+  if (options.verbose) args.push('--verbose');
+  if (options.output) args.push('-o', options.output);
+  return args;
+}
+
+/**
+ * Build the `sdk` subcommand: resolve the SDK checkout and its config, build
+ * it, then delegate to the existing `client` / `server` commands.
+ */
+export function createSdkCommand(): Command {
+  return new Command('sdk')
+    .description(
+      'Run the local conformance build against an SDK checked out at a specific ref'
+    )
+    .argument(
+      '[sdk]',
+      'SDK to test as <name>[@<ref>], e.g. typescript-sdk@main. Name may be owner/repo.'
+    )
+    .option(
+      '--path <dir>',
+      'Use an existing local SDK checkout instead of cloning'
+    )
+    .option(
+      '--cache-dir <dir>',
+      'Directory for cached SDK clones',
+      '.sdk-under-test'
+    )
+    .addOption(
+      new Option('--mode <mode>', 'Which side to test')
+        .choices(['client', 'server', 'both'])
+        .default('both')
+    )
+    .option('--scenario <name>', 'Run a single scenario (passed through)')
+    .option('--suite <name>', 'Run a suite (passed through)')
+    .option('--skip-build', 'Skip the SDK build step (reuse prior build)')
+    .option('--build-cmd <cmd>', 'Override the build command from config')
+    .option('--client-cmd <cmd>', 'Override the client command from config')
+    .option('--server-cmd <cmd>', 'Override the server command from config')
+    .option('--server-url <url>', 'Override the server URL from config')
+    .option('--timeout <ms>', 'Per-scenario client timeout (passed through)')
+    .option('-o, --output <dir>', 'Output directory (passed through)')
+    .option('--verbose', 'Verbose output (passed through)')
+    .action(async (sdkArg: string | undefined, options) => {
+      try {
+        const mode = options.mode as Mode;
+        if (options.scenario && mode === 'both') {
+          throw new Error(
+            `--scenario requires --mode client or --mode server (a scenario belongs to exactly one side)`
+          );
+        }
+        if (!sdkArg && !options.path) {
+          throw new Error(
+            `Provide an SDK spec (e.g. typescript-sdk@main) or --path`
+          );
+        }
+
+        const spec = sdkArg ? parseSdkSpec(sdkArg) : undefined;
+        const dir = options.path
+          ? path.resolve(options.path)
+          : await ensureCheckout(spec!, options.cacheDir);
+        const sdkName = spec?.name ?? path.basename(dir);
+
+        // Resolution: CLI flag > config file in SDK checkout > built-in.
+        const fileConfig: SdkConfig = (await loadSdkConfig(dir)) ?? {};
+        const builtinConfig: SdkConfig = lookupBuiltinConfig(sdkName) ?? {};
+        const buildCmd: string | undefined =
+          options.buildCmd ?? fileConfig.build ?? builtinConfig.build;
+        const clientCmd: string | undefined =
+          options.clientCmd ??
+          fileConfig.client?.command ??
+          builtinConfig.client?.command;
+        const serverCmd: string | undefined =
+          options.serverCmd ??
+          fileConfig.server?.command ??
+          builtinConfig.server?.command;
+        const serverUrl: string | undefined =
+          options.serverUrl ??
+          fileConfig.server?.url ??
+          builtinConfig.server?.url;
+        const expectedFailuresRel =
+          fileConfig.expectedFailures ?? builtinConfig.expectedFailures;
+        const expectedFailures = expectedFailuresRel
+          ? path.resolve(dir, expectedFailuresRel)
+          : undefined;
+
+        if (buildCmd && !options.skipBuild) {
+          console.error(`[sdk] Building: ${buildCmd}`);
+          await execShell(buildCmd, dir);
+        } else if (!buildCmd) {
+          console.error(
+            `[sdk] No build command in config; assuming SDK is already built`
+          );
+        }
+
+        // First non-zero exit code wins, but both sides always run in 'both'
+        // mode: `exitCode ||= await run()` would short-circuit and skip the
+        // server suite whenever the client suite had already failed.
+        let exitCode = 0;
+
+        if (mode === 'client' || mode === 'both') {
+          if (!clientCmd) {
+            throw new Error(
+              `No client command for '${sdkName}'. Pass --client-cmd, or add it to KNOWN_SDKS in src/sdk-runner/known-sdks.ts (known: ${knownSdkNames().join(', ')}).`
+            );
+          }
+          const args = [
+            'client',
+            '--command',
+            clientCmd,
+            ...passThrough({
+              scenario: options.scenario,
+              suite: options.suite ?? 'all',
+              timeout: options.timeout,
+              verbose: options.verbose,
+              output: options.output
+            })
+          ];
+          if (expectedFailures)
+            args.push('--expected-failures', expectedFailures);
+          console.error(`\n[sdk] conformance ${args.join(' ')}\n`);
+          exitCode = await selfInvoke(args, dir);
+        }
+
+        if (mode === 'server' || mode === 'both') {
+          if (!serverCmd || !serverUrl) {
+            throw new Error(
+              `No server command/url for '${sdkName}'. Pass --server-cmd / --server-url, or add it to KNOWN_SDKS in src/sdk-runner/known-sdks.ts (known: ${knownSdkNames().join(', ')}).`
+            );
+          }
+          const args = [
+            'server',
+            '--url',
+            serverUrl,
+            ...passThrough({
+              scenario: options.scenario,
+              suite: options.suite,
+              verbose: options.verbose,
+              output: options.output
+            })
+          ];
+          if (expectedFailures)
+            args.push('--expected-failures', expectedFailures);
+          const serverExit = await withManagedServer(
+            serverCmd,
+            dir,
+            serverUrl,
+            fileConfig.server?.readyTimeoutMs ??
+              builtinConfig.server?.readyTimeoutMs ??
+              15000,
+            async () => {
+              console.error(`\n[sdk] conformance ${args.join(' ')}\n`);
+              return selfInvoke(args, dir);
+            }
+          );
+          if (exitCode === 0) exitCode = serverExit;
+        }
+
+        process.exit(exitCode);
+      } catch (error) {
+        if (error instanceof ZodError) {
+          console.error('Config validation error:');
+          error.issues.forEach((e) =>
+            console.error(` ${e.path.join('.')}: ${e.message}`)
+          );
+        } else {
+          console.error(
+            `[sdk] ${error instanceof Error ? error.message : String(error)}`
+          );
+        }
+        process.exit(1);
+      }
+    });
+}
diff --git a/src/sdk-runner/known-sdks.ts b/src/sdk-runner/known-sdks.ts
new file mode 100644
index 00000000..4808aeef
--- /dev/null
+++ b/src/sdk-runner/known-sdks.ts
@@ -0,0 +1,49 @@
+import type { SdkConfig } from './config';
+
+/**
+ * Built-in conformance configs for official SDKs, keyed by repo name.
+ *
+ * These live here (not in the SDK repos) so adding an SDK to the matrix
+ * doesn't require a coordinated cross-repo PR. An SDK can still ship a
+ * conformance.config.yaml at its root to override these — see the resolution
+ * order in createSdkCommand (./index.ts).
+ */
+export const KNOWN_SDKS: Record<string, SdkConfig> = {
+  'typescript-sdk': {
+    build: 'npm ci && npm run build',
+    client: {
+      command: 'npx tsx test/conformance/src/everythingClient.ts'
+    },
+    server: {
+      command: 'npx tsx test/conformance/src/everythingServer.ts',
+      url: 'http://localhost:3000/mcp'
+    },
+    expectedFailures: 'test/conformance/conformance-baseline.yml'
+  },
+  'go-sdk': {
+    build: 'go build -o ./.conformance-server ./examples/server/conformance',
+    // Upstream go-sdk has no client conformance fixture yet (see go-sdk#859).
+    server: {
+      command: './.conformance-server -http=:3000',
+      url: 'http://localhost:3000'
+    }
+  }
+};
+
+/**
+ * Look up a built-in config by SDK name. Accepts bare names (typescript-sdk),
+ * owner/repo (modelcontextprotocol/typescript-sdk), or a checkout path
+ * basename — only the final path segment is used as the key.
+ */
+export function lookupBuiltinConfig(name: string): SdkConfig | null {
+  const key = name.split('/').pop() ?? name;
+  // Own-property check so inherited keys ('constructor', 'toString', ...)
+  // are never mistaken for an SDK entry.
+  if (!Object.prototype.hasOwnProperty.call(KNOWN_SDKS, key)) return null;
+  return KNOWN_SDKS[key] ?? null;
+}
+
+/** Names of the SDKs that have a built-in config. */
+export function knownSdkNames(): string[] {
+  return Object.keys(KNOWN_SDKS);
+}
diff --git a/src/sdk-runner/sdk-runner.test.ts b/src/sdk-runner/sdk-runner.test.ts
new file mode 100644
index 00000000..62582c6e
--- /dev/null
+++ b/src/sdk-runner/sdk-runner.test.ts
@@ -0,0 +1,141 @@
+import { describe, expect, it } from 'vitest';
+import { promises as fs } from 'fs';
+import os from 'os';
+import path from 'path';
+import { parseSdkSpec } from './checkout';
+import { loadSdkConfig, SdkConfigSchema } from './config';
+import { lookupBuiltinConfig, KNOWN_SDKS } from './known-sdks';
+
+describe('parseSdkSpec', () => {
+  it('defaults ref to main when omitted', () => {
+    expect(parseSdkSpec('typescript-sdk')).toEqual({
+      name: 'typescript-sdk',
+      ref: 'main'
+    });
+  });
+
+  it('splits name@ref', () => {
+    expect(parseSdkSpec('typescript-sdk@v1.29.0')).toEqual({
+      name: 'typescript-sdk',
+      ref: 'v1.29.0'
+    });
+  });
+
+  it('handles owner/repo@ref', () => {
+    expect(parseSdkSpec('someorg/some-sdk@abc123')).toEqual({
+      name: 'someorg/some-sdk',
+      ref: 'abc123'
+    });
+  });
+
+  it('treats leading @ as part of the name', () => {
+    expect(parseSdkSpec('@scope/pkg')).toEqual({
+      name: '@scope/pkg',
+      ref: 'main'
+    });
+  });
+
+  it('defaults ref to main when the ref is empty', () => {
+    expect(parseSdkSpec('typescript-sdk@')).toEqual({
+      name: 'typescript-sdk',
+      ref: 'main'
+    });
+  });
+});
+
+describe('SdkConfigSchema', () => {
+  it('accepts a minimal client-only config', () => {
+    const cfg = SdkConfigSchema.parse({
+      client: { command: 'tsx fixture.ts' }
+    });
+    expect(cfg.client?.command).toBe('tsx fixture.ts');
+    expect(cfg.server).toBeUndefined();
+  });
+
+  it('rejects server config without a url', () => {
+    expect(() =>
+      SdkConfigSchema.parse({ server: { command: 'tsx server.ts' } })
+    ).toThrow();
+  });
+});
+
+describe('loadSdkConfig', () => {
+  it('loads conformance.config.yaml from a directory', async () => {
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'sdk-cfg-'));
+    try {
+      await fs.writeFile(
+        path.join(dir, 'conformance.config.yaml'),
+        [
+          'build: npm ci && npm run build',
+          'client:',
+          ' command: tsx test/client.ts',
+          'server:',
+          ' command: tsx test/server.ts',
+          ' url: http://localhost:3000/mcp',
+          'expectedFailures: baseline.yml'
+        ].join('\n')
+      );
+      const cfg = await loadSdkConfig(dir);
+      expect(cfg).toEqual({
+        build: 'npm ci && npm run build',
+        client: { command: 'tsx test/client.ts' },
+        server: {
+          command: 'tsx test/server.ts',
+          url: 'http://localhost:3000/mcp'
+        },
+        expectedFailures: 'baseline.yml'
+      });
+    } finally {
+      await fs.rm(dir, { recursive: true, force: true });
+    }
+  });
+
+  it('returns null when no config file is present', async () => {
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'sdk-cfg-'));
+    try {
+      expect(await loadSdkConfig(dir)).toBeNull();
+    } finally {
+      await fs.rm(dir, { recursive: true, force: true });
+    }
+  });
+
+  it('treats an empty config file as an empty config', async () => {
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'sdk-cfg-'));
+    try {
+      await fs.writeFile(path.join(dir, 'conformance.config.yaml'), '');
+      expect(await loadSdkConfig(dir)).toEqual({});
+    } finally {
+      await fs.rm(dir, { recursive: true, force: true });
+    }
+  });
+});
+
+describe('lookupBuiltinConfig', () => {
+  it('finds an SDK by bare name', () => {
+    expect(lookupBuiltinConfig('typescript-sdk')?.client?.command).toBeTruthy();
+  });
+
+  it('strips owner/ prefix and path segments', () => {
+    expect(lookupBuiltinConfig('modelcontextprotocol/typescript-sdk')).toBe(
+      KNOWN_SDKS['typescript-sdk']
+    );
+    expect(lookupBuiltinConfig('/some/path/to/go-sdk')).toBe(
+      KNOWN_SDKS['go-sdk']
+    );
+  });
+
+  it('returns null for unknown SDKs', () => {
+    expect(lookupBuiltinConfig('rust-sdk')).toBeNull();
+  });
+
+  it('ignores keys inherited from Object.prototype', () => {
+    expect(lookupBuiltinConfig('constructor')).toBeNull();
+    expect(lookupBuiltinConfig('owner/toString')).toBeNull();
+  });
+
+  it('every built-in entry validates against SdkConfigSchema', () => {
+    for (const [name, cfg] of Object.entries(KNOWN_SDKS)) {
+      expect(() => SdkConfigSchema.parse(cfg), name).not.toThrow();
+    }
+  });
+});
diff --git a/vitest.config.ts b/vitest.config.ts
index 93242b59..b0f36f97 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -5,7 +5,7 @@ export default defineConfig({
     globals: true,
     environment: 'node',
     include: ['**/*.test.ts'],
-    exclude: ['**/node_modules/**', 'dist'],
+    exclude: ['**/node_modules/**', 'dist', '.sdk-under-test'],
     // Run test files sequentially to avoid port conflicts
     fileParallelism: false,
     // Increase timeout for server tests in CI