profullstack · ralyodio · May 20, 2026 · May 20, 2026
diff --git a/packages/ai/wandb/src/index.test.ts b/packages/ai/wandb/src/index.test.ts
@@ -1,4 +1,119 @@
 import { smokeTest } from '@profullstack/sh1pt-core/testing';
+import { afterEach, describe, expect, it, vi } from 'vitest';
 import adapter from './index.js';
 
 smokeTest(adapter, { idPrefix: 'ai' });
+
+const ctx = ( // minimal stand-in for the context object passed to adapter.generate in these tests
+  secrets: Record<string, string> = { WANDB_API_KEY: 'test-key' }, // backing store for secret(); defaults to a single API key
+  dryRun = false,
+) => ({
+  secret: (key: string) => secrets[key], // plain lookup; keys missing from `secrets` yield undefined
+  log: () => {}, // log output is discarded
+  dryRun,
+});
+
+describe('W&B Inference generation', () => {
+  afterEach(() => {
+    vi.unstubAllGlobals(); // undo each test's vi.stubGlobal('fetch', …) so the stub does not leak into the next test
+  });
+
+  it('short-circuits dry-run before network calls', async () => {
+    const fetchMock = vi.fn(); // no return value configured: the test only checks that it is never called
+    vi.stubGlobal('fetch', fetchMock);
+
+    const result = await adapter.generate(
+      ctx({ WANDB_API_KEY: 'test-key' }, true), // second argument sets dryRun = true
+      'hello',
+      {},
+      {},
+    );
+
+    expect(result).toEqual({ text: '[dry-run]', model: 'meta-llama/Llama-3.1-8B-Instruct' }); // opts is empty, so the default model is reported
+    expect(fetchMock).not.toHaveBeenCalled();
+  });
+
+  it('posts chat completions requests and maps usage tokens', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({ // canned successful chat-style response
+      ok: true,
+      json: async () => ({
+        model: 'meta-llama/Llama-3.1-8B-Instruct',
+        choices: [{ message: { role: 'assistant', content: 'hi from wandb' } }],
+        usage: { prompt_tokens: 11, completion_tokens: 3, total_tokens: 14 },
+      }),
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const result = await adapter.generate(
+      ctx(), // default secrets: WANDB_API_KEY = 'test-key'
+      'hello',
+      {
+        system: 'be direct',
+        maxTokens: 70,
+        temperature: 0.5,
+        extra: { top_p: 0.9, request_id: 'req-test' },
+      },
+      { project: 'team/project' }, // config; asserted below as the OpenAI-Project header
+    );
+
+    expect(fetchMock).toHaveBeenCalledOnce();
+    const call = fetchMock.mock.calls[0];
+    expect(call).toBeDefined();
+    const [url, request] = call!; // the non-null assertion is backed by the toBeDefined() check above
+    expect(url).toBe('https://api.inference.wandb.ai/v1/chat/completions'); // config has no baseUrl, so the default base is used
+    expect(request.headers.authorization).toBe('Bearer test-key');
+    expect(request.headers['content-type']).toBe('application/json');
+    expect(request.headers['OpenAI-Project']).toBe('team/project');
+    expect(JSON.parse(request.body)).toEqual({ // the body is sent as a JSON string
+      model: 'meta-llama/Llama-3.1-8B-Instruct',
+      messages: [
+        { role: 'system', content: 'be direct' },
+        { role: 'user', content: 'hello' },
+      ],
+      stream: false,
+      max_tokens: 70, // from opts.maxTokens
+      temperature: 0.5,
+      top_p: 0.9, // keys of opts.extra land at the top level of the body
+      request_id: 'req-test',
+    });
+    expect(result).toEqual({ // usage.total_tokens is not part of the mapped result
+      text: 'hi from wandb',
+      model: 'meta-llama/Llama-3.1-8B-Instruct',
+      inputTokens: 11,
+      outputTokens: 3,
+    });
+  });
+
+  it('supports text-style choices from compatible responses', async () => {
+    vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => ({
+        choices: [{ text: 'legacy text response' }], // no `message`, and the response has no `model` or `usage`
+      }),
+    }));
+
+    const result = await adapter.generate(
+      ctx(),
+      'hello',
+      { model: 'deepseek-ai/DeepSeek-V3-0324' },
+      { baseUrl: 'https://wandb.test/v1' }, // custom base; the requested URL itself is not asserted in this test
+    );
+
+    expect(result).toEqual({ // toEqual ignores the undefined inputTokens/outputTokens on the result
+      text: 'legacy text response',
+      model: 'deepseek-ai/DeepSeek-V3-0324', // the response has no model, so the requested one is reported
+    });
+  });
+
+  it('includes status and response body excerpt on errors', async () => {
+    vi.stubGlobal('fetch', vi.fn().mockResolvedValue({
+      ok: false,
+      status: 429,
+      text: async () => 'rate limit exceeded'.repeat(30), // 570 chars, well past the 200-char excerpt
+    }));
+
+    await expect(adapter.generate(ctx(), 'hello', {}, {})).rejects.toThrow(
+      /^W&B Inference 429: (?:rate limit exceeded){10}rate limit$/, // anchored: exactly the first 200 chars (10 × 19 + 10) of the body
+    );
+  });
+});
diff --git a/packages/ai/wandb/src/index.ts b/packages/ai/wandb/src/index.ts
@@ -2,29 +2,93 @@ import { defineAi, tokenSetup } from '@profullstack/sh1pt-core';
 
 interface Config {
   baseUrl?: string;
+  project?: string; // when set, generate() sends it as the `OpenAI-Project` request header
 }
 
+const DEFAULT_BASE = 'https://api.inference.wandb.ai/v1'; // base URL used when config.baseUrl is not set
+const DEFAULT_MODEL = 'meta-llama/Llama-3.1-8B-Instruct'; // used when opts.model is not set; also the adapter's defaultModel
+
 export default defineAi<Config>({
   id: 'ai-wandb',
   label: 'Weights & Biases',
-  defaultModel: 'WANDB_API_KEY',
-  models: ['WANDB_API_KEY'],
-
-  async generate(ctx, prompt, _opts, _config) {
-    const apiKey = ctx.secret('https://wandb.ai');
-    if (!apiKey) throw new Error('https://wandb.ai not in vault — run `sh1pt promote ai setup`');
-    ctx.log(`[stub] ai-wandb · ${prompt.length} chars in — integration pending`);
-    return { text: '[stub — ai-wandb integration not yet implemented]', model: 'WANDB_API_KEY' };
+  defaultModel: DEFAULT_MODEL,
+  models: [ // generate() does not check opts.model against this list
+    DEFAULT_MODEL,
+    'meta-llama/Llama-3.3-70B-Instruct',
+    'deepseek-ai/DeepSeek-V3-0324',
+  ],
+
+  // One non-streaming chat completion against W&B Inference; a dry run returns before any network call.
+  // Throws if WANDB_API_KEY is missing or the response is not ok. Trailing slashes on config.baseUrl are ignored.
+  async generate(ctx, prompt, opts, config) {
+    const apiKey = ctx.secret('WANDB_API_KEY');
+    if (!apiKey) throw new Error('WANDB_API_KEY not in vault');
+    const model = opts.model ?? DEFAULT_MODEL;
+    ctx.log(`wandb inference - model=${model} - ${prompt.length} chars in`);
+    if (ctx.dryRun) return { text: '[dry-run]', model };
+
+    const messages: WandbMessage[] = []; // optional system prompt first, then the user prompt
+    if (opts.system) messages.push({ role: 'system', content: opts.system });
+    messages.push({ role: 'user', content: prompt });
+    const headers: Record<string, string> = {
+      authorization: `Bearer ${apiKey}`,
+      'content-type': 'application/json',
+    };
+    if (config.project) headers['OpenAI-Project'] = config.project;
+    const res = await fetch(`${(config.baseUrl ?? DEFAULT_BASE).replace(/\/+$/, '')}/chat/completions`, {
+      method: 'POST',
+      headers,
+      body: JSON.stringify({
+        model,
+        messages,
+        stream: false,
+        ...(opts.maxTokens !== undefined ? { max_tokens: opts.maxTokens } : {}),
+        ...(opts.temperature !== undefined ? { temperature: opts.temperature } : {}),
+        ...opts.extra, // spread last, so keys in extra override the fields above
+      }),
+    });
+    if (!res.ok) throw new Error(`W&B Inference ${res.status}: ${(await res.text()).slice(0, 200)}`);
+
+    const data = await res.json() as WandbChatResponse; // cast only; the payload is not validated
+    const choice = data.choices?.[0]; // a 2xx body without `choices` yields empty text instead of a TypeError
+    return {
+      text: choice?.message?.content ?? choice?.text ?? '',
+      model: data.model ?? model,
+      inputTokens: data.usage?.prompt_tokens,
+      outputTokens: data.usage?.completion_tokens,
+    };
   },
 
   setup: tokenSetup<Config>({
-    secretKey: 'https://wandb.ai',
+    secretKey: 'WANDB_API_KEY', // same name generate() passes to ctx.secret()
     label: 'Weights & Biases',
-    vendorDocUrl: '',
+    vendorDocUrl: 'https://docs.wandb.ai/inference/api-reference/chat-completions',
     steps: [
-      'Sign in at  and create an API key',
-      'Copy the key — usually shown once',
+      'Sign in at https://wandb.ai/settings and create an API key',
+      'Copy the key; it is usually shown once',
+      'Optionally configure project as <team>/<project> for W&B usage tracking', // refers to Config.project
       'Paste below; sh1pt encrypts it in the vault',
     ],
   }),
 });
+
+type WandbRole = 'system' | 'user' | 'assistant' | 'tool'; // generate() itself only emits 'system' and 'user'
+
+interface WandbMessage { // one entry of the `messages` array in the request body
+  role: WandbRole;
+  content: string;
+}
+
+interface WandbChatResponse { // the response fields generate() reads; applied with a cast, not validated at runtime
+  model?: string; // reported model; generate() falls back to the requested model when absent
+  choices: Array<{
+    message?: {
+      content?: string; // chat-style reply text
+    };
+    text?: string; // text-style reply, used when message.content is absent
+  }>;
+  usage?: {
+    prompt_tokens?: number; // mapped to inputTokens
+    completion_tokens?: number; // mapped to outputTokens
+  };
+}