From 83a343695e1dcac6ab08bfc1eedcffe70fb92e8f Mon Sep 17 00:00:00 2001 From: robgruen Date: Fri, 10 Apr 2026 17:59:28 -0700 Subject: [PATCH 1/4] Add JsonSchemaType alias replacing 'any' in StructuredOutputJsonSchema and FunctionCallingJsonSchema Introduces a named Record type that documents the intent (an opaque JSON Schema object for the OpenAI API) while being more type-safe than raw any. Co-Authored-By: Claude Sonnet 4.6 --- TODO.md | 2 - ts/packages/aiclient/src/models.ts | 7 +++- ts/packages/aiclient/test/models.spec.ts | 53 ++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 ts/packages/aiclient/test/models.spec.ts diff --git a/TODO.md b/TODO.md index 028bb9c1b..404846d9d 100644 --- a/TODO.md +++ b/TODO.md @@ -153,8 +153,6 @@ This file collates all TODO comments found across the repository, organized by t | `ts/packages/agents/spelunker/src/typescriptChunker.ts` | 198 | Move to caller? | Low | High | Local | Fix | No | | `ts/packages/agents/video/src/videoActionHandler.ts` | 54 | dynamic duration | Medium | Medium | Local | Fix | Yes | | `ts/packages/agents/weather/src/weatherActionHandler.ts` | 227 | Add more sophisticated validation: | Medium | High | Local | Fix | No | -| `ts/packages/aiclient/src/models.ts` | 31 | JsonSchemaType | Low | High | Local | Fix | No | -| `ts/packages/aiclient/src/models.ts` | 39 | JsonSchemaType | Low | High | Local | Fix | No | | `ts/packages/aiclient/src/models.ts` | 176 | add support for videos | High | Medium | Component | Fix | Yes | | `ts/packages/aiclient/src/openai.ts` | 580 | remove after API endpoint correctly handles this case | Low | Medium | Local | Fix | Yes | | `ts/packages/aiclient/src/tokenCounter.ts` | 58 | intermittently cache these with the session | Medium | Medium | Component | Fix | Yes | diff --git a/ts/packages/aiclient/src/models.ts b/ts/packages/aiclient/src/models.ts index 4bdddd192..dbfa04716 100644 --- a/ts/packages/aiclient/src/models.ts +++ 
b/ts/packages/aiclient/src/models.ts @@ -24,11 +24,14 @@ export type CompletionSettings = { verbosity?: "low" | "medium" | "high"; }; +// A JSON Schema object passed opaquely to the OpenAI API. +export type JsonSchemaType = Record<string, unknown>; + export type StructuredOutputJsonSchema = { name: string; description?: string; strict?: true; - schema: any; // TODO: JsonSchemaType + schema: JsonSchemaType; }; export type FunctionCallingJsonSchema = { @@ -36,7 +39,7 @@ export type FunctionCallingJsonSchema = { function: { name: string; description?: string; - parameters?: any; // TODO: JsonSchemaType + parameters?: JsonSchemaType; strict?: true; }; }; diff --git a/ts/packages/aiclient/test/models.spec.ts b/ts/packages/aiclient/test/models.spec.ts new file mode 100644 index 000000000..145914bc0 --- /dev/null +++ b/ts/packages/aiclient/test/models.spec.ts @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { + JsonSchemaType, + StructuredOutputJsonSchema, + FunctionCallingJsonSchema, +} from "../src/models.js"; + +describe("models.JsonSchemaType", () => { + test("JsonSchemaType accepts a plain object schema", () => { + const schema: JsonSchemaType = { + type: "object", + properties: { name: { type: "string" } }, + required: ["name"], + additionalProperties: false, + }; + expect(schema["type"]).toBe("object"); + }); + + test("StructuredOutputJsonSchema holds a schema object", () => { + const schema: JsonSchemaType = { type: "object", properties: {} }; + const structured: StructuredOutputJsonSchema = { + name: "MyAction", + strict: true, + schema, + }; + expect(structured.name).toBe("MyAction"); + expect(structured.schema["type"]).toBe("object"); + }); + + test("FunctionCallingJsonSchema holds optional parameters schema", () => { + const fn: FunctionCallingJsonSchema = { + type: "function", + function: { + name: "doSomething", + description: "Does something", + parameters: { type: "object", properties: {} }, + strict: true, + }, + }; + 
expect(fn.function.name).toBe("doSomething"); + expect(fn.function.parameters?.["type"]).toBe("object"); + }); + + test("FunctionCallingJsonSchema allows omitting parameters", () => { + const fn: FunctionCallingJsonSchema = { + type: "function", + function: { name: "noArgs" }, + }; + expect(fn.function.parameters).toBeUndefined(); + }); +}); From 731f104e53f2b5fd1b232aa39cc8f7d726df401c Mon Sep 17 00:00:00 2001 From: robgruen Date: Sun, 12 Apr 2026 17:47:34 -0700 Subject: [PATCH 2/4] Add exponential backoff to telemetry logger retry loops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both CosmosDB and MongoDB sinks retried immediately on transient errors (e.g. ENOTFOUND when offline). Now each retry waits an exponentially increasing delay (1 s, 2 s, …) before the next attempt, reducing unnecessary network hammering when connectivity is lost. Co-Authored-By: Claude Sonnet 4.6 --- TODO.md | 3 --- ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts | 10 ++++++++-- ts/packages/telemetry/src/logger/mongoLoggerSink.ts | 10 ++++++++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/TODO.md b/TODO.md index 404846d9d..614164845 100644 --- a/TODO.md +++ b/TODO.md @@ -294,9 +294,6 @@ This file collates all TODO comments found across the repository, organized by t | `ts/packages/shell/test/configCommands.spec.ts` | 87 | Test action correction | Medium | Medium | Component | Fix | Yes | | `ts/packages/shell/test/sessionCommands.spec.ts` | 194 | Test action correction | Medium | Medium | Component | Fix | Yes | | `ts/packages/shell/test/testHelper.ts` | 236 | fix completion to not need this workaround | Medium | Medium | Local | Fix | No | -| `ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts` | 55 | add backoff/queuing logic for ENOTFOUND (no internet) | Medium | High | Local | Fix | No | -| `ts/packages/telemetry/src/logger/mongoLoggerSink.ts` | 41 | add backoff/queuing logic for ENOTFOUND (no internet) | 
Medium | High | Local | Fix | No | -| `ts/packages/typeagent/src/storage/embeddingFS.ts` | 158 | parallelize | Low | High | Local | Fix | No | | `ts/packages/typeagent/src/vector/vectorIndex.ts` | 176 | batch operations | Medium | High | Local | Fix | No | | `ts/packages/utils/typechatUtils/src/location.ts` | 138 | update any once @azure-rest/maps-search incorporates V1 return types | Low | Low | Local | No Fix | Yes | | `ts/packages/utils/typechatUtils/src/location.ts` | 154 | handle more result types | Medium | Medium | Local | Fix | Yes | diff --git a/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts b/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts index 4cc41c609..6ffa79f15 100644 --- a/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts +++ b/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts @@ -46,13 +46,13 @@ class CosmosDBLoggerSink implements LoggerSink { private async upload() { let attempt = 0; + let delay = UPLOAD_DELAY; while (attempt < MAX_RETRY) { attempt++; try { await this.drain(); break; } catch (e: any) { - // TODO: add backoff/queuing logic for ENOTFOUND (no internet) if ( typeof e.message === "string" && (e.message.includes("Invalid key") || @@ -72,8 +72,14 @@ class CosmosDBLoggerSink implements LoggerSink { break; } - // Retry + // Retry with exponential backoff (handles ENOTFOUND and other transient errors) debugCosmos(`ERROR: ${e}`); + if (attempt < MAX_RETRY) { + await new Promise((resolve) => + setTimeout(resolve, delay), + ); + delay *= 2; + } } } // Clear the timeout so it can schedule after the next log event. 
diff --git a/ts/packages/telemetry/src/logger/mongoLoggerSink.ts b/ts/packages/telemetry/src/logger/mongoLoggerSink.ts index 68530a3ad..47b92b4eb 100644 --- a/ts/packages/telemetry/src/logger/mongoLoggerSink.ts +++ b/ts/packages/telemetry/src/logger/mongoLoggerSink.ts @@ -32,13 +32,13 @@ class MongoDBLoggerSink implements LoggerSink { private async upload() { let attempt = 0; + let delay = UPLOAD_DELAY; while (attempt < MAX_RETRY) { attempt++; try { await this.drain(); break; } catch (e: any) { - // TODO: add backoff/queuing logic for ENOTFOUND (no internet) if ( typeof e.message === "string" && e.message.includes("Invalid key") @@ -56,8 +56,14 @@ class MongoDBLoggerSink implements LoggerSink { break; } - // Retry + // Retry with exponential backoff (handles ENOTFOUND and other transient errors) debugMongo(`ERROR: ${e}`); + if (attempt < MAX_RETRY) { + await new Promise((resolve) => + setTimeout(resolve, delay), + ); + delay *= 2; + } } } // Clear the timeout so it can schedule after the next log event. From 5a848025c06d681545eb75a08fb6e49cc28a46e6 Mon Sep 17 00:00:00 2001 From: robgruen Date: Fri, 10 Apr 2026 17:14:04 -0700 Subject: [PATCH 3/4] Parallelize embeddingFS.loadEntriesSubset using asyncArray.mapAsync The function was loading entries sequentially with a for-loop while the sibling loadEntries() already used asyncArray.mapAsync with the folder's concurrency setting. Apply the same pattern to loadEntriesSubset. 
Co-Authored-By: Claude Sonnet 4.6 --- TODO.md | 4 +- .../typeagent/src/storage/embeddingFS.ts | 16 ++-- .../test/storage.embeddingFS.spec.ts | 83 +++++++++++++++++++ 3 files changed, 95 insertions(+), 8 deletions(-) create mode 100644 ts/packages/typeagent/test/storage.embeddingFS.spec.ts diff --git a/TODO.md b/TODO.md index 614164845..26157a61d 100644 --- a/TODO.md +++ b/TODO.md @@ -278,8 +278,6 @@ This file collates all TODO comments found across the repository, organized by t | `ts/packages/memory/image/src/imageMeta.ts` | 151 | image taker name | Low | High | Local | Fix | No | | `ts/packages/memory/image/src/imageMeta.ts` | 356 | Ensure localization | Medium | Medium | Local | Fix | No | | `ts/packages/memory/image/src/indexingService.ts` | 5 | add support for "monitoring" the indexed folder for changes | High | Medium | Component | Fix | No | -| `ts/packages/memory/image/src/indexingService.ts` | 23 | add token stats | Low | High | Local | Fix | No | -| `ts/packages/memory/image/src/indexingService.ts` | 132 | make this less chatty - maybe percentage based or something? 
| Low | High | Local | Fix | No | | `ts/packages/memory/storage/src/azSearch/azQuery.ts` | 90 | handle related terms | Medium | Medium | Component | Fix | No | | `ts/packages/memory/website/src/websiteCollection.ts` | 2646 | If we have access to Graphology graphs, compute more advanced metrics | High | Low | Component | No Fix | Yes | | `ts/packages/memory/website/src/websiteCollection.ts` | 2709 | In a full implementation, this would: | High | Low | Component | No Fix | Yes | @@ -294,6 +292,8 @@ This file collates all TODO comments found across the repository, organized by t | `ts/packages/shell/test/configCommands.spec.ts` | 87 | Test action correction | Medium | Medium | Component | Fix | Yes | | `ts/packages/shell/test/sessionCommands.spec.ts` | 194 | Test action correction | Medium | Medium | Component | Fix | Yes | | `ts/packages/shell/test/testHelper.ts` | 236 | fix completion to not need this workaround | Medium | Medium | Local | Fix | No | +| `ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts` | 55 | add backoff/queuing logic for ENOTFOUND (no internet) | Medium | High | Local | Fix | No | +| `ts/packages/telemetry/src/logger/mongoLoggerSink.ts` | 41 | add backoff/queuing logic for ENOTFOUND (no internet) | Medium | High | Local | Fix | No | | `ts/packages/typeagent/src/vector/vectorIndex.ts` | 176 | batch operations | Medium | High | Local | Fix | No | | `ts/packages/utils/typechatUtils/src/location.ts` | 138 | update any once @azure-rest/maps-search incorporates V1 return types | Low | Low | Local | No Fix | Yes | | `ts/packages/utils/typechatUtils/src/location.ts` | 154 | handle more result types | Medium | Medium | Local | Fix | Yes | diff --git a/ts/packages/typeagent/src/storage/embeddingFS.ts b/ts/packages/typeagent/src/storage/embeddingFS.ts index 487ee438c..6bd4783ac 100644 --- a/ts/packages/typeagent/src/storage/embeddingFS.ts +++ b/ts/packages/typeagent/src/storage/embeddingFS.ts @@ -155,13 +155,17 @@ export async function 
createEmbeddingFolder( names: string[]; embeddings: Embedding[]; }> { - // TODO: parallelize - let names: string[] = []; - let embeddings: Embedding[] = []; - for (const name of nameSubset) { - const entry = await folder.get(name); + const loaded = await asyncArray.mapAsync( + nameSubset, + concurrency!, + (name) => folder.get(name), + ); + const names: string[] = []; + const embeddings: Embedding[] = []; + for (let i = 0; i < nameSubset.length; i++) { + const entry = loaded[i]; if (entry) { - names.push(name); + names.push(nameSubset[i]); embeddings.push(entry); } } diff --git a/ts/packages/typeagent/test/storage.embeddingFS.spec.ts b/ts/packages/typeagent/test/storage.embeddingFS.spec.ts new file mode 100644 index 000000000..2992d1b5b --- /dev/null +++ b/ts/packages/typeagent/test/storage.embeddingFS.spec.ts @@ -0,0 +1,83 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +import { removeDir } from "../src/objStream.js"; +import { createEmbeddingFolder } from "../src/storage/embeddingFS.js"; +import { SimilarityType } from "../src/vector/embeddings.js"; +import { generateRandomEmbedding, testDirectoryPath } from "./common.js"; + +describe("storage.embeddingFS", () => { + const storePath = testDirectoryPath("embeddingFS_test"); + + beforeEach(async () => { + await removeDir(storePath); + }); + + test("nearestNeighborsInSubset returns closest embedding from subset", async () => { + const folder = await createEmbeddingFolder(storePath); + const dim = 16; + + // Store 4 embeddings + const a = generateRandomEmbedding(dim); + const b = generateRandomEmbedding(dim); + const c = generateRandomEmbedding(dim); + const d = generateRandomEmbedding(dim); + await folder.put(a, "a"); + await folder.put(b, "b"); + await folder.put(c, "c"); + await folder.put(d, "d"); + + // Query with embedding identical to "b" + const results = await folder.nearestNeighborsInSubset( + b, + ["a", "b", "c"], + 3, + SimilarityType.Cosine, + ); + + // "b" should be 
first with score ~1 + expect(results.length).toBeGreaterThan(0); + expect(results[0].item).toBe("b"); + expect(results[0].score).toBeCloseTo(1, 4); + // "d" is not in the subset so should not appear + const items = results.map((r) => r.item); + expect(items).not.toContain("d"); + }); + + test("nearestNeighborsInSubset handles subset that excludes some stored items", async () => { + const folder = await createEmbeddingFolder(storePath); + const dim = 8; + + const e1 = generateRandomEmbedding(dim); + const e2 = generateRandomEmbedding(dim); + const e3 = generateRandomEmbedding(dim); + await folder.put(e1, "e1"); + await folder.put(e2, "e2"); + await folder.put(e3, "e3"); + + // Only search subset ["e1", "e3"] — e2 should never appear + const results = await folder.nearestNeighborsInSubset( + e1, + ["e1", "e3"], + 2, + SimilarityType.Cosine, + ); + const items = results.map((r) => r.item); + expect(items).not.toContain("e2"); + expect(items).toContain("e1"); + }); + + test("nearestNeighborsInSubset returns empty for empty subset", async () => { + const folder = await createEmbeddingFolder(storePath); + const e = generateRandomEmbedding(8); + await folder.put(e, "only"); + + const results = await folder.nearestNeighborsInSubset( + e, + [], + 3, + SimilarityType.Cosine, + ); + expect(results).toHaveLength(0); + }); +}); From 31dff8d9ba96580e337e33b61fef732239638224 Mon Sep 17 00:00:00 2001 From: robgruen Date: Sun, 12 Apr 2026 19:35:38 -0700 Subject: [PATCH 4/4] Optimize HitTable.getTopK to skip sort for k<1 and handle k>size Two improvements to the getTopK function: - When k < 1 (return-all case), skip the O(n log n) sort entirely and return map values directly in O(n). - Change `k === map.size` guard to `k >= map.size` so over-sized k values also short-circuit correctly without running the score-level loop. 
Co-Authored-By: Claude Sonnet 4.6 --- TODO.md | 5 +++++ ts/packages/knowledgeProcessor/src/setOperations.ts | 11 +++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/TODO.md b/TODO.md index 26157a61d..028bb9c1b 100644 --- a/TODO.md +++ b/TODO.md @@ -153,6 +153,8 @@ This file collates all TODO comments found across the repository, organized by t | `ts/packages/agents/spelunker/src/typescriptChunker.ts` | 198 | Move to caller? | Low | High | Local | Fix | No | | `ts/packages/agents/video/src/videoActionHandler.ts` | 54 | dynamic duration | Medium | Medium | Local | Fix | Yes | | `ts/packages/agents/weather/src/weatherActionHandler.ts` | 227 | Add more sophisticated validation: | Medium | High | Local | Fix | No | +| `ts/packages/aiclient/src/models.ts` | 31 | JsonSchemaType | Low | High | Local | Fix | No | +| `ts/packages/aiclient/src/models.ts` | 39 | JsonSchemaType | Low | High | Local | Fix | No | | `ts/packages/aiclient/src/models.ts` | 176 | add support for videos | High | Medium | Component | Fix | Yes | | `ts/packages/aiclient/src/openai.ts` | 580 | remove after API endpoint correctly handles this case | Low | Medium | Local | Fix | Yes | | `ts/packages/aiclient/src/tokenCounter.ts` | 58 | intermittently cache these with the session | Medium | Medium | Component | Fix | Yes | @@ -278,6 +280,8 @@ This file collates all TODO comments found across the repository, organized by t | `ts/packages/memory/image/src/imageMeta.ts` | 151 | image taker name | Low | High | Local | Fix | No | | `ts/packages/memory/image/src/imageMeta.ts` | 356 | Ensure localization | Medium | Medium | Local | Fix | No | | `ts/packages/memory/image/src/indexingService.ts` | 5 | add support for "monitoring" the indexed folder for changes | High | Medium | Component | Fix | No | +| `ts/packages/memory/image/src/indexingService.ts` | 23 | add token stats | Low | High | Local | Fix | No | +| `ts/packages/memory/image/src/indexingService.ts` | 132 | make this less chatty - 
maybe percentage based or something? | Low | High | Local | Fix | No | | `ts/packages/memory/storage/src/azSearch/azQuery.ts` | 90 | handle related terms | Medium | Medium | Component | Fix | No | | `ts/packages/memory/website/src/websiteCollection.ts` | 2646 | If we have access to Graphology graphs, compute more advanced metrics | High | Low | Component | No Fix | Yes | | `ts/packages/memory/website/src/websiteCollection.ts` | 2709 | In a full implementation, this would: | High | Low | Component | No Fix | Yes | @@ -294,6 +298,7 @@ This file collates all TODO comments found across the repository, organized by t | `ts/packages/shell/test/testHelper.ts` | 236 | fix completion to not need this workaround | Medium | Medium | Local | Fix | No | | `ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts` | 55 | add backoff/queuing logic for ENOTFOUND (no internet) | Medium | High | Local | Fix | No | | `ts/packages/telemetry/src/logger/mongoLoggerSink.ts` | 41 | add backoff/queuing logic for ENOTFOUND (no internet) | Medium | High | Local | Fix | No | +| `ts/packages/typeagent/src/storage/embeddingFS.ts` | 158 | parallelize | Low | High | Local | Fix | No | | `ts/packages/typeagent/src/vector/vectorIndex.ts` | 176 | batch operations | Medium | High | Local | Fix | No | | `ts/packages/utils/typechatUtils/src/location.ts` | 138 | update any once @azure-rest/maps-search incorporates V1 return types | Low | Low | Local | No Fix | Yes | | `ts/packages/utils/typechatUtils/src/location.ts` | 154 | handle more result types | Medium | Medium | Local | Fix | Yes | diff --git a/ts/packages/knowledgeProcessor/src/setOperations.ts b/ts/packages/knowledgeProcessor/src/setOperations.ts index 96315a732..b14d75f31 100644 --- a/ts/packages/knowledgeProcessor/src/setOperations.ts +++ b/ts/packages/knowledgeProcessor/src/setOperations.ts @@ -603,23 +603,26 @@ export function createHitTable( return top; } - // TODO: Optimize. 
/** * Return the items with the 'k' highest scores * @param k if <= 0, returns all * @returns array of items */ function getTopK(k: number): T[] { + if (k < 1) { + // All items requested; skip the expensive sort + return [...map.values()].map((i) => i.item); + } const topItems = byHighestScore(); - if (k === map.size || k <= 0) { + if (k >= map.size) { return topItems.map((i) => i.item); } const topK: T[] = []; - if (k < 1 || topItems.length === 0) { + if (topItems.length === 0) { return topK; } - // Stop when we have matched k highest scores + // Stop when we have matched k highest score levels let prevScore = topItems[0].score; let kCount = 1; for (let i = 0; i < topItems.length; ++i) {