diff --git a/ts/packages/aiclient/src/models.ts b/ts/packages/aiclient/src/models.ts index 4bdddd1927..dbfa047166 100644 --- a/ts/packages/aiclient/src/models.ts +++ b/ts/packages/aiclient/src/models.ts @@ -24,11 +24,14 @@ export type CompletionSettings = { verbosity?: "low" | "medium" | "high"; }; +// A JSON Schema object passed opaquely to the OpenAI API. +export type JsonSchemaType = Record<string, unknown>; + export type StructuredOutputJsonSchema = { name: string; description?: string; strict?: true; - schema: any; // TODO: JsonSchemaType + schema: JsonSchemaType; }; export type FunctionCallingJsonSchema = { @@ -36,7 +39,7 @@ export type FunctionCallingJsonSchema = { function: { name: string; description?: string; - parameters?: any; // TODO: JsonSchemaType + parameters?: JsonSchemaType; strict?: true; }; }; diff --git a/ts/packages/aiclient/test/models.spec.ts b/ts/packages/aiclient/test/models.spec.ts new file mode 100644 index 0000000000..145914bc04 --- /dev/null +++ b/ts/packages/aiclient/test/models.spec.ts @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { + JsonSchemaType, + StructuredOutputJsonSchema, + FunctionCallingJsonSchema, +} from "../src/models.js"; + +describe("models.JsonSchemaType", () => { + test("JsonSchemaType accepts a plain object schema", () => { + const schema: JsonSchemaType = { + type: "object", + properties: { name: { type: "string" } }, + required: ["name"], + additionalProperties: false, + }; + expect(schema["type"]).toBe("object"); + }); + + test("StructuredOutputJsonSchema holds a schema object", () => { + const schema: JsonSchemaType = { type: "object", properties: {} }; + const structured: StructuredOutputJsonSchema = { + name: "MyAction", + strict: true, + schema, + }; + expect(structured.name).toBe("MyAction"); + expect(structured.schema["type"]).toBe("object"); + }); + + test("FunctionCallingJsonSchema holds optional parameters schema", () => { + const fn: FunctionCallingJsonSchema = { + type: "function", + function: { + name: "doSomething", + description: "Does something", + parameters: { type: "object", properties: {} }, + strict: true, + }, + }; + expect(fn.function.name).toBe("doSomething"); + expect(fn.function.parameters?.["type"]).toBe("object"); + }); + + test("FunctionCallingJsonSchema allows omitting parameters", () => { + const fn: FunctionCallingJsonSchema = { + type: "function", + function: { name: "noArgs" }, + }; + expect(fn.function.parameters).toBeUndefined(); + }); +}); diff --git a/ts/packages/knowledgeProcessor/src/setOperations.ts b/ts/packages/knowledgeProcessor/src/setOperations.ts index 96315a7322..b14d75f315 100644 --- a/ts/packages/knowledgeProcessor/src/setOperations.ts +++ b/ts/packages/knowledgeProcessor/src/setOperations.ts @@ -603,23 +603,26 @@ export function createHitTable( return top; } - // TODO: Optimize. 
/** * Return the items with the 'k' highest scores * @param k if <= 0, returns all * @returns array of items */ function getTopK(k: number): T[] { + if (k < 1) { + // All items requested; skip the expensive sort + return [...map.values()].map((i) => i.item); + } const topItems = byHighestScore(); - if (k === map.size || k <= 0) { + if (k >= map.size) { return topItems.map((i) => i.item); } const topK: T[] = []; - if (k < 1 || topItems.length === 0) { + if (topItems.length === 0) { return topK; } - // Stop when we have matched k highest scores + // Stop when we have matched k highest score levels let prevScore = topItems[0].score; let kCount = 1; for (let i = 0; i < topItems.length; ++i) { diff --git a/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts b/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts index 4cc41c609d..6ffa79f15b 100644 --- a/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts +++ b/ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts @@ -46,13 +46,13 @@ class CosmosDBLoggerSink implements LoggerSink { private async upload() { let attempt = 0; + let delay = UPLOAD_DELAY; while (attempt < MAX_RETRY) { attempt++; try { await this.drain(); break; } catch (e: any) { - // TODO: add backoff/queuing logic for ENOTFOUND (no internet) if ( typeof e.message === "string" && (e.message.includes("Invalid key") || @@ -72,8 +72,14 @@ class CosmosDBLoggerSink implements LoggerSink { break; } - // Retry + // Retry with exponential backoff (handles ENOTFOUND and other transient errors) debugCosmos(`ERROR: ${e}`); + if (attempt < MAX_RETRY) { + await new Promise((resolve) => + setTimeout(resolve, delay), + ); + delay *= 2; + } } } // Clear the timeout so it can schedule after the next log event. 
diff --git a/ts/packages/telemetry/src/logger/mongoLoggerSink.ts b/ts/packages/telemetry/src/logger/mongoLoggerSink.ts index 68530a3ad8..47b92b4eb4 100644 --- a/ts/packages/telemetry/src/logger/mongoLoggerSink.ts +++ b/ts/packages/telemetry/src/logger/mongoLoggerSink.ts @@ -32,13 +32,13 @@ class MongoDBLoggerSink implements LoggerSink { private async upload() { let attempt = 0; + let delay = UPLOAD_DELAY; while (attempt < MAX_RETRY) { attempt++; try { await this.drain(); break; } catch (e: any) { - // TODO: add backoff/queuing logic for ENOTFOUND (no internet) if ( typeof e.message === "string" && e.message.includes("Invalid key") @@ -56,8 +56,14 @@ class MongoDBLoggerSink implements LoggerSink { break; } - // Retry + // Retry with exponential backoff (handles ENOTFOUND and other transient errors) debugMongo(`ERROR: ${e}`); + if (attempt < MAX_RETRY) { + await new Promise((resolve) => + setTimeout(resolve, delay), + ); + delay *= 2; + } } } // Clear the timeout so it can schedule after the next log event. 
diff --git a/ts/packages/typeagent/src/storage/embeddingFS.ts b/ts/packages/typeagent/src/storage/embeddingFS.ts index 487ee438cc..6bd4783ac4 100644 --- a/ts/packages/typeagent/src/storage/embeddingFS.ts +++ b/ts/packages/typeagent/src/storage/embeddingFS.ts @@ -155,13 +155,17 @@ export async function createEmbeddingFolder( names: string[]; embeddings: Embedding[]; }> { - // TODO: parallelize - let names: string[] = []; - let embeddings: Embedding[] = []; - for (const name of nameSubset) { - const entry = await folder.get(name); + const loaded = await asyncArray.mapAsync( + nameSubset, + concurrency!, + (name) => folder.get(name), + ); + const names: string[] = []; + const embeddings: Embedding[] = []; + for (let i = 0; i < nameSubset.length; i++) { + const entry = loaded[i]; if (entry) { - names.push(name); + names.push(nameSubset[i]); embeddings.push(entry); } } diff --git a/ts/packages/typeagent/test/storage.embeddingFS.spec.ts b/ts/packages/typeagent/test/storage.embeddingFS.spec.ts new file mode 100644 index 0000000000..2992d1b5ba --- /dev/null +++ b/ts/packages/typeagent/test/storage.embeddingFS.spec.ts @@ -0,0 +1,83 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { removeDir } from "../src/objStream.js"; +import { createEmbeddingFolder } from "../src/storage/embeddingFS.js"; +import { SimilarityType } from "../src/vector/embeddings.js"; +import { generateRandomEmbedding, testDirectoryPath } from "./common.js"; + +describe("storage.embeddingFS", () => { + const storePath = testDirectoryPath("embeddingFS_test"); + + beforeEach(async () => { + await removeDir(storePath); + }); + + test("nearestNeighborsInSubset returns closest embedding from subset", async () => { + const folder = await createEmbeddingFolder(storePath); + const dim = 16; + + // Store 4 embeddings + const a = generateRandomEmbedding(dim); + const b = generateRandomEmbedding(dim); + const c = generateRandomEmbedding(dim); + const d = generateRandomEmbedding(dim); + await folder.put(a, "a"); + await folder.put(b, "b"); + await folder.put(c, "c"); + await folder.put(d, "d"); + + // Query with embedding identical to "b" + const results = await folder.nearestNeighborsInSubset( + b, + ["a", "b", "c"], + 3, + SimilarityType.Cosine, + ); + + // "b" should be first with score ~1 + expect(results.length).toBeGreaterThan(0); + expect(results[0].item).toBe("b"); + expect(results[0].score).toBeCloseTo(1, 4); + // "d" is not in the subset so should not appear + const items = results.map((r) => r.item); + expect(items).not.toContain("d"); + }); + + test("nearestNeighborsInSubset handles subset that excludes some stored items", async () => { + const folder = await createEmbeddingFolder(storePath); + const dim = 8; + + const e1 = generateRandomEmbedding(dim); + const e2 = generateRandomEmbedding(dim); + const e3 = generateRandomEmbedding(dim); + await folder.put(e1, "e1"); + await folder.put(e2, "e2"); + await folder.put(e3, "e3"); + + // Only search subset ["e1", "e3"] — e2 should never appear + const results = await folder.nearestNeighborsInSubset( + e1, + ["e1", "e3"], + 2, + SimilarityType.Cosine, + ); + const items = results.map((r) => r.item); + 
expect(items).not.toContain("e2"); + expect(items).toContain("e1"); + }); + + test("nearestNeighborsInSubset returns empty for empty subset", async () => { + const folder = await createEmbeddingFolder(storePath); + const e = generateRandomEmbedding(8); + await folder.put(e, "only"); + + const results = await folder.nearestNeighborsInSubset( + e, + [], + 3, + SimilarityType.Cosine, + ); + expect(results).toHaveLength(0); + }); +});