Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions ts/packages/aiclient/src/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,22 @@ export type CompletionSettings = {
verbosity?: "low" | "medium" | "high";
};

/**
 * A JSON Schema object passed opaquely to the OpenAI API.
 *
 * Keys are arbitrary strings and every value is `unknown`, so code that
 * reads a schema must narrow a value before using it.
 */
export type JsonSchemaType = { [key: string]: unknown };

/**
 * A named JSON Schema definition for structured output.
 */
export type StructuredOutputJsonSchema = {
    /** Name identifying the schema. */
    name: string;
    /** Optional free-text description of the schema. */
    description?: string;
    /** Either omitted or the literal `true`; `false` is not representable. */
    strict?: true;
    /** The JSON Schema itself, carried as an opaque object. */
    schema: JsonSchemaType;
};

/**
 * Describes a callable function: a fixed `"function"` tag plus the
 * function's name, optional description and optional parameter schema.
 */
export type FunctionCallingJsonSchema = {
    /** Discriminator; always the literal `"function"`. */
    type: "function";
    function: {
        /** Name of the function. */
        name: string;
        /** Optional free-text description of the function. */
        description?: string;
        /** Optional JSON Schema for the arguments; may be omitted entirely. */
        parameters?: JsonSchemaType;
        /** Either omitted or the literal `true`; `false` is not representable. */
        strict?: true;
    };
};
Expand Down
53 changes: 53 additions & 0 deletions ts/packages/aiclient/test/models.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import {
JsonSchemaType,
StructuredOutputJsonSchema,
FunctionCallingJsonSchema,
} from "../src/models.js";

// Checks for the JSON-schema-carrying types exported by models.ts.
// Each case builds a value of the declared type and reads fields back from it.
describe("models.JsonSchemaType", () => {
    test("JsonSchemaType accepts a plain object schema", () => {
        const objectSchema: JsonSchemaType = {
            type: "object",
            properties: { name: { type: "string" } },
            required: ["name"],
            additionalProperties: false,
        };
        const declaredType = objectSchema["type"];
        expect(declaredType).toBe("object");
    });

    test("StructuredOutputJsonSchema holds a schema object", () => {
        const emptyObjectSchema: JsonSchemaType = {
            type: "object",
            properties: {},
        };
        const wrapper: StructuredOutputJsonSchema = {
            name: "MyAction",
            strict: true,
            schema: emptyObjectSchema,
        };
        expect(wrapper.name).toBe("MyAction");
        expect(wrapper.schema["type"]).toBe("object");
    });

    test("FunctionCallingJsonSchema holds optional parameters schema", () => {
        const definition: FunctionCallingJsonSchema = {
            type: "function",
            function: {
                name: "doSomething",
                description: "Does something",
                parameters: { type: "object", properties: {} },
                strict: true,
            },
        };
        const { name, parameters } = definition.function;
        expect(name).toBe("doSomething");
        expect(parameters?.["type"]).toBe("object");
    });

    test("FunctionCallingJsonSchema allows omitting parameters", () => {
        const definition: FunctionCallingJsonSchema = {
            type: "function",
            function: { name: "noArgs" },
        };
        const { parameters } = definition.function;
        expect(parameters).toBeUndefined();
    });
});
11 changes: 7 additions & 4 deletions ts/packages/knowledgeProcessor/src/setOperations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -603,23 +603,26 @@ export function createHitTable<T>(
return top;
}

// TODO: Optimize.
/**
* Return the items with the 'k' highest scores
* @param k if <= 0, returns all
* @returns array of items
*/
function getTopK(k: number): T[] {
if (k < 1) {
// All items requested; skip the expensive sort
return [...map.values()].map((i) => i.item);
}
const topItems = byHighestScore();
if (k === map.size || k <= 0) {
if (k >= map.size) {
return topItems.map((i) => i.item);
}

const topK: T[] = [];
if (k < 1 || topItems.length === 0) {
if (topItems.length === 0) {
return topK;
}
// Stop when we have matched k highest scores
// Stop when we have matched k highest score levels
let prevScore = topItems[0].score;
let kCount = 1;
for (let i = 0; i < topItems.length; ++i) {
Expand Down
10 changes: 8 additions & 2 deletions ts/packages/telemetry/src/logger/cosmosDBLoggerSink.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ class CosmosDBLoggerSink implements LoggerSink {

private async upload() {
let attempt = 0;
let delay = UPLOAD_DELAY;
while (attempt < MAX_RETRY) {
attempt++;
try {
await this.drain();
break;
} catch (e: any) {
// TODO: add backoff/queuing logic for ENOTFOUND (no internet)
if (
typeof e.message === "string" &&
(e.message.includes("Invalid key") ||
Expand All @@ -72,8 +72,14 @@ class CosmosDBLoggerSink implements LoggerSink {
break;
}

// Retry
// Retry with exponential backoff (handles ENOTFOUND and other transient errors)
debugCosmos(`ERROR: ${e}`);
if (attempt < MAX_RETRY) {
await new Promise<void>((resolve) =>
setTimeout(resolve, delay),
);
delay *= 2;
}
}
}
// Clear the timeout so it can schedule after the next log event.
Expand Down
10 changes: 8 additions & 2 deletions ts/packages/telemetry/src/logger/mongoLoggerSink.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ class MongoDBLoggerSink implements LoggerSink {

private async upload() {
let attempt = 0;
let delay = UPLOAD_DELAY;
while (attempt < MAX_RETRY) {
attempt++;
try {
await this.drain();
break;
} catch (e: any) {
// TODO: add backoff/queuing logic for ENOTFOUND (no internet)
if (
typeof e.message === "string" &&
e.message.includes("Invalid key")
Expand All @@ -56,8 +56,14 @@ class MongoDBLoggerSink implements LoggerSink {
break;
}

// Retry
// Retry with exponential backoff (handles ENOTFOUND and other transient errors)
debugMongo(`ERROR: ${e}`);
if (attempt < MAX_RETRY) {
await new Promise<void>((resolve) =>
setTimeout(resolve, delay),
);
delay *= 2;
}
}
}
// Clear the timeout so it can schedule after the next log event.
Expand Down
16 changes: 10 additions & 6 deletions ts/packages/typeagent/src/storage/embeddingFS.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,17 @@ export async function createEmbeddingFolder(
names: string[];
embeddings: Embedding[];
}> {
// TODO: parallelize
let names: string[] = [];
let embeddings: Embedding[] = [];
for (const name of nameSubset) {
const entry = await folder.get(name);
const loaded = await asyncArray.mapAsync(
nameSubset,
concurrency!,
(name) => folder.get(name),
);
const names: string[] = [];
const embeddings: Embedding[] = [];
for (let i = 0; i < nameSubset.length; i++) {
const entry = loaded[i];
if (entry) {
names.push(name);
names.push(nameSubset[i]);
embeddings.push(entry);
}
}
Expand Down
83 changes: 83 additions & 0 deletions ts/packages/typeagent/test/storage.embeddingFS.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { removeDir } from "../src/objStream.js";
import { createEmbeddingFolder } from "../src/storage/embeddingFS.js";
import { SimilarityType } from "../src/vector/embeddings.js";
import { generateRandomEmbedding, testDirectoryPath } from "./common.js";

// Exercises nearestNeighborsInSubset on an embedding folder created under
// a dedicated test directory.
describe("storage.embeddingFS", () => {
    const storePath = testDirectoryPath("embeddingFS_test");

    // Remove the store directory before every test.
    beforeEach(async () => {
        await removeDir(storePath);
    });

    test("nearestNeighborsInSubset returns closest embedding from subset", async () => {
        const store = await createEmbeddingFolder(storePath);
        const dimensions = 16;

        // Four stored embeddings; "d" is deliberately left out of the subset below
        const vectors = {
            a: generateRandomEmbedding(dimensions),
            b: generateRandomEmbedding(dimensions),
            c: generateRandomEmbedding(dimensions),
            d: generateRandomEmbedding(dimensions),
        };
        for (const [name, vector] of Object.entries(vectors)) {
            await store.put(vector, name);
        }

        // The query is the very embedding stored under "b"
        const matches = await store.nearestNeighborsInSubset(
            vectors.b,
            ["a", "b", "c"],
            3,
            SimilarityType.Cosine,
        );

        // "b" must rank first, with a score of approximately 1
        expect(matches.length).toBeGreaterThan(0);
        expect(matches[0].item).toBe("b");
        expect(matches[0].score).toBeCloseTo(1, 4);
        // "d" was outside the subset and must not be returned
        const matchedNames = matches.map((match) => match.item);
        expect(matchedNames).not.toContain("d");
    });

    test("nearestNeighborsInSubset handles subset that excludes some stored items", async () => {
        const store = await createEmbeddingFolder(storePath);
        const dimensions = 8;

        const vectors = {
            e1: generateRandomEmbedding(dimensions),
            e2: generateRandomEmbedding(dimensions),
            e3: generateRandomEmbedding(dimensions),
        };
        for (const [name, vector] of Object.entries(vectors)) {
            await store.put(vector, name);
        }

        // Search only ["e1", "e3"]; "e2" is stored but must never come back
        const matches = await store.nearestNeighborsInSubset(
            vectors.e1,
            ["e1", "e3"],
            2,
            SimilarityType.Cosine,
        );
        const matchedNames = matches.map((match) => match.item);
        expect(matchedNames).not.toContain("e2");
        expect(matchedNames).toContain("e1");
    });

    test("nearestNeighborsInSubset returns empty for empty subset", async () => {
        const store = await createEmbeddingFolder(storePath);
        const onlyVector = generateRandomEmbedding(8);
        await store.put(onlyVector, "only");

        const matches = await store.nearestNeighborsInSubset(
            onlyVector,
            [],
            3,
            SimilarityType.Cosine,
        );
        expect(matches).toHaveLength(0);
    });
});
Loading