From 61a8202f16f10cd113c1985fbf898bdf09601c15 Mon Sep 17 00:00:00 2001 From: Justin Poehnelt Date: Fri, 28 Mar 2025 12:29:23 -0600 Subject: [PATCH 1/2] feat: add embeddings package --- packages/embeddings/README.md | 3 + packages/embeddings/package.json | 18 ++ packages/embeddings/src/index.test.ts | 130 ++++++++++++ packages/embeddings/src/index.ts | 148 +++++++++++++ packages/embeddings/tsconfig.json | 14 ++ pnpm-lock.yaml | 294 ++++++++++++++++++++++++++ 6 files changed, 607 insertions(+) create mode 100644 packages/embeddings/README.md create mode 100644 packages/embeddings/package.json create mode 100644 packages/embeddings/src/index.test.ts create mode 100644 packages/embeddings/src/index.ts create mode 100644 packages/embeddings/tsconfig.json diff --git a/packages/embeddings/README.md b/packages/embeddings/README.md new file mode 100644 index 0000000..7687109 --- /dev/null +++ b/packages/embeddings/README.md @@ -0,0 +1,3 @@ +# Embeddings + +This package provides functions for generating embeddings using Vertex AI and calculating similarity between embeddings in Apps Script. 
diff --git a/packages/embeddings/package.json b/packages/embeddings/package.json new file mode 100644 index 0000000..e3b77eb --- /dev/null +++ b/packages/embeddings/package.json @@ -0,0 +1,18 @@ +{ + "name": "@repository/embeddings", + "version": "0.1.0", + "scripts": { + "check": "tsc --noEmit", + "test": "vitest run" + }, + "author": "Justin Poehnelt ", + "license": "Apache-2.0", + "devDependencies": { + "@types/google-apps-script": "^1.0.97", + "vitest": "^3.0.9" + }, + "type": "module", + "private": true, + "main": "./src/index.ts", + "types": "./src/index.ts" +} diff --git a/packages/embeddings/src/index.test.ts b/packages/embeddings/src/index.test.ts new file mode 100644 index 0000000..362e1eb --- /dev/null +++ b/packages/embeddings/src/index.test.ts @@ -0,0 +1,130 @@ +import { Mock, beforeEach, describe, expect, it, vi } from "vitest"; +import { batchedEmbeddings, similarity, similarityEmoji } from "./index.js"; + +// Mock Google Apps Script global objects +global.ScriptApp = { + getOAuthToken: vi.fn().mockReturnValue("mock-token"), +} as unknown as typeof ScriptApp; +global.PropertiesService = { + getScriptProperties: vi.fn().mockReturnValue({ + getProperty: vi + .fn() + .mockImplementation((key) => + key === "PROJECT_ID" ? 
"mock-project-id" : null, + ), + }), +} as unknown as typeof PropertiesService; + +const fetchAll = vi.fn(); +global.UrlFetchApp = { fetchAll } as unknown as typeof UrlFetchApp; + +describe("similarity", () => { + it("calculates cosine similarity correctly", () => { + // Parallel vectors (should be 1.0) + expect(similarity([1, 2, 3], [2, 4, 6])).toBeCloseTo(1.0); + + // Orthogonal vectors (should be 0.0) + expect(similarity([1, 0, 0], [0, 1, 0])).toBeCloseTo(0.0); + + // Opposite vectors (should be -1.0) + expect(similarity([1, 2, 3], [-1, -2, -3])).toBeCloseTo(-1.0); + }); + + it("throws an error when vectors have different lengths", () => { + expect(() => similarity([1, 2, 3, 4], [1, 2, 3])).toThrow( + "Vectors must have the same length", + ); + }); +}); + +describe("similarityEmoji", () => { + it("returns the correct emoji based on similarity value", () => { + expect(similarityEmoji(1.0)).toBe("🔥"); // Very high (>=0.9) + expect(similarityEmoji(0.8)).toBe("✅"); // High (>=0.7 and <0.9) + expect(similarityEmoji(0.6)).toBe("👍"); // Medium (>=0.5 and <0.7) + expect(similarityEmoji(0.4)).toBe("🤔"); // Low (>=0.3 and <0.5) + expect(similarityEmoji(0.2)).toBe("❌"); // Very low (<0.3) + }); +}); + +describe("batchedEmbeddings", () => { + const mockResponse = { + getResponseCode: vi.fn().mockReturnValue(200), + getContentText: vi.fn().mockReturnValue( + JSON.stringify({ + predictions: [{ embeddings: { values: [0.1, 0.2, 0.3] } }], + }), + ), + }; + + beforeEach(() => { + vi.clearAllMocks(); + fetchAll.mockReturnValue([mockResponse]); + }); + + it("handles single string input", () => { + const result = batchedEmbeddings("test text"); + + expect(fetchAll).toHaveBeenCalledTimes(1); + const requests = fetchAll.mock.calls[0][0]; + expect(requests).toHaveLength(1); + + const payload = JSON.parse(requests[0].payload); + expect(payload.instances[0].content).toBe("test text"); + + expect(result).toEqual([[0.1, 0.2, 0.3]]); + }); + + it("handles array of strings input", () => { + 
const mockResponses = [ + { + getResponseCode: vi.fn().mockReturnValue(200), + getContentText: vi.fn().mockReturnValue( + JSON.stringify({ + predictions: [{ embeddings: { values: [0.1, 0.2, 0.3] } }], + }), + ), + }, + { + getResponseCode: vi.fn().mockReturnValue(200), + getContentText: vi.fn().mockReturnValue( + JSON.stringify({ + predictions: [{ embeddings: { values: [0.4, 0.5, 0.6] } }], + }), + ), + }, + ]; + + fetchAll.mockReturnValue(mockResponses); + + const result = batchedEmbeddings(["text1", "text2"]); + expect(result).toEqual([ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ]); + }); + + it("uses custom parameters and handles errors", () => { + // Test custom parameters + batchedEmbeddings("test", { + model: "custom-model", + parameters: {}, + projectId: "custom-project", + region: "custom-region", + }); + + const requests = fetchAll.mock.calls[0][0]; + expect(requests[0].url).toContain("custom-region"); + expect(requests[0].url).toContain("custom-model"); + + // Test error handling + fetchAll.mockReturnValue([ + { + getResponseCode: vi.fn().mockReturnValue(400), + getContentText: vi.fn().mockReturnValue("Bad Request"), + }, + ]); + + expect(() => batchedEmbeddings("test")).toThrow("Bad Request"); + }); +}); diff --git a/packages/embeddings/src/index.ts b/packages/embeddings/src/index.ts new file mode 100644 index 0000000..2507e79 --- /dev/null +++ b/packages/embeddings/src/index.ts @@ -0,0 +1,148 @@ +const MODEL_ID = "text-embedding-005"; +const REGION = "us-central1"; + +interface Parameters { + autoTruncate?: boolean; + outputDimensionality?: number; +} + +/** + * Options for generating embeddings. + */ +interface Options { + /** + * The project ID that the model is in. + * @default 'PropertiesService.getScriptProperties().getProperty("PROJECT_ID")' + */ + projectId?: string; + + /** + * The ID of the model to use. + * @default 'text-embedding-005'. + */ + model?: string; + + /** + * Additional parameters to pass to the model. 
+ */ + parameters?: Parameters; + + /** + * The region that the model is in. + * @default 'us-central1' + */ + region?: string; + + /** + * The OAuth token to use to authenticate the request. + * @default `ScriptApp.getOAuthToken()` + */ + token?: string; +} + +const getProjectId = (): string => { + const projectId = + PropertiesService.getScriptProperties().getProperty("PROJECT_ID"); + if (!projectId) { + throw new Error("PROJECT_ID not found in script properties"); + } + + return projectId; +}; + +/** + * Generate embeddings for the given text. + * @param text - The text to generate embeddings for. + * @param options - Options for the embeddings generation. + * @returns The generated embeddings. + */ +export function batchedEmbeddings( + text: string | string[], + { + parameters = {}, + model = MODEL_ID, + projectId = getProjectId(), + region = REGION, + token = ScriptApp.getOAuthToken(), + }: Options = {}, +): number[][] { + const inputs = !Array.isArray(text) ? [text] : text; + + // TODO chunk in instances of 5 + const requests = inputs.map((content) => ({ + url: `https://${region}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${region}/publishers/google/models/${model}:predict`, + method: "post" as const, + headers: { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json", + }, + muteHttpExceptions: true, + contentType: "application/json", + payload: JSON.stringify({ + instances: [{ content }], + parameters, + }), + })); + + const responses = UrlFetchApp.fetchAll(requests); + + const results = responses.map((response) => { + if (response.getResponseCode() !== 200) { + throw new Error(response.getContentText()); + } + + return JSON.parse(response.getContentText()); + }); + + return results.map((result) => result.predictions[0].embeddings.values); +} + +/** + * Calculates the dot product of two vectors. + * @param x - The first vector. + * @param y - The second vector. 
+ */ +function dotProduct_(x: number[], y: number[]): number { + let result = 0; + for (let i = 0, l = Math.min(x.length, y.length); i < l; i += 1) { + result += x[i] * y[i]; + } + return result; +} + +/** + * Calculates the magnitude of a vector. + * @param x - The vector. + */ +function magnitude(x: number[]): number { + let result = 0; + for (let i = 0, l = x.length; i < l; i += 1) { + result += x[i] ** 2; + } + return Math.sqrt(result); +} + +/** + * Calculates the cosine similarity between two vectors. + * @param x - The first vector. + * @param y - The second vector. + * @returns The cosine similarity value between -1 and 1. + */ +export function similarity(x: number[], y: number[]): number { + if (x.length !== y.length) { + throw new Error("Vectors must have the same length"); + } + return dotProduct_(x, y) / (magnitude(x) * magnitude(y)); +} + +/** + * Returns an emoji representing the similarity value. + * @param value - The similarity value. + */ +export const similarityEmoji = (value: number): string => { + if (value >= 0.9) return "🔥"; // Very high similarity + if (value >= 0.7) return "✅"; // High similarity + if (value >= 0.5) return "👍"; // Medium similarity + if (value >= 0.3) return "🤔"; // Low similarity + return "❌"; // Very low similarity +}; diff --git a/packages/embeddings/tsconfig.json b/packages/embeddings/tsconfig.json new file mode 100644 index 0000000..4b1a443 --- /dev/null +++ b/packages/embeddings/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "module": "NodeNext", + "target": "ES2022", + "lib": ["esnext"], + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "types": ["@types/google-apps-script"], + "experimentalDecorators": true + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c017c79..b7db9b7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -45,6 +45,15 @@ importers: specifier: 'catalog:' version: 5.8.2 + 
packages/embeddings: + devDependencies: + '@types/google-apps-script': + specifier: ^1.0.97 + version: 1.0.97 + vitest: + specifier: ^3.0.9 + version: 3.0.9(@types/debug@4.1.12)(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0) + packages/mjml-apps-script: dependencies: mjml-browser: @@ -1025,6 +1034,35 @@ packages: '@ungap/structured-clone@1.3.0': resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==} + '@vitest/expect@3.0.9': + resolution: {integrity: sha512-5eCqRItYgIML7NNVgJj6TVCmdzE7ZVgJhruW0ziSQV4V7PvLkDL1bBkBdcTs/VuIz0IxPb5da1IDSqc1TR9eig==} + + '@vitest/mocker@3.0.9': + resolution: {integrity: sha512-ryERPIBOnvevAkTq+L1lD+DTFBRcjueL9lOUfXsLfwP92h4e+Heb+PjiqS3/OURWPtywfafK0kj++yDFjWUmrA==} + peerDependencies: + msw: ^2.4.9 + vite: ^5.0.0 || ^6.0.0 + peerDependenciesMeta: + msw: + optional: true + vite: + optional: true + + '@vitest/pretty-format@3.0.9': + resolution: {integrity: sha512-OW9F8t2J3AwFEwENg3yMyKWweF7oRJlMyHOMIhO5F3n0+cgQAJZBjNgrF8dLwFTEXl5jUqBLXd9QyyKv8zEcmA==} + + '@vitest/runner@3.0.9': + resolution: {integrity: sha512-NX9oUXgF9HPfJSwl8tUZCMP1oGx2+Sf+ru6d05QjzQz4OwWg0psEzwY6VexP2tTHWdOkhKHUIZH+fS6nA7jfOw==} + + '@vitest/snapshot@3.0.9': + resolution: {integrity: sha512-AiLUiuZ0FuA+/8i19mTYd+re5jqjEc2jZbgJ2up0VY0Ddyyxg/uUtBDpIFAy4uzKaQxOW8gMgBdAJJ2ydhu39A==} + + '@vitest/spy@3.0.9': + resolution: {integrity: sha512-/CcK2UDl0aQ2wtkp3YVWldrpLRNCfVcIOFGlVGKO4R5eajsH393Z1yiXLVQ7vWsj26JOEjeZI0x5sm5P4OGUNQ==} + + '@vitest/utils@3.0.9': + resolution: {integrity: sha512-ilHM5fHhZ89MCp5aAaM9uhfl1c2JdxVxl3McqsdVyVNN6JffnEen8UMCdRTzOhGXNQGo5GNL9QugHrz727Wnng==} + acorn@8.14.1: resolution: {integrity: sha512-OvQ/2pUDKmgfCg++xsTX1wGxfTaszcHVcTctW4UJB4hibJx2HXxxO5UmVgyjMa+ZDsiaf5wWLXYpRWMmBI0QHg==} engines: {node: '>=0.4.0'} @@ -1097,6 +1135,10 @@ packages: resolution: {integrity: 
sha512-1OvF9IbWwaeiM9VhzYXVQacMibxpXOMYVNIvMtKRyX9SImBXpKcFr8XvFDeEslCyuH/t6KRt7HEO94AlP8Iatw==} engines: {node: '>=12'} + assertion-error@2.0.1: + resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} + engines: {node: '>=12'} + axobject-query@4.1.0: resolution: {integrity: sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ==} engines: {node: '>= 0.4'} @@ -1128,6 +1170,10 @@ packages: resolution: {integrity: sha512-tjwM5exMg6BGRI+kNmTntNsvdZS1X8BFYS6tnJ2hdH0kVxM6/eVZ2xy+FqStSWvYmtfFMDLIxurorHwDKfDz5Q==} engines: {node: '>=18'} + cac@6.7.14: + resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==} + engines: {node: '>=8'} + call-bind-apply-helpers@1.0.2: resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} engines: {node: '>= 0.4'} @@ -1139,6 +1185,10 @@ packages: ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} + chai@5.2.0: + resolution: {integrity: sha512-mCuXncKXk5iCLhfhwTc0izo0gtEmpz5CtG2y8GiOINBlMVS6v8TMRc5TaLWKS6692m9+dVVfzgeVxR5UxWHTYw==} + engines: {node: '>=12'} + chalk@4.1.2: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} @@ -1156,6 +1206,10 @@ packages: chardet@0.7.0: resolution: {integrity: sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==} + check-error@2.1.1: + resolution: {integrity: sha512-OAlb+T7V4Op9OwdkjmguYRqncdlx5JiofwOAUkmTF+jNdHwzTaTs4sRAGpzLF3oOz5xAyDGrPgeIDFQmDOTiJw==} + engines: {node: '>= 16'} + chokidar@4.0.3: resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==} engines: {node: '>= 14.16.0'} @@ -1234,6 
+1288,10 @@ packages: decimal.js@10.5.0: resolution: {integrity: sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw==} + deep-eql@5.0.2: + resolution: {integrity: sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==} + engines: {node: '>=6'} + deepmerge@4.3.1: resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} engines: {node: '>=0.10.0'} @@ -1319,6 +1377,9 @@ packages: resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} engines: {node: '>= 0.4'} + es-module-lexer@1.6.0: + resolution: {integrity: sha512-qqnD1yMU6tk/jnaMosogGySTZP8YtUgAffA9nMN+E/rjxcfRQ6IEk7IiozUjgxKoFHBGjTLnrHB/YC45r/59EQ==} + es-object-atoms@1.1.1: resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==} engines: {node: '>= 0.4'} @@ -1347,6 +1408,9 @@ packages: esrap@1.4.5: resolution: {integrity: sha512-CjNMjkBWWZeHn+VX+gS8YvFwJ5+NDhg8aWZBSFJPR8qQduDNjbJodA2WcwCm7uQa5Rjqj+nZvVmceg1RbHFB9g==} + estree-walker@3.0.3: + resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + eventemitter3@5.0.1: resolution: {integrity: sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==} @@ -1358,6 +1422,10 @@ packages: resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==} engines: {node: '>=16.17'} + expect-type@1.2.0: + resolution: {integrity: sha512-80F22aiJ3GLyVnS/B3HzgR6RelZVumzj9jkL0Rhz4h0xYbNW9PjlQz5h3J/SShErbXBc295vseR4/MIbVmUbeA==} + engines: {node: '>=12.0.0'} + extend@3.0.2: resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} @@ -1792,6 +1860,9 @@ packages: resolution: {integrity: 
sha512-S0FayMXku80toa5sZ6Ro4C+s+EtFDCsyJNG/AzFMfX3AxD5Si4dZsgzm/kKnbOxHl5Cv8jBlno8+3XYIh2pNjQ==} engines: {node: '>=8'} + loupe@3.1.3: + resolution: {integrity: sha512-kkIp7XSkP78ZxJEsSxW3712C6teJVoeHHwgo9zJ380de7IYyJ2ISlxojcH2pC5OFLewESmnRi/+XCDIEEVyoug==} + lru-cache@10.4.3: resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==} @@ -2001,6 +2072,13 @@ packages: resolution: {integrity: sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==} engines: {node: '>=8'} + pathe@2.0.3: + resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==} + + pathval@2.0.0: + resolution: {integrity: sha512-vE7JKRyES09KiunauX7nd2Q9/L7lhok4smP9RZTDeD4MVs72Dp2qNFVz39Nz5a0FVEW0BJR6C0DYrq6unoziZA==} + engines: {node: '>= 14.16'} + picocolors@1.1.1: resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} @@ -2186,6 +2264,9 @@ packages: resolution: {integrity: sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==} engines: {node: '>= 0.4'} + siginfo@2.0.0: + resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==} + signal-exit@3.0.7: resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} @@ -2238,6 +2319,12 @@ packages: sprintf-js@1.0.3: resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} + stackback@0.0.2: + resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==} + + std-env@3.8.1: + resolution: {integrity: sha512-vj5lIj3Mwf9D79hBkltk5qmkFI+biIKWS2IBxEyEU3AX1tUf7AoL8nSazCOiiqQsGKIq01SClsKEzweu34uwvA==} + stdin-discarder@0.2.2: resolution: {integrity: 
sha512-UhDfHmA92YAlNnCfhmq0VeNL5bDbiZGg7sZ2IvPsXubGkiNa9EC+tUTsjBRsYUAz87btI6/1wf4XoVvQ3uRnmQ==} engines: {node: '>=18'} @@ -2313,6 +2400,24 @@ packages: resolution: {integrity: sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==} engines: {node: '>=18'} + tinybench@2.9.0: + resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} + + tinyexec@0.3.2: + resolution: {integrity: sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==} + + tinypool@1.0.2: + resolution: {integrity: sha512-al6n+QEANGFOMf/dmUMsuS5/r9B06uwlyNjZZql/zv8J7ybHCgoihBNORZCY2mzUuAnomQa2JdhyHKzZxPCrFA==} + engines: {node: ^18.0.0 || >=20.0.0} + + tinyrainbow@2.0.0: + resolution: {integrity: sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==} + engines: {node: '>=14.0.0'} + + tinyspy@3.0.2: + resolution: {integrity: sha512-n1cw8k1k0x4pgA2+9XrOkFydTerNcJ1zWCO5Nn9scWHTD+5tp8dghT2x1uduQePZTZgd3Tupf+x9BxJjeJi77Q==} + engines: {node: '>=14.0.0'} + tmp@0.0.33: resolution: {integrity: sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw==} engines: {node: '>=0.6.0'} @@ -2443,6 +2548,11 @@ packages: vfile@6.0.3: resolution: {integrity: sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==} + vite-node@3.0.9: + resolution: {integrity: sha512-w3Gdx7jDcuT9cNn9jExXgOyKmf5UOTb6WMHz8LGAm54eS1Elf5OuBhCxl6zJxGhEeIkgsE1WbHuoL0mj/UXqXg==} + engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} + hasBin: true + vite@6.2.1: resolution: {integrity: sha512-n2GnqDb6XPhlt9B8olZPrgMD/es/Nd1RdChF6CBD/fHW6pUyUTt2sQW2fPRX5GiD9XEa6+8A6A4f2vT6pSsE7Q==} engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} @@ -2491,6 +2601,34 @@ packages: vite: optional: true + vitest@3.0.9: + resolution: {integrity: 
sha512-BbcFDqNyBlfSpATmTtXOAOj71RNKDDvjBM/uPfnxxVGrG+FSH2RQIwgeEngTaTkuU/h0ScFvf+tRcKfYXzBybQ==} + engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0} + hasBin: true + peerDependencies: + '@edge-runtime/vm': '*' + '@types/debug': ^4.1.12 + '@types/node': ^18.0.0 || ^20.0.0 || >=22.0.0 + '@vitest/browser': 3.0.9 + '@vitest/ui': 3.0.9 + happy-dom: '*' + jsdom: '*' + peerDependenciesMeta: + '@edge-runtime/vm': + optional: true + '@types/debug': + optional: true + '@types/node': + optional: true + '@vitest/browser': + optional: true + '@vitest/ui': + optional: true + happy-dom: + optional: true + jsdom: + optional: true + webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} @@ -2502,6 +2640,11 @@ packages: engines: {node: '>= 8'} hasBin: true + why-is-node-running@2.3.0: + resolution: {integrity: sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==} + engines: {node: '>=8'} + hasBin: true + wrap-ansi@6.2.0: resolution: {integrity: sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==} engines: {node: '>=8'} @@ -3418,6 +3561,46 @@ snapshots: '@ungap/structured-clone@1.3.0': {} + '@vitest/expect@3.0.9': + dependencies: + '@vitest/spy': 3.0.9 + '@vitest/utils': 3.0.9 + chai: 5.2.0 + tinyrainbow: 2.0.0 + + '@vitest/mocker@3.0.9(vite@6.2.1(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0))': + dependencies: + '@vitest/spy': 3.0.9 + estree-walker: 3.0.3 + magic-string: 0.30.17 + optionalDependencies: + vite: 6.2.1(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0) + + '@vitest/pretty-format@3.0.9': + dependencies: + tinyrainbow: 2.0.0 + + '@vitest/runner@3.0.9': + dependencies: + '@vitest/utils': 3.0.9 + pathe: 2.0.3 + + '@vitest/snapshot@3.0.9': + dependencies: + '@vitest/pretty-format': 3.0.9 + magic-string: 0.30.17 + pathe: 2.0.3 + + 
'@vitest/spy@3.0.9': + dependencies: + tinyspy: 3.0.2 + + '@vitest/utils@3.0.9': + dependencies: + '@vitest/pretty-format': 3.0.9 + loupe: 3.1.3 + tinyrainbow: 2.0.0 + acorn@8.14.1: {} agent-base@7.1.3: {} @@ -3468,6 +3651,8 @@ snapshots: array-union@3.0.1: {} + assertion-error@2.0.1: {} + axobject-query@4.1.0: {} balanced-match@1.0.2: {} @@ -3494,6 +3679,8 @@ snapshots: dependencies: run-applescript: 7.0.0 + cac@6.7.14: {} + call-bind-apply-helpers@1.0.2: dependencies: es-errors: 1.3.0 @@ -3506,6 +3693,14 @@ snapshots: ccount@2.0.1: {} + chai@5.2.0: + dependencies: + assertion-error: 2.0.1 + check-error: 2.1.1 + deep-eql: 5.0.2 + loupe: 3.1.3 + pathval: 2.0.0 + chalk@4.1.2: dependencies: ansi-styles: 4.3.0 @@ -3519,6 +3714,8 @@ snapshots: chardet@0.7.0: {} + check-error@2.1.1: {} + chokidar@4.0.3: dependencies: readdirp: 4.1.2 @@ -3574,6 +3771,8 @@ snapshots: decimal.js@10.5.0: {} + deep-eql@5.0.2: {} + deepmerge@4.3.1: {} default-browser-id@5.0.0: {} @@ -3642,6 +3841,8 @@ snapshots: es-errors@1.3.0: {} + es-module-lexer@1.6.0: {} + es-object-atoms@1.1.1: dependencies: es-errors: 1.3.0 @@ -3686,6 +3887,10 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.0 + estree-walker@3.0.3: + dependencies: + '@types/estree': 1.0.6 + eventemitter3@5.0.1: {} eventsource-parser@3.0.0: {} @@ -3702,6 +3907,8 @@ snapshots: signal-exit: 4.1.0 strip-final-newline: 3.0.0 + expect-type@1.2.0: {} + extend@3.0.2: {} extendable-error@0.1.7: {} @@ -4165,6 +4372,8 @@ snapshots: currently-unhandled: 0.4.1 signal-exit: 3.0.7 + loupe@3.1.3: {} + lru-cache@10.4.3: {} magic-string@0.30.17: @@ -4353,6 +4562,10 @@ snapshots: path-type@4.0.0: {} + pathe@2.0.3: {} + + pathval@2.0.0: {} + picocolors@1.1.1: {} picomatch@2.3.1: {} @@ -4545,6 +4758,8 @@ snapshots: side-channel-map: 1.0.1 side-channel-weakmap: 1.0.2 + siginfo@2.0.0: {} + signal-exit@3.0.7: {} signal-exit@4.1.0: {} @@ -4594,6 +4809,10 @@ snapshots: sprintf-js@1.0.3: {} + stackback@0.0.2: {} + + std-env@3.8.1: {} + 
stdin-discarder@0.2.2: {} string-argv@0.3.2: {} @@ -4676,6 +4895,16 @@ snapshots: throttleit@2.1.0: {} + tinybench@2.9.0: {} + + tinyexec@0.3.2: {} + + tinypool@1.0.2: {} + + tinyrainbow@2.0.0: {} + + tinyspy@3.0.2: {} + tmp@0.0.33: dependencies: os-tmpdir: 1.0.2 @@ -4790,6 +5019,27 @@ snapshots: '@types/unist': 3.0.3 vfile-message: 4.0.2 + vite-node@3.0.9(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0): + dependencies: + cac: 6.7.14 + debug: 4.4.0 + es-module-lexer: 1.6.0 + pathe: 2.0.3 + vite: 6.2.1(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0) + transitivePeerDependencies: + - '@types/node' + - jiti + - less + - lightningcss + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + vite@6.2.1(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0): dependencies: esbuild: 0.25.0 @@ -4807,6 +5057,45 @@ snapshots: optionalDependencies: vite: 6.2.1(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0) + vitest@3.0.9(@types/debug@4.1.12)(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0): + dependencies: + '@vitest/expect': 3.0.9 + '@vitest/mocker': 3.0.9(vite@6.2.1(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0)) + '@vitest/pretty-format': 3.0.9 + '@vitest/runner': 3.0.9 + '@vitest/snapshot': 3.0.9 + '@vitest/spy': 3.0.9 + '@vitest/utils': 3.0.9 + chai: 5.2.0 + debug: 4.4.0 + expect-type: 1.2.0 + magic-string: 0.30.17 + pathe: 2.0.3 + std-env: 3.8.1 + tinybench: 2.9.0 + tinyexec: 0.3.2 + tinypool: 1.0.2 + tinyrainbow: 2.0.0 + vite: 6.2.1(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0) + vite-node: 3.0.9(@types/node@22.13.10)(jiti@2.4.2)(lightningcss@1.29.2)(tsx@4.19.3)(yaml@2.7.0) + why-is-node-running: 2.3.0 + optionalDependencies: + '@types/debug': 4.1.12 + '@types/node': 22.13.10 + transitivePeerDependencies: + - jiti + - 
less + - lightningcss + - msw + - sass + - sass-embedded + - stylus + - sugarss + - supports-color + - terser + - tsx + - yaml + webidl-conversions@3.0.1: {} whatwg-url@5.0.0: @@ -4818,6 +5107,11 @@ snapshots: dependencies: isexe: 2.0.0 + why-is-node-running@2.3.0: + dependencies: + siginfo: 2.0.0 + stackback: 0.0.2 + wrap-ansi@6.2.0: dependencies: ansi-styles: 4.3.0 From 7ac9f693dbd9e93b5b5e0b7b739a25dfe246fba3 Mon Sep 17 00:00:00 2001 From: Justin Poehnelt Date: Fri, 28 Mar 2025 13:13:28 -0600 Subject: [PATCH 2/2] fix: batched --- packages/embeddings/package.json | 2 +- packages/embeddings/src/index.test.ts | 24 +++------ packages/embeddings/src/index.ts | 71 +++++++++++++++++++++++---- 3 files changed, 69 insertions(+), 28 deletions(-) diff --git a/packages/embeddings/package.json b/packages/embeddings/package.json index e3b77eb..d5c57cf 100644 --- a/packages/embeddings/package.json +++ b/packages/embeddings/package.json @@ -3,7 +3,7 @@ "version": "0.1.0", "scripts": { "check": "tsc --noEmit", - "test": "vitest run" + "test": "vitest" }, "author": "Justin Poehnelt ", "license": "Apache-2.0", diff --git a/packages/embeddings/src/index.test.ts b/packages/embeddings/src/index.test.ts index 362e1eb..4fa8e26 100644 --- a/packages/embeddings/src/index.test.ts +++ b/packages/embeddings/src/index.test.ts @@ -1,5 +1,5 @@ -import { Mock, beforeEach, describe, expect, it, vi } from "vitest"; -import { batchedEmbeddings, similarity, similarityEmoji } from "./index.js"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { getTextEmbeddings, similarity, similarityEmoji } from "./index.js"; // Mock Google Apps Script global objects global.ScriptApp = { @@ -47,7 +47,7 @@ describe("similarityEmoji", () => { }); }); -describe("batchedEmbeddings", () => { +describe("getEmbeddings", () => { const mockResponse = { getResponseCode: vi.fn().mockReturnValue(200), getContentText: vi.fn().mockReturnValue( @@ -63,7 +63,7 @@ describe("batchedEmbeddings", () => { }); 
it("handles single string input", () => { - const result = batchedEmbeddings("test text"); + const result = getTextEmbeddings("test text"); expect(fetchAll).toHaveBeenCalledTimes(1); const requests = fetchAll.mock.calls[0][0]; @@ -97,16 +97,16 @@ describe("batchedEmbeddings", () => { fetchAll.mockReturnValue(mockResponses); - const result = batchedEmbeddings(["text1", "text2"]); + const result = getTextEmbeddings(["text1", "text2"]); expect(result).toEqual([ [0.1, 0.2, 0.3], [0.4, 0.5, 0.6], ]); }); - it("uses custom parameters and handles errors", () => { + it("uses custom parameters", () => { // Test custom parameters - batchedEmbeddings("test", { + getTextEmbeddings("test", { model: "custom-model", parameters: {}, projectId: "custom-project", @@ -116,15 +116,5 @@ describe("batchedEmbeddings", () => { const requests = fetchAll.mock.calls[0][0]; expect(requests[0].url).toContain("custom-region"); expect(requests[0].url).toContain("custom-model"); - - // Test error handling - fetchAll.mockReturnValue([ - { - getResponseCode: vi.fn().mockReturnValue(400), - getContentText: vi.fn().mockReturnValue("Bad Request"), - }, - ]); - - expect(() => batchedEmbeddings("test")).toThrow("Bad Request"); }); }); diff --git a/packages/embeddings/src/index.ts b/packages/embeddings/src/index.ts index 2507e79..07ae0ee 100644 --- a/packages/embeddings/src/index.ts +++ b/packages/embeddings/src/index.ts @@ -6,6 +6,20 @@ interface Parameters { outputDimensionality?: number; } +interface Instance { + task_type?: + | "RETRIEVAL_DOCUMENT" + | "RETRIEVAL_QUERY" + | "SEMANTIC_SIMILARITY" + | "CLASSIFICATION" + | "CLUSTERING" + | "QUESTION_ANSWERING" + | "FACT_VERIFICATION" + | "CODE_RETRIEVAL_QUERY"; + title?: string; + content: string; +} + /** * Options for generating embeddings. */ @@ -51,13 +65,39 @@ const getProjectId = (): string => { }; /** - * Generate embeddings for the given text. - * @param text - The text to generate embeddings for. 
+ * Generate embeddings for the given text content. + * + * @param contentOrContentArray - A single text string, or an array of text strings, to generate embeddings for. * @param options - Options for the embeddings generation. * @returns The generated embeddings. + * + * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api */ -export function batchedEmbeddings( - text: string | string[], +export function getTextEmbeddings( + contentOrContentArray: string | string[], + options: Options = {}, +): number[][] { + const inputs = Array.isArray(contentOrContentArray) + ? contentOrContentArray + : [contentOrContentArray]; + + return getBatchedEmbeddings( + inputs.map((content) => ({ content })), + options, + ); +} + +/** + * Generate embeddings for the given instances in parallel UrlFetchApp requests. + * + * @param instances - The instances to generate embeddings for. + * @param options - Options for the embeddings generation. + * @returns The generated embeddings. + * + * @see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api + */ +export function getBatchedEmbeddings( + instances: Instance[], { parameters = {}, model = MODEL_ID, @@ -66,10 +106,8 @@ export function batchedEmbeddings( token = ScriptApp.getOAuthToken(), }: Options = {}, ): number[][] { - const inputs = !Array.isArray(text) ? 
[text] : text; - - // TODO chunk in instances of 5 - const requests = inputs.map((content) => ({ + const chunks = chunkArray(instances, 5); + const requests = chunks.map((instances) => ({ url: `https://${region}-aiplatform.googleapis.com/v1/projects/${projectId}/locations/${region}/publishers/google/models/${model}:predict`, method: "post" as const, headers: { @@ -79,7 +117,7 @@ export function batchedEmbeddings( muteHttpExceptions: true, contentType: "application/json", payload: JSON.stringify({ - instances: [{ content }], + instances, parameters, }), })); @@ -94,7 +132,12 @@ export function batchedEmbeddings( return JSON.parse(response.getContentText()); }); - return results.map((result) => result.predictions[0].embeddings.values); + return results.flatMap((result) => + result.predictions.map( + (prediction: { embeddings: { values: number[] } }) => + prediction.embeddings.values, + ), + ); } /** @@ -146,3 +189,11 @@ export const similarityEmoji = (value: number): string => { if (value >= 0.3) return "🤔"; // Low similarity return "❌"; // Very low similarity }; + +function chunkArray(array: T[], size: number): T[][] { + const chunks: T[][] = []; + for (let i = 0; i < array.length; i += size) { + chunks.push(array.slice(i, i + size)); + } + return chunks; +}