From 3c50606a57a8dc18c91fe36e3b180bafd15a4cbc Mon Sep 17 00:00:00 2001 From: Svetlana Brennan <50715937+svetlanabrennan@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:48:47 -0600 Subject: [PATCH] feat: Added instrumentation for VectorStore.similaritySearch for langchain.js (#2049) Co-authored-by: Bob Evans --- docker-compose.yml | 2 +- lib/instrumentation/langchain/nr-hooks.js | 6 + lib/instrumentation/langchain/vectorstore.js | 108 ++++++++ lib/llm-events/error-message.js | 5 +- .../langchain/vector-search-result.js | 5 +- lib/metrics/names.js | 6 +- .../langchain/vectorstore.test.js | 68 +++++ .../langchain/vector-search-result.test.js | 19 +- test/unit/llm-events/openai/error.test.js | 3 +- test/versioned/langchain/common.js | 55 +++- test/versioned/langchain/package.json | 7 +- .../langchain/runnables-streaming.tap.js | 20 +- test/versioned/langchain/runnables.tap.js | 20 +- test/versioned/langchain/vectorstore.tap.js | 257 ++++++++++++++++++ test/versioned/openai/mock-responses.js | 4 +- test/versioned/openai/mock-server.js | 4 + 16 files changed, 551 insertions(+), 38 deletions(-) create mode 100644 lib/instrumentation/langchain/vectorstore.js create mode 100644 test/unit/instrumentation/langchain/vectorstore.test.js create mode 100644 test/versioned/langchain/vectorstore.tap.js diff --git a/docker-compose.yml b/docker-compose.yml index 5ab17035f1..ac6fb6a20a 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: "3" services: elasticsearch: container_name: nr_node_elastic - image: docker.elastic.co/elasticsearch/elasticsearch:8.7.1 + image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0 environment: - "ES_JAVA_OPTS=-Xms512m -Xmx512m" # Set cluster to single node diff --git a/lib/instrumentation/langchain/nr-hooks.js b/lib/instrumentation/langchain/nr-hooks.js index 8a47a554ef..961257c917 100644 --- a/lib/instrumentation/langchain/nr-hooks.js +++ b/lib/instrumentation/langchain/nr-hooks.js @@ -7,6 +7,7 @@ const 
toolsInstrumentation = require('./tools')
 const cbManagerInstrumentation = require('./callback-manager')
 const runnableInstrumentation = require('./runnable')
+const vectorstoreInstrumentation = require('./vectorstore')
 
 module.exports = [
   {
@@ -23,5 +24,10 @@ module.exports = [
     type: 'generic',
     moduleName: '@langchain/core/dist/runnables/base',
     onRequire: runnableInstrumentation
+  },
+  {
+    type: 'generic',
+    moduleName: '@langchain/core/vectorstores',
+    onRequire: vectorstoreInstrumentation
   }
 ]
diff --git a/lib/instrumentation/langchain/vectorstore.js b/lib/instrumentation/langchain/vectorstore.js
new file mode 100644
index 0000000000..d2ebba5f45
--- /dev/null
+++ b/lib/instrumentation/langchain/vectorstore.js
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2024 New Relic Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+'use strict'
+const {
+  AI: { LANGCHAIN }
+} = require('../../metrics/names')
+const { LangChainVectorSearch, LangChainVectorSearchResult } = require('../../llm-events/langchain')
+const { recordEvent, shouldSkipInstrumentation } = require('./common')
+const { DESTINATIONS } = require('../../config/attribute-filter')
+const { RecorderSpec } = require('../../shim/specs')
+const LlmErrorMessage = require('../../llm-events/error-message')
+
+/**
+ * Generates a LangChainVectorSearch for entire search request.
+ * Also iterates over documents in output and generates a
+ * LangChainVectorSearchResult for each document.
+ *
+ * @param {object} params input params
+ * @param {string} params.request vector search query
+ * @param {number} params.k vector search top k
+ * @param {object[]} params.output vector search documents (empty array when the search failed)
+ * @param {Agent} params.agent NR agent instance
+ * @param {TraceSegment} params.segment active segment from vector search
+ * @param {string} params.pkgVersion langchain version
+ * @param {Error} [params.err] error raised by the search; null or undefined on success
+ */
+function recordVectorSearch({ request, k, output, agent, segment, pkgVersion, err }) {
+  const vectorSearch = new LangChainVectorSearch({
+    agent,
+    segment,
+    query: request,
+    k,
+    documents: output,
+    error: Boolean(err)
+  })
+
+  recordEvent({ agent, type: 'LlmVectorSearch', pkgVersion, msg: vectorSearch })
+
+  output.forEach((document, sequence) => {
+    const vectorSearchResult = new LangChainVectorSearchResult({
+      agent,
+      segment,
+      metadata: document.metadata,
+      pageContent: document.pageContent,
+      sequence,
+      search_id: vectorSearch.id
+    })
+
+    recordEvent({
+      agent,
+      type: 'LlmVectorSearchResult',
+      pkgVersion,
+      msg: vectorSearchResult
+    })
+  })
+
+  if (err) {
+    agent.errors.add(
+      segment.transaction,
+      err,
+      new LlmErrorMessage({
+        response: output,
+        cause: err,
+        vectorsearch: vectorSearch
+      })
+    )
+  }
+}
+
+module.exports = function initialize(shim, vectorstores) {
+  const { agent, pkgVersion } = shim
+
+  if (shouldSkipInstrumentation(agent.config)) {
+    shim.logger.debug(
+      'langchain instrumentation is disabled. To enable set `config.ai_monitoring.enabled` to true'
+    )
+    return
+  }
+
+  shim.record(
+    vectorstores.VectorStore.prototype,
+    'similaritySearch',
+    function wrapCall(shim, similaritySearch, fnName, args) {
+      const [request, k] = args
+
+      return new RecorderSpec({
+        name: `${LANGCHAIN.VECTORSTORE}/${fnName}`,
+        promise: true,
+        // eslint-disable-next-line max-params
+        after(_shim, _fn, _name, err, output, segment) {
+          if (!output) {
+            // If we get an error, it is possible that `output = null`.
+            // In that case, we define it to be an empty array.
+            output = []
+          }
+
+          segment.end()
+          recordVectorSearch({ request, k, output, agent, segment, pkgVersion, err })
+
+          segment.transaction.trace.attributes.addAttribute(DESTINATIONS.TRANS_EVENT, 'llm', true)
+        }
+      })
+    }
+  )
+}
diff --git a/lib/llm-events/error-message.js b/lib/llm-events/error-message.js
index e448781411..cf28e70126 100644
--- a/lib/llm-events/error-message.js
+++ b/lib/llm-events/error-message.js
@@ -18,14 +18,17 @@ module.exports = class LlmErrorMessage {
    * conversation if it was a chat completion conversation.
    * @param {LlmEmbedding} [params.embedding] Details about the conversation
    * if it was an embedding conversation.
+   * @param {LlmVectorStoreSearch} [params.vectorsearch] Details about the vector
+   * search if it was a vector search event.
    */
-  constructor({ response, cause, summary, embedding } = {}) {
+  constructor({ response, cause, summary, embedding, vectorsearch } = {}) {
     this['http.statusCode'] = response?.status ?? cause?.status
     this['error.message'] = cause?.message
     this['error.code'] = response?.code ?? cause?.error?.code
     this['error.param'] = response?.param ?? cause?.error?.param
     this.completion_id = summary?.id
     this.embedding_id = embedding?.id
+    this.vector_store_id = vectorsearch?.id
   }
 
   get [Symbol.toStringTag]() {
diff --git a/lib/llm-events/langchain/vector-search-result.js b/lib/llm-events/langchain/vector-search-result.js
index 2612218fc8..7aae16cd2b 100644
--- a/lib/llm-events/langchain/vector-search-result.js
+++ b/lib/llm-events/langchain/vector-search-result.js
@@ -6,13 +6,13 @@
 'use strict'
 
 const LangChainEvent = require('./event')
-const crypto = require('crypto')
 
 /**
  * @typedef {object} LangChainVectorSearchResultParams
  * @augments LangChainEventParams
  * @property {string} pageContent The stringified contents of the pageContent attribute on each returned search result document.
* @property {number} [sequence=0] The index of the document in the search result documents list. + * @property {string} search_id The identifier from the LangChainVectorSearch event. */ /** * @type {LangChainVectorSearchResultParams} @@ -23,13 +23,12 @@ const defaultParams = { } class LangChainVectorSearchResult extends LangChainEvent { - search_id = crypto.randomUUID() - constructor(params) { params = Object.assign({}, defaultParams, params) super(params) const { agent } = params + this.search_id = params.search_id this.sequence = params.sequence if (agent.config.ai_monitoring.record_content.enabled === true) { diff --git a/lib/metrics/names.js b/lib/metrics/names.js index 41b0103211..82d6e3a1a3 100644 --- a/lib/metrics/names.js +++ b/lib/metrics/names.js @@ -170,7 +170,8 @@ const AI = { EMBEDDING: 'Llm/embedding', COMPLETION: 'Llm/completion', TOOL: 'Llm/tool', - CHAIN: 'Llm/chain' + CHAIN: 'Llm/chain', + VECTORSTORE: 'Llm/vectorstore' } AI.OPENAI = { @@ -184,7 +185,8 @@ AI.LANGCHAIN = { EMBEDDING: `${AI.EMBEDDING}/Langchain`, COMPLETION: `${AI.COMPLETION}/Langchain`, TOOL: `${AI.TOOL}/Langchain`, - CHAIN: `${AI.CHAIN}/Langchain` + CHAIN: `${AI.CHAIN}/Langchain`, + VECTORSTORE: `${AI.VECTORSTORE}/Langchain` } const RESTIFY = { diff --git a/test/unit/instrumentation/langchain/vectorstore.test.js b/test/unit/instrumentation/langchain/vectorstore.test.js new file mode 100644 index 0000000000..3c6111ea9f --- /dev/null +++ b/test/unit/instrumentation/langchain/vectorstore.test.js @@ -0,0 +1,68 @@ +/* + * Copyright 2023 New Relic Corporation. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+'use strict'
+
+const { test } = require('tap')
+const helper = require('../../../lib/agent_helper')
+const GenericShim = require('../../../../lib/shim/shim')
+const sinon = require('sinon')
+
+test('langchain/core/vectorstore unit tests', (t) => {
+  t.beforeEach(function (t) {
+    const sandbox = sinon.createSandbox()
+    const agent = helper.loadMockedAgent()
+    agent.config.ai_monitoring = { enabled: true }
+    agent.config.feature_flag = { langchain_instrumentation: true }
+    const shim = new GenericShim(agent, 'langchain')
+    shim.pkgVersion = '0.1.26'
+    sandbox.stub(shim.logger, 'debug')
+    sandbox.stub(shim.logger, 'warn')
+
+    t.context.agent = agent
+    t.context.shim = shim
+    t.context.sandbox = sandbox
+    t.context.initialize = require('../../../../lib/instrumentation/langchain/vectorstore')
+  })
+
+  t.afterEach(function (t) {
+    helper.unloadAgent(t.context.agent)
+    t.context.sandbox.restore()
+  })
+
+  function getMockModule() {
+    function VectorStore() {}
+    VectorStore.prototype.similaritySearch = async function call() {}
+    return { VectorStore }
+  }
+
+  ;[
+    { aiMonitoring: false, langChain: true },
+    { aiMonitoring: true, langChain: false },
+    { aiMonitoring: false, langChain: false }
+  ].forEach(({ aiMonitoring, langChain }) => {
+    t.test(
+      `should not register instrumentation if ai_monitoring is ${aiMonitoring} and langchain_instrumentation is ${langChain}`,
+      (t) => {
+        const { shim, agent, initialize } = t.context
+        const MockVectorstore = getMockModule()
+        agent.config.ai_monitoring.enabled = aiMonitoring
+        agent.config.feature_flag.langchain_instrumentation = langChain
+
+        initialize(shim, MockVectorstore)
+        t.equal(shim.logger.debug.callCount, 1, 'should log 1 debug message')
+        t.equal(
+          shim.logger.debug.args[0][0],
+          'langchain instrumentation is disabled. To enable set `config.ai_monitoring.enabled` to true'
+        )
+        const isWrapped = shim.isWrapped(MockVectorstore.VectorStore.prototype.similaritySearch)
+        t.equal(isWrapped, false, 'should not wrap vectorstore similaritySearch')
+        t.end()
+      }
+    )
+  })
+
+  t.end()
+})
diff --git a/test/unit/llm-events/langchain/vector-search-result.test.js b/test/unit/llm-events/langchain/vector-search-result.test.js
index 7d1f3c49d4..5e12046eab 100644
--- a/test/unit/llm-events/langchain/vector-search-result.test.js
+++ b/test/unit/llm-events/langchain/vector-search-result.test.js
@@ -7,6 +7,7 @@
 
 const tap = require('tap')
 const LangChainVectorSearchResult = require('../../../../lib/llm-events/langchain/vector-search-result')
+const LangChainVectorSearch = require('../../../../lib/llm-events/langchain/vector-search')
 
 tap.beforeEach((t) => {
   t.context._tx = {
@@ -44,6 +45,9 @@ tap.beforeEach((t) => {
     transaction: {
       id: 'tx-1',
       traceId: 'trace-1'
+    },
+    getDurationInMillis() {
+      return 42
     }
   }
 
@@ -52,12 +56,19 @@ tap.beforeEach((t) => {
 })
 
 tap.test('create entity', async (t) => {
-  const search = new LangChainVectorSearchResult({
+  const search = new LangChainVectorSearch({
+    ...t.context,
+    query: 'hello world',
+    k: 1
+  })
+
+  const searchResult = new LangChainVectorSearchResult({
     ...t.context,
     sequence: 1,
-    pageContent: 'hello world'
+    pageContent: 'hello world',
+    search_id: search.id
   })
-  t.match(search, {
+  t.match(searchResult, {
     id: /[a-z0-9-]{36}/,
     appName: 'test-app',
     ['llm.conversation_id']: 'test-conversation',
@@ -71,7 +82,7 @@ tap.test('create entity', async (t) => {
     virtual_llm: true,
     sequence: 1,
     page_content: 'hello world',
-    search_id: /[a-z0-9-]{36}/
+    search_id: search.id
   })
 })
 
diff --git a/test/unit/llm-events/openai/error.test.js b/test/unit/llm-events/openai/error.test.js
index 1deff0925a..24d4a634d2 100644
--- a/test/unit/llm-events/openai/error.test.js
+++ b/test/unit/llm-events/openai/error.test.js
@@ -18,7 +18,8 @@ tap.test('LlmErrorMessage', (t) => {
     'error.code': 'insufficient_quota',
     'error.param': 'test-param',
     'completion_id': undefined,
-    'embedding_id': undefined
+    'embedding_id': undefined,
+    'vector_store_id': undefined
   }
   t.same(errorMsg, expected)
   t.end()
diff --git a/test/versioned/langchain/common.js b/test/versioned/langchain/common.js
index a6ff7e64d1..f72da9e272 100644
--- a/test/versioned/langchain/common.js
+++ b/test/versioned/langchain/common.js
@@ -14,13 +14,62 @@ function filterLangchainEvents(events) {
   })
 }
 
-function filterLangchainMessages(events, msgType) {
+function filterLangchainEventsByType(events, msgType) {
   return events.filter((event) => {
     const [{ type }] = event
     return type === msgType
   })
 }
 
+function assertLangChainVectorSearch({ tx, vectorSearch, responseDocumentSize }) {
+  const expectedSearch = {
+    'id': /[a-f0-9]{36}/,
+    'appName': 'New Relic for Node.js tests',
+    'span_id': tx.trace.root.children[0].id,
+    'trace_id': tx.traceId,
+    'transaction_id': tx.id,
+    'request.k': 1,
+    'request.query': 'This is an embedding test.',
+    'ingest_source': 'Node',
+    'vendor': 'langchain',
+    'virtual_llm': true,
+    ['response.number_of_documents']: responseDocumentSize,
+    'duration': tx.trace.root.children[0].getDurationInMillis()
+  }
+
+  this.equal(vectorSearch[0].type, 'LlmVectorSearch')
+  this.match(vectorSearch[1], expectedSearch, 'should match vector search')
+}
+
+function assertLangChainVectorSearchResult({ tx, vectorSearchResult, vectorSearchId }) {
+  const baseSearchResult = {
+    'id': /[a-f0-9]{36}/,
+    'search_id': vectorSearchId,
+    'appName': 'New Relic for Node.js tests',
+    'span_id': tx.trace.root.children[0].id,
+    'trace_id': tx.traceId,
+    'transaction_id': tx.id,
+    'ingest_source': 'Node',
+    'vendor': 'langchain',
+    'metadata.id': '2',
+    'virtual_llm': true
+  }
+
+  vectorSearchResult.forEach(([{ type }, result]) => {
+    const expectedResult = { ...baseSearchResult }
+    if (result.sequence === 0) {
+      expectedResult.sequence = 0
+      expectedResult.page_content = 'This is an embedding test.'
+    } else if (result.sequence === 1) {
+      expectedResult.sequence = 1
+      expectedResult.page_content = '212 degrees Fahrenheit is equal to 100 degrees Celsius.'
+    }
+
+    this.equal(type, 'LlmVectorSearchResult')
+    this.match(result, expectedResult, 'should match vector search result')
+  })
+}
+
 function assertLangChainChatCompletionSummary({ tx, chatSummary, withCallback }) {
   const expectedSummary = {
     'id': /[a-f0-9]{36}/,
@@ -93,8 +142,10 @@ function assertLangChainChatCompletionMessages({
 
 tap.Test.prototype.addAssert('langchainMessages', 1, assertLangChainChatCompletionMessages)
 tap.Test.prototype.addAssert('langchainSummary', 1, assertLangChainChatCompletionSummary)
+tap.Test.prototype.addAssert('langchainVectorSearch', 1, assertLangChainVectorSearch)
+tap.Test.prototype.addAssert('langchainVectorSearchResult', 1, assertLangChainVectorSearchResult)
 
 module.exports = {
   filterLangchainEvents,
-  filterLangchainMessages
+  filterLangchainEventsByType
 }
diff --git a/test/versioned/langchain/package.json b/test/versioned/langchain/package.json
index 34ff9d2b19..669c21f129 100644
--- a/test/versioned/langchain/package.json
+++ b/test/versioned/langchain/package.json
@@ -12,12 +12,15 @@
       },
       "dependencies": {
         "@langchain/core": ">=0.1.17",
-        "@langchain/openai": "latest"
+        "@langchain/openai": "latest",
+        "@langchain/community": "latest",
+        "@elastic/elasticsearch": "latest"
       },
       "files": [
         "tools.tap.js",
         "runnables.tap.js",
-        "runnables-streaming.tap.js"
+        "runnables-streaming.tap.js",
+        "vectorstore.tap.js"
       ]
     }
   ]
diff --git a/test/versioned/langchain/runnables-streaming.tap.js b/test/versioned/langchain/runnables-streaming.tap.js
index d866890ad3..f94751ec2f 100644
--- a/test/versioned/langchain/runnables-streaming.tap.js
+++ b/test/versioned/langchain/runnables-streaming.tap.js
@@ -9,7 +9,7 @@ const tap = require('tap')
 const helper = require('../../lib/agent_helper')
 // load the assertSegments assertion
 require('../../lib/metrics_helper')
-const {
filterLangchainEvents, filterLangchainMessages } = require('./common') +const { filterLangchainEvents, filterLangchainEventsByType } = require('./common') const { version: pkgVersion } = require('@langchain/core/package.json') const createOpenAIMockServer = require('../openai/mock-server') const mockResponses = require('../openai/mock-responses') @@ -135,11 +135,11 @@ tap.test('Langchain instrumentation - chain streaming', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -180,11 +180,11 @@ tap.test('Langchain instrumentation - chain streaming', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -229,11 +229,11 @@ tap.test('Langchain instrumentation - chain streaming', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -322,11 +322,11 @@ tap.test('Langchain 
instrumentation - chain streaming', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -429,7 +429,7 @@ tap.test('Langchain instrumentation - chain streaming', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) diff --git a/test/versioned/langchain/runnables.tap.js b/test/versioned/langchain/runnables.tap.js index ab026fa40b..e4f6f7ec57 100644 --- a/test/versioned/langchain/runnables.tap.js +++ b/test/versioned/langchain/runnables.tap.js @@ -9,7 +9,7 @@ const tap = require('tap') const helper = require('../../lib/agent_helper') // load the assertSegments assertion require('../../lib/metrics_helper') -const { filterLangchainEvents, filterLangchainMessages } = require('./common') +const { filterLangchainEvents, filterLangchainEventsByType } = require('./common') const { version: pkgVersion } = require('@langchain/core/package.json') const createOpenAIMockServer = require('../openai/mock-server') const config = { @@ -116,11 +116,11 @@ tap.test('Langchain instrumentation - runnable sequence', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + 
const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -155,11 +155,11 @@ tap.test('Langchain instrumentation - runnable sequence', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -198,11 +198,11 @@ tap.test('Langchain instrumentation - runnable sequence', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -280,11 +280,11 @@ tap.test('Langchain instrumentation - runnable sequence', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const langChainMessageEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionMessage' ) - const langChainSummaryEvents = filterLangchainMessages( + const langChainSummaryEvents = filterLangchainEventsByType( langchainEvents, 'LlmChatCompletionSummary' ) @@ -374,7 +374,7 @@ tap.test('Langchain instrumentation - runnable sequence', (t) => { const events = agent.customEventAggregator.events.toArray() const langchainEvents = filterLangchainEvents(events) - const langChainMessageEvents = filterLangchainMessages( + const 
langChainMessageEvents = filterLangchainEventsByType(
         langchainEvents,
         'LlmChatCompletionMessage'
       )
diff --git a/test/versioned/langchain/vectorstore.tap.js b/test/versioned/langchain/vectorstore.tap.js
new file mode 100644
index 0000000000..2356e7f52b
--- /dev/null
+++ b/test/versioned/langchain/vectorstore.tap.js
@@ -0,0 +1,257 @@
+/*
+ * Copyright 2024 New Relic Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+'use strict'
+
+const tap = require('tap')
+const helper = require('../../lib/agent_helper')
+// load the assertSegments assertion
+require('../../lib/metrics_helper')
+const { version: pkgVersion } = require('@langchain/core/package.json')
+const createOpenAIMockServer = require('../openai/mock-server')
+const { filterLangchainEvents, filterLangchainEventsByType } = require('./common')
+const { DESTINATIONS } = require('../../../lib/config/attribute-filter')
+const params = require('../../lib/params')
+const { Document } = require('@langchain/core/documents')
+
+const config = {
+  ai_monitoring: {
+    enabled: true
+  },
+  feature_flag: {
+    langchain_instrumentation: true
+  }
+}
+
+tap.test('Langchain instrumentation - vectorstore', (t) => {
+  t.autoend()
+
+  t.beforeEach(async (t) => {
+    const { host, port, server } = await createOpenAIMockServer()
+    t.context.server = server
+    t.context.agent = helper.instrumentMockedAgent(config)
+    const { OpenAIEmbeddings } = require('@langchain/openai')
+
+    const { Client } = require('@elastic/elasticsearch')
+    const clientArgs = {
+      client: new Client({
+        node: `http://${params.elastic_host}:${params.elastic_port}`
+      })
+    }
+    const { ElasticVectorSearch } = require('@langchain/community/vectorstores/elasticsearch')
+
+    t.context.embedding = new OpenAIEmbeddings({
+      openAIApiKey: 'fake-key',
+      configuration: {
+        baseURL: `http://${host}:${port}`
+      }
+    })
+    const docs = [
+      new Document({
+        metadata: { id: '2' },
+        pageContent: 'This is an embedding test.'
+      })
+    ]
+    const vectorStore = new ElasticVectorSearch(t.context.embedding, clientArgs)
+    await vectorStore.deleteIfExists()
+    await vectorStore.addDocuments(docs)
+    t.context.vs = vectorStore
+  })
+
+  t.afterEach(async (t) => {
+    t.context?.server?.close()
+    helper.unloadAgent(t.context.agent)
+    // bust the require-cache so it can re-instrument
+    Object.keys(require.cache).forEach((key) => {
+      if (
+        key.includes('@langchain/core') ||
+        key.includes('openai') ||
+        key.includes('@elastic') ||
+        key.includes('@langchain/community')
+      ) {
+        delete require.cache[key]
+      }
+    })
+  })
+
+  t.test('should create vectorstore events for every similarity search call', (t) => {
+    const { agent, vs } = t.context
+
+    helper.runInNamedTransaction(agent, async (tx) => {
+      await vs.similaritySearch('This is an embedding test.', 1)
+
+      const events = agent.customEventAggregator.events.toArray()
+      t.equal(events.length, 3, 'should create 3 events')
+
+      const langchainEvents = events.filter((event) => {
+        const [, chainEvent] = event
+        return chainEvent.vendor === 'langchain'
+      })
+
+      t.equal(langchainEvents.length, 2, 'should create 2 langchain events')
+
+      tx.end()
+      t.end()
+    })
+  })
+
+  t.test('should create span on successful vectorstore create', (t) => {
+    const { agent, vs } = t.context
+    helper.runInTransaction(agent, async (tx) => {
+      const result = await vs.similaritySearch('This is an embedding test.', 1)
+      t.ok(result)
+      t.assertSegments(tx.trace.root, ['Llm/vectorstore/Langchain/similaritySearch'], {
+        exact: false
+      })
+      tx.end()
+      t.end()
+    })
+  })
+
+  t.test('should increment tracking metric for each langchain vectorstore event', (t) => {
+    const { agent, vs } = t.context
+
+    helper.runInTransaction(agent, async (tx) => {
+      await vs.similaritySearch('This is an embedding test.', 1)
+
+      const metrics = agent.metrics.getOrCreateMetric(
+        `Supportability/Nodejs/ML/Langchain/${pkgVersion}`
+      )
+      t.equal(metrics.callCount > 0, true)
+
+      tx.end()
+      t.end()
+    })
+  })
+
+  t.test(
+    'should create vectorstore events for every similarity search call with embeddings',
+    (t) => {
+      const { agent, vs } = t.context
+
+      helper.runInNamedTransaction(agent, async (tx) => {
+        await vs.similaritySearch('This is an embedding test.', 1)
+
+        const events = agent.customEventAggregator.events.toArray()
+        const langchainEvents = filterLangchainEvents(events)
+
+        const vectorSearchResultEvents = filterLangchainEventsByType(
+          langchainEvents,
+          'LlmVectorSearchResult'
+        )
+
+        const vectorSearchEvents = filterLangchainEventsByType(langchainEvents, 'LlmVectorSearch')
+
+        t.langchainVectorSearch({
+          tx,
+          vectorSearch: vectorSearchEvents[0],
+          responseDocumentSize: 1
+        })
+        t.langchainVectorSearchResult({
+          tx,
+          vectorSearchResult: vectorSearchResultEvents,
+          vectorSearchId: vectorSearchEvents[0][1].id
+        })
+
+        tx.end()
+        t.end()
+      })
+    }
+  )
+
+  t.test(
+    'should create only vectorstore search event for similarity search call with embeddings and invalid metadata filter',
+    (t) => {
+      const { agent, vs } = t.context
+
+      helper.runInNamedTransaction(agent, async (tx) => {
+        // search for documents with invalid filter
+        await vs.similaritySearch('This is an embedding test.', 1, {
+          a: 'some filter'
+        })
+
+        const events = agent.customEventAggregator.events.toArray()
+        const langchainEvents = filterLangchainEvents(events)
+
+        const vectorSearchResultEvents = filterLangchainEventsByType(
+          langchainEvents,
+          'LlmVectorSearchResult'
+        )
+
+        const vectorSearchEvents = filterLangchainEventsByType(langchainEvents, 'LlmVectorSearch')
+
+        // there are no documents in vector store with that filter
+        t.equal(vectorSearchResultEvents.length, 0, 'should have 0 events')
+        t.langchainVectorSearch({
+          tx,
+          vectorSearch: vectorSearchEvents[0],
+          responseDocumentSize: 0
+        })
+
+        tx.end()
+        t.end()
+      })
+    }
+  )
+
+  t.test('should not create vectorstore events when not in a transaction', async (t) => {
+    const { agent, vs } = t.context
+
+    await vs.similaritySearch('This is an embedding test.', 1)
+
+    const events = agent.customEventAggregator.events.toArray()
+    t.equal(events.length, 0, 'should not create vectorstore events')
+    t.end()
+  })
+
+  t.test('should add llm attribute to transaction', (t) => {
+    const { agent, vs } = t.context
+
+    helper.runInTransaction(agent, async (tx) => {
+      await vs.similaritySearch('This is an embedding test.', 1)
+
+      const attributes = tx.trace.attributes.get(DESTINATIONS.TRANS_EVENT)
+      t.equal(attributes.llm, true)
+
+      tx.end()
+      t.end()
+    })
+  })
+
+  t.test('should create error events', (t) => {
+    const { agent, vs } = t.context
+
+    helper.runInNamedTransaction(agent, async (tx) => {
+      try {
+        await vs.similaritySearch('Embedding not allowed.', 1)
+      } catch (error) {
+        t.ok(error)
+      }
+
+      const events = agent.customEventAggregator.events.toArray()
+      // Only LlmEmbedding and LlmVectorSearch events will be created
+      // LangChainVectorSearchResult event won't be created since there was an error
+      t.equal(events.length, 2, 'should create 2 events')
+
+      const langchainEvents = events.filter((event) => {
+        const [, chainEvent] = event
+        return chainEvent.vendor === 'langchain'
+      })
+
+      t.equal(langchainEvents.length, 1, 'should create 1 langchain vectorsearch event')
+      t.equal(langchainEvents[0][1].error, true)
+
+      // But, we should also get two error events: 1xLLM and 1xLangChain
+      const exceptions = tx.exceptions
+      t.ok(exceptions.length > 0, 'should record at least one error on the transaction')
+      for (const e of exceptions) {
+        t.equal(Object.prototype.toString.call(e.customAttributes), '[object LlmErrorMessage]')
+      }
+
+      tx.end()
+      t.end()
+    })
+  })
+})
diff --git a/test/versioned/openai/mock-responses.js b/test/versioned/openai/mock-responses.js
index 5a385abba3..9692540d8c 100644
--- a/test/versioned/openai/mock-responses.js
+++ b/test/versioned/openai/mock-responses.js
@@ -76,8 +76,8 @@ responses.set('This is an embedding test.', {
   body: {
     data: [
       {
-        embedding:
-          '',
+        // a small sample of a real embedding response
+        embedding: [-0.021616805, 0.004173375, 0.002796262, 0.004489489, -0.004940119],
         index: 0,
         object: 'embedding'
       }
diff --git a/test/versioned/openai/mock-server.js b/test/versioned/openai/mock-server.js
index 7f03f7ba48..47cdb0b431 100644
--- a/test/versioned/openai/mock-server.js
+++ b/test/versioned/openai/mock-server.js
@@ -153,5 +153,9 @@ function getShortenedPrompt(reqBody) {
   const prompt =
     reqBody.prompt || reqBody.input || reqBody.messages.map((m) => m.content).join('\n')
 
+  if (Array.isArray(prompt)) {
+    return prompt[0]
+  }
+
   return prompt.split('\n')[0]
 }