feat: Added instrumentation for VectorStore.similaritySearch for langchain.js (#2049)

Co-authored-by: Bob Evans <robert.evans25@gmail.com>
newrelic · Mar 5, 2024 · 3c50606 · 3c50606
1 parent 47a925e
commit 3c50606
Show file tree

Hide file tree

Showing 16 changed files with 551 additions and 38 deletions.
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -2,7 +2,7 @@ version: "3"
 services:
   elasticsearch:
     container_name: nr_node_elastic
-    image: docker.elastic.co/elasticsearch/elasticsearch:8.7.1
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
     environment:
       - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
       # Set cluster to single node

diff --git a/lib/instrumentation/langchain/nr-hooks.js b/lib/instrumentation/langchain/nr-hooks.js
@@ -7,6 +7,7 @@
 const toolsInstrumentation = require('./tools')
 const cbManagerInstrumentation = require('./callback-manager')
 const runnableInstrumentation = require('./runnable')
+const vectorstoreInstrumentation = require('./vectorstore')
 
 module.exports = [
   {
@@ -23,5 +24,10 @@ module.exports = [
     type: 'generic',
     moduleName: '@langchain/core/dist/runnables/base',
     onRequire: runnableInstrumentation
+  },
+  {
+    type: 'generic',
+    moduleName: '@langchain/core/vectorstores',
+    onRequire: vectorstoreInstrumentation
   }
 ]
diff --git a/lib/instrumentation/langchain/vectorstore.js b/lib/instrumentation/langchain/vectorstore.js
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2024 New Relic Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+'use strict'
+const {
+  AI: { LANGCHAIN }
+} = require('../../metrics/names')
+const { LangChainVectorSearch, LangChainVectorSearchResult } = require('../../llm-events/langchain')
+const { recordEvent, shouldSkipInstrumentation } = require('./common')
+const { DESTINATIONS } = require('../../config/attribute-filter')
+const { RecorderSpec } = require('../../shim/specs')
+const LlmErrorMessage = require('../../llm-events/error-message')
+
+/**
+ * Records one LangChainVectorSearch event for the whole search request,
+ * then one LangChainVectorSearchResult event per document in `output`.
+ * When `err` is set, the error is also added to the agent's error collector.
+ *
+ * @param {object} params input params
+ * @param {string} params.request vector search query
+ * @param {number} params.k vector search top k
+ * @param {object[]} params.output vector search documents (must be iterable with forEach)
+ * @param {Agent} params.agent NR agent instance
+ * @param {TraceSegment} params.segment active segment from vector search
+ * @param {string} params.pkgVersion langchain version
+ * @param {Error} [params.err] error raised by the search, if any
+ */
+function recordVectorSearch({ request, k, output, agent, segment, pkgVersion, err }) {
+  const vectorSearch = new LangChainVectorSearch({
+    agent,
+    segment,
+    query: request,
+    k,
+    documents: output,
+    // Truthiness check so that both `null` and `undefined` mean "no error",
+    // matching the `if (err)` guard at the bottom of this function.
+    error: Boolean(err)
+  })
+
+  // Event describing the search as a whole.
+  recordEvent({ agent, type: 'LlmVectorSearch', pkgVersion, msg: vectorSearch })
+
+  // One result event per document; `search_id` links each back to the search event.
+  output.forEach((document, sequence) => {
+    const vectorSearchResult = new LangChainVectorSearchResult({
+      agent,
+      segment,
+      metadata: document.metadata,
+      pageContent: document.pageContent,
+      sequence,
+      search_id: vectorSearch.id
+    })
+
+    recordEvent({ agent, type: 'LlmVectorSearchResult', pkgVersion, msg: vectorSearchResult })
+  })
+
+  if (err) {
+    // Report against the segment's transaction; the message carries the search event.
+    agent.errors.add(
+      segment.transaction,
+      err,
+      new LlmErrorMessage({
+        response: output,
+        cause: err,
+        vectorsearch: vectorSearch
+      })
+    )
+  }
+}
+
+module.exports = function initialize(shim, vectorstores) {
+  const { agent, pkgVersion } = shim
+
+  // Nothing gets wrapped when shouldSkipInstrumentation reports true for the agent config.
+  if (shouldSkipInstrumentation(agent.config)) {
+    const notice =
+      'langchain instrumentation is disabled.  To enable set `config.ai_monitoring.enabled` to true'
+    shim.logger.debug(notice)
+    return
+  }
+
+  // Wrap VectorStore#similaritySearch with a RecorderSpec whose `after`
+  // hook records the vector search events.
+  const proto = vectorstores.VectorStore.prototype
+  shim.record(proto, 'similaritySearch', function wrapCall(shim, similaritySearch, fnName, args) {
+    const [request, k] = args
+
+    // eslint-disable-next-line max-params
+    function after(_shim, _fn, _name, err, output, segment) {
+      // `output` may be missing when the search failed; fall back to an
+      // empty list so the recording code can iterate safely.
+      const documents = output ? output : []
+
+      segment.end()
+      recordVectorSearch({ request, k, output: documents, agent, segment, pkgVersion, err })
+
+      // Set the `llm` attribute to true for the TRANS_EVENT destination.
+      const { attributes } = segment.transaction.trace
+      attributes.addAttribute(DESTINATIONS.TRANS_EVENT, 'llm', true)
+    }
+
+    // Spec `name` is the vectorstore metric prefix followed by `fnName`.
+    const name = `${LANGCHAIN.VECTORSTORE}/${fnName}`
+    return new RecorderSpec({ name, promise: true, after })
+  })
+}
diff --git a/lib/llm-events/error-message.js b/lib/llm-events/error-message.js
@@ -18,14 +18,17 @@ module.exports = class LlmErrorMessage {
    * conversation if it was a chat completion conversation.
    * @param {LlmEmbedding} [params.embedding] Details about the conversation
    * if it was an embedding conversation.
+   * @param {LlmVectorStoreSearch} [params.vectorsearch] Details about the vector
+   * search if it was a vector search event.
    */
-  constructor({ response, cause, summary, embedding } = {}) {
+  constructor({ response, cause, summary, embedding, vectorsearch } = {}) {
     this['http.statusCode'] = response?.status ?? cause?.status
     this['error.message'] = cause?.message
     this['error.code'] = response?.code ?? cause?.error?.code
     this['error.param'] = response?.param ?? cause?.error?.param
     this.completion_id = summary?.id
     this.embedding_id = embedding?.id
+    this.vector_store_id = vectorsearch?.id
   }
 
   get [Symbol.toStringTag]() {

diff --git a/lib/llm-events/langchain/vector-search-result.js b/lib/llm-events/langchain/vector-search-result.js
@@ -6,13 +6,13 @@
 'use strict'
 
 const LangChainEvent = require('./event')
-const crypto = require('crypto')
 
 /**
  * @typedef {object} LangChainVectorSearchResultParams
  * @augments LangChainEventParams
  * @property {string} pageContent The stringified contents of the pageContent attribute on each returned search result document.
  * @property {number} [sequence=0] The index of the document in the search result documents list.
+ * @property {string} search_id The identifier from the LangChainVectorSearch event.
  */
 /**
  * @type {LangChainVectorSearchResultParams}
@@ -23,13 +23,12 @@ const defaultParams = {
 }
 
 class LangChainVectorSearchResult extends LangChainEvent {
-  search_id = crypto.randomUUID()
-
   constructor(params) {
     params = Object.assign({}, defaultParams, params)
     super(params)
     const { agent } = params
 
+    this.search_id = params.search_id
     this.sequence = params.sequence
 
     if (agent.config.ai_monitoring.record_content.enabled === true) {

diff --git a/lib/metrics/names.js b/lib/metrics/names.js
@@ -170,7 +170,8 @@ const AI = {
   EMBEDDING: 'Llm/embedding',
   COMPLETION: 'Llm/completion',
   TOOL: 'Llm/tool',
-  CHAIN: 'Llm/chain'
+  CHAIN: 'Llm/chain',
+  VECTORSTORE: 'Llm/vectorstore'
 }
 
 AI.OPENAI = {
@@ -184,7 +185,8 @@ AI.LANGCHAIN = {
   EMBEDDING: `${AI.EMBEDDING}/Langchain`,
   COMPLETION: `${AI.COMPLETION}/Langchain`,
   TOOL: `${AI.TOOL}/Langchain`,
-  CHAIN: `${AI.CHAIN}/Langchain`
+  CHAIN: `${AI.CHAIN}/Langchain`,
+  VECTORSTORE: `${AI.VECTORSTORE}/Langchain`
 }
 
 const RESTIFY = {

diff --git a/test/unit/instrumentation/langchain/vectorstore.test.js b/test/unit/instrumentation/langchain/vectorstore.test.js
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2023 New Relic Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+'use strict'
+
+const { test } = require('tap')
+const helper = require('../../../lib/agent_helper')
+const GenericShim = require('../../../../lib/shim/shim')
+const sinon = require('sinon')
+
+test('langchain/core/vectorstore unit tests', (t) => {
+  // Every subtest gets its own mocked agent, shim and sinon sandbox.
+  t.beforeEach(function (t) {
+    const sandbox = sinon.createSandbox()
+    const agent = helper.loadMockedAgent()
+    agent.config.ai_monitoring = { enabled: true }
+    agent.config.feature_flag = { langchain_instrumentation: true }
+    const shim = new GenericShim(agent, 'langchain')
+    shim.pkgVersion = '0.1.26'
+    sandbox.stub(shim.logger, 'debug')
+    sandbox.stub(shim.logger, 'warn')
+
+    Object.assign(t.context, { agent, shim, sandbox })
+    t.context.initialize = require('../../../../lib/instrumentation/langchain/vectorstore')
+  })
+
+  t.afterEach(function (t) {
+    helper.unloadAgent(t.context.agent)
+    t.context.sandbox.restore()
+  })
+
+  // Minimal stand-in for the module: a VectorStore with an async similaritySearch.
+  const getMockModule = () => {
+    function VectorStore() {}
+    VectorStore.prototype.similaritySearch = async function call() {}
+    return { VectorStore }
+  }
+
+  const disabledConfigs = [
+    { aiMonitoring: false, langChain: true },
+    { aiMonitoring: true, langChain: false },
+    { aiMonitoring: false, langChain: false }
+  ]
+
+  for (const { aiMonitoring, langChain } of disabledConfigs) {
+    const title = `should not register instrumentation if ai_monitoring is ${aiMonitoring} and langchain_instrumentation is ${langChain}`
+    t.test(title, (t) => {
+      const { shim, agent, initialize } = t.context
+      const mockModule = getMockModule()
+      agent.config.ai_monitoring.enabled = aiMonitoring
+      agent.config.feature_flag.langchain_instrumentation = langChain
+
+      initialize(shim, mockModule)
+      t.equal(shim.logger.debug.callCount, 1, 'should log 1 debug messages')
+      t.equal(
+        shim.logger.debug.args[0][0],
+        'langchain instrumentation is disabled.  To enable set `config.ai_monitoring.enabled` to true'
+      )
+      const target = mockModule.VectorStore.prototype.similaritySearch
+      t.equal(shim.isWrapped(target), false, 'should not wrap vectorstore similaritySearch')
+      t.end()
+    })
+  }
+
+  t.end()
+})
diff --git a/test/unit/llm-events/langchain/vector-search-result.test.js b/test/unit/llm-events/langchain/vector-search-result.test.js
@@ -7,6 +7,7 @@
 
 const tap = require('tap')
 const LangChainVectorSearchResult = require('../../../../lib/llm-events/langchain/vector-search-result')
+const LangChainVectorSearch = require('../../../../lib/llm-events/langchain/vector-search')
 
 tap.beforeEach((t) => {
   t.context._tx = {
@@ -44,6 +45,9 @@ tap.beforeEach((t) => {
     transaction: {
       id: 'tx-1',
       traceId: 'trace-1'
+    },
+    getDurationInMillis() {
+      return 42
     }
   }
 
@@ -52,12 +56,19 @@ tap.beforeEach((t) => {
 })
 
 tap.test('create entity', async (t) => {
-  const search = new LangChainVectorSearchResult({
+  const search = new LangChainVectorSearch({
+    ...t.context,
+    query: 'hello world',
+    k: 1
+  })
+
+  const searchResult = new LangChainVectorSearchResult({
     ...t.context,
     sequence: 1,
-    pageContent: 'hello world'
+    pageContent: 'hello world',
+    search_id: search.id
   })
-  t.match(search, {
+  t.match(searchResult, {
     id: /[a-z0-9-]{36}/,
     appName: 'test-app',
     ['llm.conversation_id']: 'test-conversation',
@@ -71,7 +82,7 @@ tap.test('create entity', async (t) => {
     virtual_llm: true,
     sequence: 1,
     page_content: 'hello world',
-    search_id: /[a-z0-9-]{36}/
+    search_id: search.id
   })
 })
 

diff --git a/test/unit/llm-events/openai/error.test.js b/test/unit/llm-events/openai/error.test.js
@@ -18,7 +18,8 @@ tap.test('LlmErrorMessage', (t) => {
     'error.code': 'insufficient_quota',
     'error.param': 'test-param',
     'completion_id': undefined,
-    'embedding_id': undefined
+    'embedding_id': undefined,
+    'vector_store_id': undefined
   }
   t.same(errorMsg, expected)
   t.end()