Skip to content

Commit

Permalink
feat: Added instrumentation for VectorStore.similaritySearch for lang…
Browse files Browse the repository at this point in the history
…chain.js (#2049)

Co-authored-by: Bob Evans <robert.evans25@gmail.com>
  • Loading branch information
svetlanabrennan and bizob2828 committed Mar 5, 2024
1 parent 47a925e commit 3c50606
Show file tree
Hide file tree
Showing 16 changed files with 551 additions and 38 deletions.
2 changes: 1 addition & 1 deletion docker-compose.yml
Expand Up @@ -2,7 +2,7 @@ version: "3"
services:
elasticsearch:
container_name: nr_node_elastic
image: docker.elastic.co/elasticsearch/elasticsearch:8.7.1
image: docker.elastic.co/elasticsearch/elasticsearch:8.8.0
environment:
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
# Set cluster to single node
Expand Down
6 changes: 6 additions & 0 deletions lib/instrumentation/langchain/nr-hooks.js
Expand Up @@ -7,6 +7,7 @@
const toolsInstrumentation = require('./tools')
const cbManagerInstrumentation = require('./callback-manager')
const runnableInstrumentation = require('./runnable')
const vectorstoreInstrumentation = require('./vectorstore')

module.exports = [
{
Expand All @@ -23,5 +24,10 @@ module.exports = [
type: 'generic',
moduleName: '@langchain/core/dist/runnables/base',
onRequire: runnableInstrumentation
},
{
type: 'generic',
moduleName: '@langchain/core/vectorstores',
onRequire: vectorstoreInstrumentation
}
]
108 changes: 108 additions & 0 deletions lib/instrumentation/langchain/vectorstore.js
@@ -0,0 +1,108 @@
/*
* Copyright 2024 New Relic Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/

'use strict'
const {
AI: { LANGCHAIN }
} = require('../../metrics/names')
const { LangChainVectorSearch, LangChainVectorSearchResult } = require('../../llm-events/langchain')
const { recordEvent, shouldSkipInstrumentation } = require('./common')
const { DESTINATIONS } = require('../../config/attribute-filter')
const { RecorderSpec } = require('../../shim/specs')
const LlmErrorMessage = require('../../llm-events/error-message')

/**
 * Records the LLM events for a single vector search.
 *
 * Creates one LangChainVectorSearch event for the whole request, then one
 * LangChainVectorSearchResult event per document in `output`, each linked to
 * the search through `search_id`. When `err` is truthy, the error is also
 * added to the agent's error collector with an LlmErrorMessage that
 * references the search event.
 *
 * @param {object} params input params
 * @param {string} params.request vector search query
 * @param {number} params.k vector search top k
 * @param {object[]} params.output vector search documents; must be iterable
 * with `forEach` (the caller passes an empty array when there is no output)
 * @param {Agent} params.agent NR agent instance
 * @param {TraceSegment} params.segment active segment from vector search
 * @param {string} params.pkgVersion langchain version
 * @param {Error} [params.err] error from the search, if one occurred
 */
function recordVectorSearch({ request, k, output, agent, segment, pkgVersion, err }) {
  const vectorSearch = new LangChainVectorSearch({
    agent,
    segment,
    query: request,
    k,
    documents: output,
    // Use truthiness so that both `null` and `undefined` mean "no error".
    // A strict `err !== null` check would flag a successful search as
    // errored whenever `err` is `undefined`, and would disagree with the
    // `if (err)` check below.
    error: Boolean(err)
  })

  recordEvent({ agent, type: 'LlmVectorSearch', pkgVersion, msg: vectorSearch })

  output.forEach((document, sequence) => {
    const vectorSearchResult = new LangChainVectorSearchResult({
      agent,
      segment,
      metadata: document.metadata,
      pageContent: document.pageContent,
      sequence,
      // Ties each result event back to its parent search event.
      search_id: vectorSearch.id
    })

    recordEvent({
      agent,
      type: 'LlmVectorSearchResult',
      pkgVersion,
      msg: vectorSearchResult
    })
  })

  if (err) {
    agent.errors.add(
      segment.transaction,
      err,
      new LlmErrorMessage({
        response: output,
        cause: err,
        vectorsearch: vectorSearch
      })
    )
  }
}

module.exports = function initialize(shim, vectorstores) {
const { agent, pkgVersion } = shim

if (shouldSkipInstrumentation(agent.config)) {
shim.logger.debug(
'langchain instrumentation is disabled. To enable set `config.ai_monitoring.enabled` to true'
)
return
}

shim.record(
vectorstores.VectorStore.prototype,
'similaritySearch',
function wrapCall(shim, similaritySearch, fnName, args) {
const [request, k] = args

return new RecorderSpec({
name: `${LANGCHAIN.VECTORSTORE}/${fnName}`,
promise: true,
// eslint-disable-next-line max-params
after(_shim, _fn, _name, err, output, segment) {
if (!output) {
// If we get an error, it is possible that `output = null`.
// In that case, we define it to be an empty array.
output = []
}

segment.end()
recordVectorSearch({ request, k, output, agent, segment, pkgVersion, err })

segment.transaction.trace.attributes.addAttribute(DESTINATIONS.TRANS_EVENT, 'llm', true)
}
})
}
)
}
5 changes: 4 additions & 1 deletion lib/llm-events/error-message.js
Expand Up @@ -18,14 +18,17 @@ module.exports = class LlmErrorMessage {
* conversation if it was a chat completion conversation.
* @param {LlmEmbedding} [params.embedding] Details about the conversation
* if it was an embedding conversation.
* @param {LlmVectorStoreSearch} [params.vectorsearch] Details about the vector
* search if it was a vector search event.
*/
constructor({ response, cause, summary, embedding } = {}) {
constructor({ response, cause, summary, embedding, vectorsearch } = {}) {
this['http.statusCode'] = response?.status ?? cause?.status
this['error.message'] = cause?.message
this['error.code'] = response?.code ?? cause?.error?.code
this['error.param'] = response?.param ?? cause?.error?.param
this.completion_id = summary?.id
this.embedding_id = embedding?.id
this.vector_store_id = vectorsearch?.id
}

get [Symbol.toStringTag]() {
Expand Down
5 changes: 2 additions & 3 deletions lib/llm-events/langchain/vector-search-result.js
Expand Up @@ -6,13 +6,13 @@
'use strict'

const LangChainEvent = require('./event')
const crypto = require('crypto')

/**
* @typedef {object} LangChainVectorSearchResultParams
* @augments LangChainEventParams
* @property {string} pageContent The stringified contents of the pageContent attribute on each returned search result document.
* @property {number} [sequence=0] The index of the document in the search result documents list.
* @property {string} search_id The identifier from the LangChainVectorSearch event.
*/
/**
* @type {LangChainVectorSearchResultParams}
Expand All @@ -23,13 +23,12 @@ const defaultParams = {
}

class LangChainVectorSearchResult extends LangChainEvent {
search_id = crypto.randomUUID()

constructor(params) {
params = Object.assign({}, defaultParams, params)
super(params)
const { agent } = params

this.search_id = params.search_id
this.sequence = params.sequence

if (agent.config.ai_monitoring.record_content.enabled === true) {
Expand Down
6 changes: 4 additions & 2 deletions lib/metrics/names.js
Expand Up @@ -170,7 +170,8 @@ const AI = {
EMBEDDING: 'Llm/embedding',
COMPLETION: 'Llm/completion',
TOOL: 'Llm/tool',
CHAIN: 'Llm/chain'
CHAIN: 'Llm/chain',
VECTORSTORE: 'Llm/vectorstore'
}

AI.OPENAI = {
Expand All @@ -184,7 +185,8 @@ AI.LANGCHAIN = {
EMBEDDING: `${AI.EMBEDDING}/Langchain`,
COMPLETION: `${AI.COMPLETION}/Langchain`,
TOOL: `${AI.TOOL}/Langchain`,
CHAIN: `${AI.CHAIN}/Langchain`
CHAIN: `${AI.CHAIN}/Langchain`,
VECTORSTORE: `${AI.VECTORSTORE}/Langchain`
}

const RESTIFY = {
Expand Down
68 changes: 68 additions & 0 deletions test/unit/instrumentation/langchain/vectorstore.test.js
@@ -0,0 +1,68 @@
/*
* Copyright 2023 New Relic Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/

'use strict'

const { test } = require('tap')
const helper = require('../../../lib/agent_helper')
const GenericShim = require('../../../../lib/shim/shim')
const sinon = require('sinon')

test('langchain/core/vectorstore unit tests', (t) => {
  // Every test gets a fresh mocked agent with AI monitoring and the langchain
  // feature flag switched on, plus a shim whose logger is stubbed.
  t.beforeEach((t) => {
    const sandbox = sinon.createSandbox()

    const agent = helper.loadMockedAgent()
    agent.config.ai_monitoring = { enabled: true }
    agent.config.feature_flag = { langchain_instrumentation: true }

    const shim = new GenericShim(agent, 'langchain')
    shim.pkgVersion = '0.1.26'
    for (const level of ['debug', 'warn']) {
      sandbox.stub(shim.logger, level)
    }

    Object.assign(t.context, {
      agent,
      shim,
      sandbox,
      initialize: require('../../../../lib/instrumentation/langchain/vectorstore')
    })
  })

  t.afterEach((t) => {
    const { agent, sandbox } = t.context
    helper.unloadAgent(agent)
    sandbox.restore()
  })

  /**
   * Builds a minimal stand-in for the `@langchain/core/vectorstores` module.
   *
   * @returns {object} module-like object exposing a `VectorStore` constructor
   */
  function getMockModule() {
    function VectorStore() {}
    VectorStore.prototype.similaritySearch = async function call() {}
    return { VectorStore }
  }

  // Each combination below has at least one of the two switches turned off.
  const disabledConfigs = [
    { aiMonitoring: false, langChain: true },
    { aiMonitoring: true, langChain: false },
    { aiMonitoring: false, langChain: false }
  ]

  for (const { aiMonitoring, langChain } of disabledConfigs) {
    t.test(
      `should not register instrumentation if ai_monitoring is ${aiMonitoring} and langchain_instrumentation is ${langChain}`,
      (t) => {
        const { shim, agent, initialize } = t.context
        const MockVectorstore = getMockModule()
        agent.config.ai_monitoring.enabled = aiMonitoring
        agent.config.feature_flag.langchain_instrumentation = langChain

        initialize(shim, MockVectorstore)

        const debugLog = shim.logger.debug
        t.equal(debugLog.callCount, 1, 'should log 1 debug messages')
        t.equal(
          debugLog.args[0][0],
          'langchain instrumentation is disabled. To enable set `config.ai_monitoring.enabled` to true'
        )

        const { similaritySearch } = MockVectorstore.VectorStore.prototype
        t.equal(
          shim.isWrapped(similaritySearch),
          false,
          'should not wrap vectorstore similaritySearch'
        )
        t.end()
      }
    )
  }

  t.end()
})
19 changes: 15 additions & 4 deletions test/unit/llm-events/langchain/vector-search-result.test.js
Expand Up @@ -7,6 +7,7 @@

const tap = require('tap')
const LangChainVectorSearchResult = require('../../../../lib/llm-events/langchain/vector-search-result')
const LangChainVectorSearch = require('../../../../lib/llm-events/langchain/vector-search')

tap.beforeEach((t) => {
t.context._tx = {
Expand Down Expand Up @@ -44,6 +45,9 @@ tap.beforeEach((t) => {
transaction: {
id: 'tx-1',
traceId: 'trace-1'
},
getDurationInMillis() {
return 42
}
}

Expand All @@ -52,12 +56,19 @@ tap.beforeEach((t) => {
})

tap.test('create entity', async (t) => {
const search = new LangChainVectorSearchResult({
const search = new LangChainVectorSearch({
...t.context,
query: 'hello world',
k: 1
})

const searchResult = new LangChainVectorSearchResult({
...t.context,
sequence: 1,
pageContent: 'hello world'
pageContent: 'hello world',
search_id: search.id
})
t.match(search, {
t.match(searchResult, {
id: /[a-z0-9-]{36}/,
appName: 'test-app',
['llm.conversation_id']: 'test-conversation',
Expand All @@ -71,7 +82,7 @@ tap.test('create entity', async (t) => {
virtual_llm: true,
sequence: 1,
page_content: 'hello world',
search_id: /[a-z0-9-]{36}/
search_id: search.id
})
})

Expand Down
3 changes: 2 additions & 1 deletion test/unit/llm-events/openai/error.test.js
Expand Up @@ -18,7 +18,8 @@ tap.test('LlmErrorMessage', (t) => {
'error.code': 'insufficient_quota',
'error.param': 'test-param',
'completion_id': undefined,
'embedding_id': undefined
'embedding_id': undefined,
'vector_store_id': undefined
}
t.same(errorMsg, expected)
t.end()
Expand Down

0 comments on commit 3c50606

Please sign in to comment.