From 1c62321a4320a81a6a25aa064e3cdb42d6fbf851 Mon Sep 17 00:00:00 2001
From: Bob Evans <revans@newrelic.com>
Date: Thu, 16 Nov 2023 08:48:12 -0500
Subject: [PATCH] feat: Added instrumentation for openai embedding creation
 (#1872)

---
 lib/instrumentation/openai.js            | 38 ++++++++++++-
 lib/metrics/names.js                     |  5 ++
 test/unit/instrumentation/openai.test.js |  2 +
 test/versioned/openai/openai.tap.js      | 71 +++++++++++++++++++++++-
 4 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/lib/instrumentation/openai.js b/lib/instrumentation/openai.js
index 9ade582120..379c1b4e94 100644
--- a/lib/instrumentation/openai.js
+++ b/lib/instrumentation/openai.js
@@ -7,11 +7,13 @@
 const { openAiHeaders, openAiApiKey } = require('../../lib/symbols')
 const {
   LlmChatCompletionMessage,
-  LlmChatCompletionSummary
+  LlmChatCompletionSummary,
+  LlmEmbedding
 } = require('../../lib/llm-events/openai')
 const LlmTrackedIds = require('../../lib/llm-events/tracked-ids')
 
 const MIN_VERSION = '4.0.0'
+const { AI } = require('../../lib/metrics/names')
 const semver = require('semver')
 
 /**
@@ -104,7 +106,7 @@ module.exports = function initialize(agent, openai, moduleName, shim) {
     function wrapCreate(shim, create, name, args) {
       const [request] = args
       return {
-        name: 'AI/OpenAI/Chat/Completions/Create',
+        name: `${AI.OPEN_AI}/Chat/Completions/Create`,
         promise: true,
         opaque: true,
         // eslint-disable-next-line max-params
@@ -156,4 +158,36 @@ module.exports = function initialize(agent, openai, moduleName, shim) {
       }
     }
   )
+
+  /**
+   * Instruments embedding creation and creates an LlmEmbedding event.
+   * No event is recorded when the call settles without a response.
+   */
+  shim.record(
+    openai.Embeddings.prototype,
+    'create',
+    function wrapEmbeddingCreate(shim, embeddingCreate, name, args) {
+      const [request] = args
+      return {
+        name: `${AI.OPEN_AI}/Embeddings/Create`,
+        promise: true,
+        // eslint-disable-next-line max-params
+        after(_shim, _fn, _name, err, response, segment) {
+          // A failed call may provide no response; bail out so the caller
+          // still gets the original error instead of a TypeError raised here.
+          if (!response) {
+            return
+          }
+
+          response.headers = segment[openAiHeaders]
+          response.api_key = segment[openAiApiKey]
+          const embedding = new LlmEmbedding({ agent, segment, request, response })
+          recordEvent('LlmEmbedding', embedding)
+          // cleanup keys on response before returning to user code
+          delete response.api_key
+          delete response.headers
+        }
+      }
+    }
+  )
 }
diff --git a/lib/metrics/names.js b/lib/metrics/names.js
index ba5e5920ba..80145ffb70 100644
--- a/lib/metrics/names.js
+++ b/lib/metrics/names.js
@@ -164,6 +164,10 @@ const EXPRESS = {
   ERROR_HANDLER: MIDDLEWARE.PREFIX + `${EXPRESS_LITERAL}/`
 }
 
+const AI = {
+  OPEN_AI: 'AI/OpenAI' // shared prefix for the OpenAI instrumentation segment names
+}
+
 const RESTIFY = {
   PREFIX: 'Restify/'
 }
@@ -314,6 +318,7 @@ const LOGGING = {
 
 module.exports = {
   ACTION_DELIMITER: '/',
+  AI,
   ALL: ALL,
   APDEX: 'Apdex',
   CASSANDRA: CASSANDRA,
diff --git a/test/unit/instrumentation/openai.test.js b/test/unit/instrumentation/openai.test.js
index 6bcb521a9d..d930f2b0b9 100644
--- a/test/unit/instrumentation/openai.test.js
+++ b/test/unit/instrumentation/openai.test.js
@@ -39,6 +39,8 @@ test('openai unit tests', (t) => {
     function OpenAI() {}
     OpenAI.prototype.makeRequest = function () {}
     OpenAI.Chat = { Completions }
+    OpenAI.Embeddings = function () {}
+    OpenAI.Embeddings.prototype.create = function () {}
     return OpenAI
   }
 
diff --git a/test/versioned/openai/openai.tap.js b/test/versioned/openai/openai.tap.js
index 68e4a7ceb4..23a80f6e30 100644
--- a/test/versioned/openai/openai.tap.js
+++ b/test/versioned/openai/openai.tap.js
@@ -8,6 +8,7 @@
 const tap = require('tap')
 const helper = require('../../lib/agent_helper')
 const createOpenAIMockServer = require('../../lib/openai-mock-server')
+const { assertSegments } = require('../../lib/metrics_helper')
 // TODO: remove config once we fully release OpenAI instrumentation
 const config = {
   feature_flag: {
@@ -20,6 +21,8 @@ tap.test('OpenAI instrumentation', (t) => {
 
   t.before(async () => {
     const { host, port, server } = await createOpenAIMockServer()
+    t.context.host = host
+    t.context.port = port
     t.context.server = server
     t.context.agent = helper.instrumentMockedAgent(config)
     const OpenAI = require('openai')
@@ -45,8 +48,8 @@ tap.test('OpenAI instrumentation', (t) => {
         messages: [{ role: 'user', content: 'You are a mathematician.' }]
       })
 
-      test.not(results.headers, 'should remove response headers from user result')
-      test.not(results.api_key, 'should remove api_key from user result')
+      test.notOk(results.headers, 'should remove response headers from user result')
+      test.notOk(results.api_key, 'should remove api_key from user result')
       test.equal(results.choices[0].message.content, '1 plus 2 is 3.')
 
       const [span] = tx.trace.root.children
@@ -162,7 +165,6 @@ tap.test('OpenAI instrumentation', (t) => {
           'chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTeat-1'
         ]
       })
-
       test.end()
     })
   })
@@ -207,7 +209,70 @@ tap.test('OpenAI instrumentation', (t) => {
           })
         })
       )
+      test.end()
+    })
+  })
+
+  // An embeddings call should yield an AI segment that wraps the external HTTP
+  // call, and the result handed to the caller must not carry headers/api_key.
+  t.test('should create embedding span on successful embedding create', (test) => {
+    const { client, agent, host, port } = t.context
+    const expectedSegments = [
+      'AI/OpenAI/Embeddings/Create',
+      [`External/${host}:${port}/embeddings`]
+    ]
+    helper.runInTransaction(agent, async (tx) => {
+      const request = { input: 'This is an embedding test.', model: 'text-embedding-ada-002' }
+      const { headers, api_key: apiKey, model } = await client.embeddings.create(request)
+
+      test.notOk(headers, 'should remove response headers from user result')
+      test.notOk(apiKey, 'should remove api_key from user result')
+      test.equal(model, 'text-embedding-ada-002-v2')
+
+      test.doesNotThrow(() => {
+        assertSegments(tx.trace.root, expectedSegments, { exact: false })
+      }, 'should have expected segments')
+      test.end()
+    })
+  })
+
+  // Verifies that one embeddings.create call inside a transaction produces
+  // exactly one custom event, typed LlmEmbedding, with the expected attributes.
+  t.test('should create embedding message for an embedding', (test) => {
+    const { client, agent } = t.context
+    helper.runInTransaction(agent, async (tx) => {
+      const request = { input: 'This is an embedding test.', model: 'text-embedding-ada-002' }
+      await client.embeddings.create(request)
+      const events = agent.customEventAggregator.events.toArray()
+      test.equal(events.length, 1, 'should create a single embedding event')
+      const [embedding] = events
+      const expectedEmbedding = {
+        'id': /[a-f0-9]{36}/,
+        'appName': 'New Relic for Node.js tests',
+        'request_id': 'c70828b2293314366a76a2b1dcb20688',
+        'trace_id': tx.traceId,
+        'span_id': tx.trace.root.children[0].id,
+        'transaction_id': tx.id,
+        'response.model': 'text-embedding-ada-002-v2',
+        'vendor': 'openAI',
+        'ingest_source': 'Node',
+        'request.model': 'text-embedding-ada-002',
+        'duration': tx.trace.root.children[0].getExclusiveDurationInMillis(),
+        'api_key_last_four_digits': 'sk--key',
+        'response.organization': 'new-relic-nkmd8b',
+        'response.usage.total_tokens': 6,
+        'response.usage.prompt_tokens': 6,
+        'response.headers.llmVersion': '2020-10-01',
+        'response.headers.ratelimitLimitRequests': '200',
+        'response.headers.ratelimitLimitTokens': '150000',
+        'response.headers.ratelimitResetTokens': '2ms',
+        'response.headers.ratelimitRemainingTokens': '149994',
+        'response.headers.ratelimitRemainingRequests': '197',
+        'input': 'This is an embedding test.'
+      }
 
+      test.equal(embedding[0].type, 'LlmEmbedding')
+      test.match(embedding[1], expectedEmbedding, 'should match embedding message')
       test.end()
     })
   })