
Add support for Meta Llama2. (#1010)
* Add support for Llama2.

* Fixup: lint errors

* [Mega-Linter] Apply linters fixes

* Trigger tests

---------

Co-authored-by: Hannah Stepanek <hstepanek@newrelic.com>
Co-authored-by: hmstepanek <hmstepanek@users.noreply.github.com>
3 people committed Dec 21, 2023
1 parent 3d3aa4f commit 7051455
Showing 5 changed files with 180 additions and 10 deletions.
51 changes: 43 additions & 8 deletions newrelic/hooks/external_botocore.py
@@ -144,7 +144,7 @@ def create_chat_completion_message_event(
"response.model": request_model,
"vendor": "bedrock",
"ingest_source": "Python",
"is_response": True
"is_response": True,
}
transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)

@@ -246,7 +246,7 @@ def extract_bedrock_claude_model(request_body, response_body=None):
chat_completion_summary_dict = {
"request.max_tokens": request_body.get("max_tokens_to_sample", ""),
"request.temperature": request_body.get("temperature", ""),
"response.number_of_messages": len(input_message_list)
"response.number_of_messages": len(input_message_list),
}

if response_body:
@@ -264,6 +264,40 @@ def extract_bedrock_claude_model(request_body, response_body=None):
return input_message_list, output_message_list, chat_completion_summary_dict


def extract_bedrock_llama_model(request_body, response_body=None):
request_body = json.loads(request_body)
if response_body:
response_body = json.loads(response_body)

input_message_list = [{"role": "user", "content": request_body.get("prompt", "")}]

chat_completion_summary_dict = {
"request.max_tokens": request_body.get("max_gen_len", ""),
"request.temperature": request_body.get("temperature", ""),
"response.number_of_messages": len(input_message_list),
}

if response_body:
output_message_list = [{"role": "assistant", "content": response_body.get("generation", "")}]
prompt_tokens = response_body.get("prompt_token_count", None)
completion_tokens = response_body.get("generation_token_count", None)
total_tokens = prompt_tokens + completion_tokens if prompt_tokens and completion_tokens else None

chat_completion_summary_dict.update(
{
"response.usage.completion_tokens": completion_tokens,
"response.usage.prompt_tokens": prompt_tokens,
"response.usage.total_tokens": total_tokens,
"response.choices.finish_reason": response_body.get("stop_reason", ""),
"response.number_of_messages": len(input_message_list) + len(output_message_list),
}
)
else:
output_message_list = []

return input_message_list, output_message_list, chat_completion_summary_dict


def extract_bedrock_cohere_model(request_body, response_body=None):
request_body = json.loads(request_body)
if response_body:
@@ -274,7 +308,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
chat_completion_summary_dict = {
"request.max_tokens": request_body.get("max_tokens", ""),
"request.temperature": request_body.get("temperature", ""),
"response.number_of_messages": len(input_message_list)
"response.number_of_messages": len(input_message_list),
}

if response_body:
@@ -300,6 +334,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
("ai21.j2", extract_bedrock_ai21_j2_model),
("cohere", extract_bedrock_cohere_model),
("anthropic.claude", extract_bedrock_claude_model),
("meta.llama2", extract_bedrock_llama_model),
]
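
For reference, a minimal sketch of how this prefix table is presumably consulted, assuming a simple startswith match (the dispatch code itself sits outside this hunk):

    def _lookup_extractor(model_id):
        # Assumed helper: return the first extractor whose prefix matches,
        # e.g. "meta.llama2-13b-chat-v1" -> extract_bedrock_llama_model.
        for prefix, extractor in MODEL_EXTRACTORS:
            if model_id.startswith(prefix):
                return extractor
        return None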


@@ -368,7 +403,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
notice_error_attributes = {
"http.statusCode": error_attributes["http.statusCode"],
"error.message": error_attributes["error.message"],
"error.code": error_attributes["error.code"]
"error.code": error_attributes["error.code"],
}

if is_embedding:
@@ -392,7 +427,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
ft.duration,
True,
trace_id,
-span_id
+span_id,
)
else:
handle_chat_completion_event(
@@ -406,7 +441,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
ft.duration,
True,
trace_id,
-span_id
+span_id,
)

finally:
@@ -463,7 +498,7 @@ def handle_embedding_event(
duration,
is_error,
trace_id,
-span_id
+span_id,
):
embedding_id = str(uuid.uuid4())

@@ -508,7 +543,7 @@ def handle_chat_completion_event(
duration,
is_error,
trace_id,
-span_id
+span_id,
):
custom_attrs_dict = transaction._custom_params
conversation_id = custom_attrs_dict.get("llm.conversation_id", "")
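A quick sanity sketch of the new extractor's output shape, built from a hand-written Llama 2 payload (illustrative values, not taken from the test suite). Note that the truthiness check on the token counts means a count of zero yields a total of None:

    import json

    request = json.dumps({"prompt": "Hi", "temperature": 0.7, "max_gen_len": 100})
    response = json.dumps(
        {"generation": "Hello!", "prompt_token_count": 2, "generation_token_count": 3, "stop_reason": "stop"}
    )

    inputs, outputs, summary = extract_bedrock_llama_model(request, response)
    assert inputs == [{"role": "user", "content": "Hi"}]
    assert outputs == [{"role": "assistant", "content": "Hello!"}]
    assert summary["response.usage.total_tokens"] == 5
    assert summary["response.number_of_messages"] == 2
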
2 changes: 1 addition & 1 deletion newrelic/hooks/mlmodel_openai.py
@@ -864,7 +864,7 @@ def wrap_base_client_process_response(wrapped, instance, args, kwargs):
nr_response_headers = getattr(response, "headers")

return_val = wrapped(*args, **kwargs)
-# Obtain reponse headers for v1
+# Obtain response headers for v1
return_val._nr_response_headers = nr_response_headers
return return_val

21 changes: 20 additions & 1 deletion tests/external_botocore/_mock_external_bedrock_server.py
@@ -3332,6 +3332,16 @@
"prompt": "What is 212 degrees Fahrenheit converted to Celsius?",
},
],
"meta.llama2-13b-chat-v1::What is 212 degrees Fahrenheit converted to Celsius?": [
{"Content-Type": "application/json", "x-amzn-RequestId": "9a64cdb0-3e82-41c7-873a-c12a77e0143a"},
200,
{
"generation": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
"prompt_token_count": 17,
"generation_token_count": 46,
"stop_reason": "stop",
},
],
"does-not-exist::": [
{
"Content-Type": "application/json",
@@ -3395,6 +3405,15 @@
403,
{"message": "The security token included in the request is invalid."},
],
"meta.llama2-13b-chat-v1::Invalid Token": [
{
"Content-Type": "application/json",
"x-amzn-RequestId": "22476490-a0d6-42db-b5ea-32d0b8a7f751",
"x-amzn-ErrorType": "UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/",
},
403,
{"message": "The security token included in the request is invalid."},
],
}

MODEL_PATH_RE = re.compile(r"/model/([^/]+)/invoke")
@@ -3454,7 +3473,7 @@ def __init__(self, handler=simple_get, port=None, *args, **kwargs):
if __name__ == "__main__":
# Use this to sort dict for easier future incremental updates
print("RESPONSES = %s" % dict(sorted(RESPONSES.items(), key=lambda i: (i[1][1], i[0]))))

with MockExternalBedrockServer() as server:
print("MockExternalBedrockServer serving on port %s" % str(server.port))
while True:
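The new mock entries follow the file's existing "<model-id>::<prompt>" key convention. A sketch of how the handler presumably builds that key for an incoming request, assuming it pairs the model ID captured by MODEL_PATH_RE with the prompt from the request body (the handler itself is outside this hunk):

    import json
    import re

    MODEL_PATH_RE = re.compile(r"/model/([^/]+)/invoke")

    def _response_key(path, raw_body):
        # e.g. "/model/meta.llama2-13b-chat-v1/invoke" with a JSON body whose
        # "prompt" is "Invalid Token" -> "meta.llama2-13b-chat-v1::Invalid Token"
        model = MODEL_PATH_RE.match(path).group(1)
        prompt = json.loads(raw_body).get("prompt", "")
        return "%s::%s" % (model, prompt)
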
115 changes: 115 additions & 0 deletions tests/external_botocore/_test_bedrock_chat_completion.py
@@ -3,6 +3,7 @@
"ai21.j2-mid-v1": '{"prompt": "%s", "temperature": %f, "maxTokens": %d}',
"anthropic.claude-instant-v1": '{"prompt": "Human: %s Assistant:", "temperature": %f, "max_tokens_to_sample": %d}',
"cohere.command-text-v14": '{"prompt": "%s", "temperature": %f, "max_tokens": %d}',
"meta.llama2-13b-chat-v1": '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}',
}
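
These templates are presumably rendered with old-style % formatting before being sent to the mock server; for the new Llama 2 entry, using the temperature and token limit that appear elsewhere in this file, that looks like:

    template = '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}'
    payload = template % ("What is 212 degrees Fahrenheit converted to Celsius?", 0.7, 100)
    # '{"prompt": "What is 212 degrees Fahrenheit converted to Celsius?", "temperature": 0.700000, "max_gen_len": 100}'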

chat_completion_expected_events = {
@@ -263,6 +264,72 @@
},
),
],
"meta.llama2-13b-chat-v1": [
(
{"type": "LlmChatCompletionSummary"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"transaction_id": "transaction-id",
"span_id": None,
"trace_id": "trace-id",
"request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
"api_key_last_four_digits": "CRET",
"duration": None, # Response time varies each test run
"request.model": "meta.llama2-13b-chat-v1",
"response.model": "meta.llama2-13b-chat-v1",
"response.usage.prompt_tokens": 17,
"response.usage.completion_tokens": 46,
"response.usage.total_tokens": 63,
"request.temperature": 0.7,
"request.max_tokens": 100,
"response.choices.finish_reason": "stop",
"vendor": "bedrock",
"ingest_source": "Python",
"response.number_of_messages": 2,
},
),
(
{"type": "LlmChatCompletionMessage"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
"span_id": None,
"trace_id": "trace-id",
"transaction_id": "transaction-id",
"content": "What is 212 degrees Fahrenheit converted to Celsius?",
"role": "user",
"completion_id": None,
"sequence": 0,
"response.model": "meta.llama2-13b-chat-v1",
"vendor": "bedrock",
"ingest_source": "Python",
},
),
(
{"type": "LlmChatCompletionMessage"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
"span_id": None,
"trace_id": "trace-id",
"transaction_id": "transaction-id",
"content": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
"role": "assistant",
"completion_id": None,
"sequence": 1,
"response.model": "meta.llama2-13b-chat-v1",
"vendor": "bedrock",
"ingest_source": "Python",
"is_response": True,
},
),
],
}

chat_completion_invalid_model_error_events = [
@@ -480,6 +547,49 @@
},
),
],
"meta.llama2-13b-chat-v1": [
(
{"type": "LlmChatCompletionSummary"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"transaction_id": "transaction-id",
"span_id": None,
"trace_id": "trace-id",
"request_id": "",
"api_key_last_four_digits": "-KEY",
"duration": None, # Response time varies each test run
"request.model": "meta.llama2-13b-chat-v1",
"response.model": "meta.llama2-13b-chat-v1",
"request.temperature": 0.7,
"request.max_tokens": 100,
"vendor": "bedrock",
"ingest_source": "Python",
"response.number_of_messages": 1,
"error": True,
},
),
(
{"type": "LlmChatCompletionMessage"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"request_id": "",
"span_id": None,
"trace_id": "trace-id",
"transaction_id": "transaction-id",
"content": "Invalid Token",
"role": "user",
"completion_id": None,
"sequence": 0,
"response.model": "meta.llama2-13b-chat-v1",
"vendor": "bedrock",
"ingest_source": "Python",
},
),
],
}

chat_completion_expected_client_errors = {
@@ -503,4 +613,9 @@
"error.message": "The security token included in the request is invalid.",
"error.code": "UnrecognizedClientException",
},
"meta.llama2-13b-chat-v1": {
"http.statusCode": 403,
"error.message": "The security token included in the request is invalid.",
"error.code": "UnrecognizedClientException",
},
}
1 change: 1 addition & 0 deletions tests/external_botocore/test_bedrock_chat_completion.py
@@ -56,6 +56,7 @@ def is_file_payload(request):
"ai21.j2-mid-v1",
"anthropic.claude-instant-v1",
"cohere.command-text-v14",
"meta.llama2-13b-chat-v1",
],
)
def model_id(request):
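Because the chat-completion tests are driven by this parametrized fixture, the single ID added above enrolls Llama 2 in the entire existing test matrix. A condensed sketch of the fixture, assuming the usual request.param body (only the tail of the parameter list is visible in this hunk):

    import pytest

    @pytest.fixture(
        params=[
            # ...existing model IDs...
            "meta.llama2-13b-chat-v1",  # newly added
        ]
    )
    def model_id(request):
        return request.param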
