
Add support for Meta Llama2. (#1010)
* Add support for Llama2.

* Fixup: lint errors

* [Mega-Linter] Apply linters fixes

* Trigger tests

---------

Co-authored-by: Hannah Stepanek <hstepanek@newrelic.com>
Co-authored-by: hmstepanek <hmstepanek@users.noreply.github.com>
3 people committed Dec 21, 2023
1 parent 3d3aa4f commit 7051455
Showing 5 changed files with 180 additions and 10 deletions.
51 changes: 43 additions & 8 deletions newrelic/hooks/external_botocore.py
@@ -144,7 +144,7 @@ def create_chat_completion_message_event(
"response.model": request_model,
"vendor": "bedrock",
"ingest_source": "Python",
"is_response": True
"is_response": True,
}
transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)

@@ -246,7 +246,7 @@ def extract_bedrock_claude_model(request_body, response_body=None):
chat_completion_summary_dict = {
"request.max_tokens": request_body.get("max_tokens_to_sample", ""),
"request.temperature": request_body.get("temperature", ""),
"response.number_of_messages": len(input_message_list)
"response.number_of_messages": len(input_message_list),
}

if response_body:
@@ -264,6 +264,40 @@ def extract_bedrock_claude_model(request_body, response_body=None):
return input_message_list, output_message_list, chat_completion_summary_dict


def extract_bedrock_llama_model(request_body, response_body=None):
request_body = json.loads(request_body)
if response_body:
response_body = json.loads(response_body)

input_message_list = [{"role": "user", "content": request_body.get("prompt", "")}]

chat_completion_summary_dict = {
"request.max_tokens": request_body.get("max_gen_len", ""),
"request.temperature": request_body.get("temperature", ""),
"response.number_of_messages": len(input_message_list),
}

if response_body:
output_message_list = [{"role": "assistant", "content": response_body.get("generation", "")}]
prompt_tokens = response_body.get("prompt_token_count", None)
completion_tokens = response_body.get("generation_token_count", None)
total_tokens = prompt_tokens + completion_tokens if prompt_tokens and completion_tokens else None

chat_completion_summary_dict.update(
{
"response.usage.completion_tokens": completion_tokens,
"response.usage.prompt_tokens": prompt_tokens,
"response.usage.total_tokens": total_tokens,
"response.choices.finish_reason": response_body.get("stop_reason", ""),
"response.number_of_messages": len(input_message_list) + len(output_message_list),
}
)
else:
output_message_list = []

return input_message_list, output_message_list, chat_completion_summary_dict


def extract_bedrock_cohere_model(request_body, response_body=None):
request_body = json.loads(request_body)
if response_body:
@@ -274,7 +308,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
chat_completion_summary_dict = {
"request.max_tokens": request_body.get("max_tokens", ""),
"request.temperature": request_body.get("temperature", ""),
"response.number_of_messages": len(input_message_list)
"response.number_of_messages": len(input_message_list),
}

if response_body:
@@ -300,6 +334,7 @@ def extract_bedrock_cohere_model(request_body, response_body=None):
("ai21.j2", extract_bedrock_ai21_j2_model),
("cohere", extract_bedrock_cohere_model),
("anthropic.claude", extract_bedrock_claude_model),
("meta.llama2", extract_bedrock_llama_model),
]
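
For reference, a minimal sketch of how this prefix table is presumably consulted, assuming a simple startswith match (the dispatch code itself sits outside this hunk):

    def _lookup_extractor(model_id):
        # Assumed helper: return the first extractor whose prefix matches,
        # e.g. "meta.llama2-13b-chat-v1" -> extract_bedrock_llama_model.
        for prefix, extractor in MODEL_EXTRACTORS:
            if model_id.startswith(prefix):
                return extractor
        return None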


@@ -368,7 +403,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
notice_error_attributes = {
"http.statusCode": error_attributes["http.statusCode"],
"error.message": error_attributes["error.message"],
"error.code": error_attributes["error.code"]
"error.code": error_attributes["error.code"],
}

if is_embedding:
@@ -392,7 +427,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
ft.duration,
True,
trace_id,
-span_id
+span_id,
)
else:
handle_chat_completion_event(
@@ -406,7 +441,7 @@ def wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
ft.duration,
True,
trace_id,
-span_id
+span_id,
)

finally:
@@ -463,7 +498,7 @@ def handle_embedding_event(
duration,
is_error,
trace_id,
-span_id
+span_id,
):
embedding_id = str(uuid.uuid4())

@@ -508,7 +543,7 @@ def handle_chat_completion_event(
duration,
is_error,
trace_id,
-span_id
+span_id,
):
custom_attrs_dict = transaction._custom_params
conversation_id = custom_attrs_dict.get("llm.conversation_id", "")
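A quick sanity sketch of the new extractor's output shape, built from a hand-written Llama 2 payload (illustrative values, not taken from the test suite). Note that the truthiness check on the token counts means a count of zero yields a total of None:

    import json

    request = json.dumps({"prompt": "Hi", "temperature": 0.7, "max_gen_len": 100})
    response = json.dumps(
        {"generation": "Hello!", "prompt_token_count": 2, "generation_token_count": 3, "stop_reason": "stop"}
    )

    inputs, outputs, summary = extract_bedrock_llama_model(request, response)
    assert inputs == [{"role": "user", "content": "Hi"}]
    assert outputs == [{"role": "assistant", "content": "Hello!"}]
    assert summary["response.usage.total_tokens"] == 5
    assert summary["response.number_of_messages"] == 2
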
2 changes: 1 addition & 1 deletion newrelic/hooks/mlmodel_openai.py
@@ -864,7 +864,7 @@ def wrap_base_client_process_response(wrapped, instance, args, kwargs):
nr_response_headers = getattr(response, "headers")

return_val = wrapped(*args, **kwargs)
-# Obtain reponse headers for v1
+# Obtain response headers for v1
return_val._nr_response_headers = nr_response_headers
return return_val

21 changes: 20 additions & 1 deletion tests/external_botocore/_mock_external_bedrock_server.py
@@ -3332,6 +3332,16 @@
"prompt": "What is 212 degrees Fahrenheit converted to Celsius?",
},
],
"meta.llama2-13b-chat-v1::What is 212 degrees Fahrenheit converted to Celsius?": [
{"Content-Type": "application/json", "x-amzn-RequestId": "9a64cdb0-3e82-41c7-873a-c12a77e0143a"},
200,
{
"generation": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
"prompt_token_count": 17,
"generation_token_count": 46,
"stop_reason": "stop",
},
],
"does-not-exist::": [
{
"Content-Type": "application/json",
@@ -3395,6 +3405,15 @@
403,
{"message": "The security token included in the request is invalid."},
],
"meta.llama2-13b-chat-v1::Invalid Token": [
{
"Content-Type": "application/json",
"x-amzn-RequestId": "22476490-a0d6-42db-b5ea-32d0b8a7f751",
"x-amzn-ErrorType": "UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/",
},
403,
{"message": "The security token included in the request is invalid."},
],
}

MODEL_PATH_RE = re.compile(r"/model/([^/]+)/invoke")
@@ -3454,7 +3473,7 @@ def __init__(self, handler=simple_get, port=None, *args, **kwargs):
if __name__ == "__main__":
# Use this to sort dict for easier future incremental updates
print("RESPONSES = %s" % dict(sorted(RESPONSES.items(), key=lambda i: (i[1][1], i[0]))))

with MockExternalBedrockServer() as server:
print("MockExternalBedrockServer serving on port %s" % str(server.port))
while True:
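The new mock entries follow the file's existing "<model-id>::<prompt>" key convention. A sketch of how the handler presumably builds that key for an incoming request, assuming it pairs the model ID captured by MODEL_PATH_RE with the prompt from the request body (the handler itself is outside this hunk):

    import json
    import re

    MODEL_PATH_RE = re.compile(r"/model/([^/]+)/invoke")

    def _response_key(path, raw_body):
        # e.g. "/model/meta.llama2-13b-chat-v1/invoke" with a JSON body whose
        # "prompt" is "Invalid Token" -> "meta.llama2-13b-chat-v1::Invalid Token"
        model = MODEL_PATH_RE.match(path).group(1)
        prompt = json.loads(raw_body).get("prompt", "")
        return "%s::%s" % (model, prompt)
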
115 changes: 115 additions & 0 deletions tests/external_botocore/_test_bedrock_chat_completion.py
@@ -3,6 +3,7 @@
"ai21.j2-mid-v1": '{"prompt": "%s", "temperature": %f, "maxTokens": %d}',
"anthropic.claude-instant-v1": '{"prompt": "Human: %s Assistant:", "temperature": %f, "max_tokens_to_sample": %d}',
"cohere.command-text-v14": '{"prompt": "%s", "temperature": %f, "max_tokens": %d}',
"meta.llama2-13b-chat-v1": '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}',
}
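
These templates are presumably rendered with old-style % formatting before being sent to the mock server; for the new Llama 2 entry, using the temperature and token limit that appear elsewhere in this file, that looks like:

    template = '{"prompt": "%s", "temperature": %f, "max_gen_len": %d}'
    payload = template % ("What is 212 degrees Fahrenheit converted to Celsius?", 0.7, 100)
    # '{"prompt": "What is 212 degrees Fahrenheit converted to Celsius?", "temperature": 0.700000, "max_gen_len": 100}'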

chat_completion_expected_events = {
@@ -263,6 +264,72 @@
},
),
],
"meta.llama2-13b-chat-v1": [
(
{"type": "LlmChatCompletionSummary"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"transaction_id": "transaction-id",
"span_id": None,
"trace_id": "trace-id",
"request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
"api_key_last_four_digits": "CRET",
"duration": None, # Response time varies each test run
"request.model": "meta.llama2-13b-chat-v1",
"response.model": "meta.llama2-13b-chat-v1",
"response.usage.prompt_tokens": 17,
"response.usage.completion_tokens": 46,
"response.usage.total_tokens": 63,
"request.temperature": 0.7,
"request.max_tokens": 100,
"response.choices.finish_reason": "stop",
"vendor": "bedrock",
"ingest_source": "Python",
"response.number_of_messages": 2,
},
),
(
{"type": "LlmChatCompletionMessage"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
"span_id": None,
"trace_id": "trace-id",
"transaction_id": "transaction-id",
"content": "What is 212 degrees Fahrenheit converted to Celsius?",
"role": "user",
"completion_id": None,
"sequence": 0,
"response.model": "meta.llama2-13b-chat-v1",
"vendor": "bedrock",
"ingest_source": "Python",
},
),
(
{"type": "LlmChatCompletionMessage"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"request_id": "9a64cdb0-3e82-41c7-873a-c12a77e0143a",
"span_id": None,
"trace_id": "trace-id",
"transaction_id": "transaction-id",
"content": " Here's the answer:\n\n212°F = 100°C\n\nSo, 212 degrees Fahrenheit is equal to 100 degrees Celsius.",
"role": "assistant",
"completion_id": None,
"sequence": 1,
"response.model": "meta.llama2-13b-chat-v1",
"vendor": "bedrock",
"ingest_source": "Python",
"is_response": True,
},
),
],
}

chat_completion_invalid_model_error_events = [
@@ -480,6 +547,49 @@
},
),
],
"meta.llama2-13b-chat-v1": [
(
{"type": "LlmChatCompletionSummary"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"transaction_id": "transaction-id",
"span_id": None,
"trace_id": "trace-id",
"request_id": "",
"api_key_last_four_digits": "-KEY",
"duration": None, # Response time varies each test run
"request.model": "meta.llama2-13b-chat-v1",
"response.model": "meta.llama2-13b-chat-v1",
"request.temperature": 0.7,
"request.max_tokens": 100,
"vendor": "bedrock",
"ingest_source": "Python",
"response.number_of_messages": 1,
"error": True,
},
),
(
{"type": "LlmChatCompletionMessage"},
{
"id": None, # UUID that varies with each run
"appName": "Python Agent Test (external_botocore)",
"conversation_id": "my-awesome-id",
"request_id": "",
"span_id": None,
"trace_id": "trace-id",
"transaction_id": "transaction-id",
"content": "Invalid Token",
"role": "user",
"completion_id": None,
"sequence": 0,
"response.model": "meta.llama2-13b-chat-v1",
"vendor": "bedrock",
"ingest_source": "Python",
},
),
],
}

chat_completion_expected_client_errors = {
@@ -503,4 +613,9 @@
"error.message": "The security token included in the request is invalid.",
"error.code": "UnrecognizedClientException",
},
"meta.llama2-13b-chat-v1": {
"http.statusCode": 403,
"error.message": "The security token included in the request is invalid.",
"error.code": "UnrecognizedClientException",
},
}
1 change: 1 addition & 0 deletions tests/external_botocore/test_bedrock_chat_completion.py
@@ -56,6 +56,7 @@ def is_file_payload(request):
"ai21.j2-mid-v1",
"anthropic.claude-instant-v1",
"cohere.command-text-v14",
"meta.llama2-13b-chat-v1",
],
)
def model_id(request):
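Because the chat-completion tests are driven by this parametrized fixture, the single ID added above enrolls Llama 2 in the entire existing test matrix. A condensed sketch of the fixture, assuming the usual request.param body (only the tail of the parameter list is visible in this hunk):

    import pytest

    @pytest.fixture(
        params=[
            # ...existing model IDs...
            "meta.llama2-13b-chat-v1",  # newly added
        ]
    )
    def model_id(request):
        return request.param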
