-
Notifications
You must be signed in to change notification settings - Fork 10
RD-3150 - faster http hooks #120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
102cb32
8c9d857
70ce8cd
60660d7
4da506e
3675dc2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,8 +136,10 @@ def recursive_json_join(d1: dict, d2: dict): | |
* if key in d2 and is not dictionary, then the value is d2[key] | ||
* otherwise, join d1[key] and d2[key] | ||
""" | ||
if d1 is None or d2 is None: | ||
return d1 or d2 | ||
d = {} | ||
for key in itertools.chain(d1.keys(), d2.keys()): | ||
for key in set(itertools.chain(d1.keys(), d2.keys())): | ||
value = d1.get(key, d2.get(key)) | ||
if isinstance(value, dict): | ||
d[key] = recursive_json_join(d1.get(key, {}), d2.get(key, {})) | ||
|
@@ -293,7 +295,7 @@ def _parse_streams(event: dict) -> Dict[str, str]: | |
def should_scrub_domain(url: str) -> bool: | ||
if url and Configuration.domains_scrubber: | ||
for regex in Configuration.domains_scrubber: | ||
if re.match(regex, url, re.IGNORECASE): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. compile regexes once (~3s) |
||
if regex.match(url): | ||
return True | ||
return False | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,6 @@ | |
import uuid | ||
import signal | ||
import traceback | ||
import http.client | ||
from typing import List, Dict, Tuple, Optional, Callable, Set | ||
|
||
from lumigo_tracer.parsers.event_parser import EventParser | ||
|
@@ -17,6 +16,7 @@ | |
omit_keys, | ||
EXECUTION_TAGS_KEY, | ||
MAX_ENTRY_SIZE, | ||
get_timeout_buffer, | ||
) | ||
from lumigo_tracer import utils | ||
from lumigo_tracer.parsers.parser import get_parser, HTTP_TYPE, StepFunctionParser | ||
|
@@ -72,8 +72,6 @@ def __init__( | |
"region": region, | ||
"parentId": request_id, | ||
"info": {"tracer": {"version": version}, "traceId": {"Root": trace_root}}, | ||
"event": event, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. send the event and envs only on functions spans |
||
"envs": envs, | ||
"token": Configuration.token, | ||
} | ||
self.function_span = recursive_json_join( | ||
|
@@ -82,6 +80,8 @@ def __init__( | |
"type": "function", | ||
"name": name, | ||
"runtime": runtime, | ||
"event": event, | ||
"envs": envs, | ||
"memoryAllocated": memory_allocated, | ||
"readiness": "cold" if SpansContainer.is_cold else "warm", | ||
"info": { | ||
|
@@ -93,7 +93,7 @@ def __init__( | |
}, | ||
self.base_msg, | ||
) | ||
self.previous_request: Tuple[Optional[http.client.HTTPMessage], bytes] = (None, b"") | ||
self.previous_request: Tuple[Optional[dict], bytes] = (None, b"") | ||
self.previous_response_body: bytes = b"" | ||
self.http_span_ids_to_send: Set[str] = set() | ||
self.http_spans: List[Dict] = [] | ||
|
@@ -124,12 +124,11 @@ def start_timeout_timer(self, context=None) -> None: | |
get_logger().info("Skip setting timeout timer - Could not get the remaining time.") | ||
return | ||
remaining_time = context.get_remaining_time_in_millis() / 1000 | ||
if Configuration.timeout_timer_buffer >= remaining_time: | ||
buffer = get_timeout_buffer(remaining_time) | ||
if buffer >= remaining_time or remaining_time < 2: | ||
get_logger().debug("Skip setting timeout timer - Too short timeout.") | ||
return | ||
TimeoutMechanism.start( | ||
remaining_time - Configuration.timeout_timer_buffer, self.handle_timeout | ||
) | ||
TimeoutMechanism.start(remaining_time - buffer, self.handle_timeout) | ||
|
||
def add_request_event(self, parse_params: HttpRequest): | ||
""" | ||
|
@@ -169,7 +168,7 @@ def update_event_end_time(self) -> None: | |
self.http_spans[-1]["ended"] = int(time.time() * 1000) | ||
|
||
def update_event_response( | ||
self, host: Optional[str], status_code: int, headers: http.client.HTTPMessage, body: bytes | ||
self, host: Optional[str], status_code: int, headers: dict, body: bytes | ||
) -> None: | ||
""" | ||
:param host: If None, use the host from the last span, otherwise this is the first chuck and we can empty | ||
|
@@ -183,6 +182,7 @@ def update_event_response( | |
else: | ||
self.previous_response_body = b"" | ||
|
||
headers = {k.lower(): v for k, v in headers.items()} if headers else {} | ||
parser = get_parser(host, headers)() # type: ignore | ||
if len(self.previous_response_body) < MAX_ENTRY_SIZE: | ||
self.previous_response_body += body | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
get_logger, | ||
lumigo_safe_execute, | ||
is_aws_environment, | ||
ensure_str, | ||
) | ||
from lumigo_tracer.spans_container import SpansContainer, TimeoutMechanism | ||
from lumigo_tracer.parsers.http_data_classes import HttpRequest | ||
|
@@ -27,6 +28,7 @@ | |
CONTEXT_WRAPPED_BY_LUMIGO_KEY = "_wrapped_by_lumigo" | ||
MAX_READ_SIZE = 1024 | ||
already_wrapped = False | ||
LUMIGO_HEADERS_HOOK_KEY = "_lumigo_headers_hook" | ||
|
||
|
||
def _request_wrapper(func, instance, args, kwargs): | ||
|
@@ -51,7 +53,12 @@ def _request_wrapper(func, instance, args, kwargs): | |
with lumigo_safe_execute("parse request"): | ||
if isinstance(data, bytes) and _BODY_HEADER_SPLITTER in data: | ||
headers, body = data.split(_BODY_HEADER_SPLITTER, 1) | ||
if _FLAGS_HEADER_SPLITTER in headers: | ||
hooked_headers = getattr(instance, LUMIGO_HEADERS_HOOK_KEY, None) | ||
if hooked_headers: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of parse the headers from the bytes - use the hook (~20s) |
||
# we will get here only if _headers_reminder_wrapper ran first. remove its traces. | ||
headers = {ensure_str(k): ensure_str(v) for k, v in hooked_headers.items()} | ||
setattr(instance, LUMIGO_HEADERS_HOOK_KEY, None) | ||
elif _FLAGS_HEADER_SPLITTER in headers: | ||
request_info, headers = headers.split(_FLAGS_HEADER_SPLITTER, 1) | ||
headers = http.client.parse_headers(BytesIO(headers)) | ||
path_and_query_params = ( | ||
|
@@ -63,6 +70,8 @@ def _request_wrapper(func, instance, args, kwargs): | |
) | ||
uri = f"{host}{path_and_query_params}" | ||
host = host or headers.get("Host") | ||
else: | ||
headers = None | ||
|
||
with lumigo_safe_execute("add request event"): | ||
if headers: | ||
|
@@ -80,14 +89,23 @@ def _request_wrapper(func, instance, args, kwargs): | |
return ret_val | ||
|
||
|
||
def _headers_reminder_wrapper(func, instance, args, kwargs): | ||
""" | ||
This is the wrapper of the function `http.client.HTTPConnection.request` that gets the headers. | ||
Remember the headers helps us to improve performances on requests that use this flow. | ||
""" | ||
setattr(instance, LUMIGO_HEADERS_HOOK_KEY, kwargs.get("headers")) | ||
return func(*args, **kwargs) | ||
|
||
|
||
def _response_wrapper(func, instance, args, kwargs): | ||
""" | ||
This is the wrapper of the function that can be called only after that the http request was sent. | ||
Note that we don't examine the response data because it may change the original behaviour (ret_val.peek()). | ||
""" | ||
ret_val = func(*args, **kwargs) | ||
with lumigo_safe_execute("parse response"): | ||
headers = ret_val.headers | ||
headers = dict(ret_val.headers.items()) | ||
status_code = ret_val.code | ||
SpansContainer.get_span().update_event_response(instance.host, status_code, headers, b"") | ||
return ret_val | ||
|
@@ -101,7 +119,7 @@ def _read_wrapper(func, instance, args, kwargs): | |
if ret_val: | ||
with lumigo_safe_execute("parse response.read"): | ||
SpansContainer.get_span().update_event_response( | ||
None, instance.code, instance.headers, ret_val | ||
None, instance.code, dict(instance.headers.items()), ret_val | ||
) | ||
return ret_val | ||
|
||
|
@@ -115,7 +133,7 @@ def _read_stream_wrapper_generator(stream_generator, instance): | |
for partial_response in stream_generator: | ||
with lumigo_safe_execute("parse response.read_chunked"): | ||
SpansContainer.get_span().update_event_response( | ||
None, instance.status, instance.headers, partial_response | ||
None, instance.status, dict(instance.headers.items()), partial_response | ||
) | ||
yield partial_response | ||
|
||
|
@@ -281,6 +299,9 @@ def wrap_http_calls(): | |
with lumigo_safe_execute("wrap http calls"): | ||
get_logger().debug("wrapping the http request") | ||
wrap_function_wrapper("http.client", "HTTPConnection.send", _request_wrapper) | ||
wrap_function_wrapper( | ||
"http.client", "HTTPConnection.request", _headers_reminder_wrapper | ||
) | ||
wrap_function_wrapper("botocore.awsrequest", "AWSRequest.__init__", _putheader_wrapper) | ||
wrap_function_wrapper("http.client", "HTTPConnection.getresponse", _response_wrapper) | ||
wrap_function_wrapper("http.client", "HTTPResponse.read", _read_wrapper) | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lowering everything once (instead of the underline
__getter__
function) ~6s