diff --git a/model-engine/model_engine_server/core/docker/remote_build.py b/model-engine/model_engine_server/core/docker/remote_build.py index 8b250cfd..6261334e 100644 --- a/model-engine/model_engine_server/core/docker/remote_build.py +++ b/model-engine/model_engine_server/core/docker/remote_build.py @@ -70,7 +70,8 @@ def zip_context( assert len(folders_to_include) > 0 assert s3_file_name.endswith(".gz") - print(f"Uploading to s3 at: {s3_file_name}") + s3_uri = f"s3://{S3_BUCKET}/{s3_file_name}" + print(f"Uploading to s3 at: {s3_uri}") try: # Need to gimme_okta_aws_creds (you can export AWS_PROFILE='ml-admin' right after) tar_command = _build_tar_cmd(context, ignore_file, folders_to_include) @@ -83,7 +84,7 @@ def zip_context( ) as proc: assert proc.stdout is not None with storage_client.open( - f"s3://{S3_BUCKET}/{s3_file_name}", + s3_uri, "wb", ) as out_file: shutil.copyfileobj(proc.stdout, out_file) @@ -429,6 +430,7 @@ def build_remote_block( :param ignore_file: File (e.g. .dockerignore) containing things to ignore when preparing docker context. 
Relative to context :return: BuildResult representing if docker image has successfully built/pushed """ + logger.info(f"build_remote_block args {locals()}") job_name = build_remote( context, dockerfile, @@ -439,6 +441,7 @@ def build_remote_block( build_args, custom_tags, ) + logger.info(f"Waiting for job {job_name} to finish") result = get_pod_status_and_log(job_name) return result diff --git a/model-engine/model_engine_server/core/loggers.py b/model-engine/model_engine_server/core/loggers.py index 91b69758..e8245199 100644 --- a/model-engine/model_engine_server/core/loggers.py +++ b/model-engine/model_engine_server/core/loggers.py @@ -10,7 +10,7 @@ import ddtrace import json_log_formatter import tqdm -from ddtrace.helpers import get_correlation_ids +from ddtrace import tracer # DO NOT CHANGE LOGGING FORMAT LOG_FORMAT: str = "%(asctime)s %(levelname)s [%(name)s] [%(filename)s:%(lineno)d] - %(message)s" @@ -82,11 +82,12 @@ def json_record(self, message: str, extra: dict, record: logging.LogRecord) -> d if request_id: extra["request_id"] = request_id - trace_id, span_id = get_correlation_ids() + context = tracer.current_trace_context() + trace_id, span_id = (context.trace_id, context.span_id) if context else (0, 0) # add ids to event dictionary - extra["dd.trace_id"] = trace_id or 0 - extra["dd.span_id"] = span_id or 0 + extra["dd.trace_id"] = trace_id + extra["dd.span_id"] = span_id # add the env, service, and version configured for the tracer. # If tracing is not set up, then this should pull values from DD_ENV, DD_SERVICE, and DD_VERSION. 
diff --git a/model-engine/model_engine_server/inference/post_inference_hooks.py b/model-engine/model_engine_server/inference/post_inference_hooks.py index cd460a27..00abaa5d 100644 --- a/model-engine/model_engine_server/inference/post_inference_hooks.py +++ b/model-engine/model_engine_server/inference/post_inference_hooks.py @@ -1,32 +1,19 @@ from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional -from uuid import uuid4 import requests -from model_engine_server.common.constants import ( - BILLING_POST_INFERENCE_HOOK, - CALLBACK_POST_INFERENCE_HOOK, -) +from model_engine_server.common.constants import CALLBACK_POST_INFERENCE_HOOK from model_engine_server.common.dtos.tasks import EndpointPredictV1Request from model_engine_server.core.loggers import filename_wo_ext, make_logger from model_engine_server.domain.entities import CallbackAuth, CallbackBasicAuth -from model_engine_server.inference.common import _write_to_s3 from model_engine_server.inference.domain.gateways.inference_monitoring_metrics_gateway import ( InferenceMonitoringMetricsGateway, ) -from model_engine_server.inference.domain.gateways.usage_metrics_gateway import UsageMetricsGateway -from model_engine_server.inference.infra.gateways.fake_usage_metrics_gateway import ( - FakeUsageMetricsGateway, -) from tenacity import Retrying, stop_after_attempt, wait_exponential logger = make_logger(filename_wo_ext(__file__)) -def _upload_data(data: Any): - return _write_to_s3(data).get("result_url") - - class PostInferenceHook(ABC): def __init__( self, @@ -48,41 +35,6 @@ def handle( pass -class BillingHook(PostInferenceHook): - def __init__( - self, - endpoint_name: str, - bundle_name: str, - user_id: str, - billing_queue: Optional[str], - billing_tags: Optional[Dict[str, Any]], - ): - super().__init__(endpoint_name, bundle_name, user_id) - self._billing_queue = billing_queue - self._billing_tags = billing_tags or {} - - def handle( - self, - request_payload: EndpointPredictV1Request, - 
response: Dict[str, Any], - task_id: Optional[str], - ): - if not self._user_id or not self._billing_queue: - logger.error("Usage inputs could not be found for billing hook, aborting") - return - if not task_id: - task_id = str(uuid4()) - - events_queue: UsageMetricsGateway - try: - from plugins.eventbridge_usage_metrics_gateway import EventbridgeUsageMetricsGateway - - events_queue = EventbridgeUsageMetricsGateway(self._billing_queue) - except ModuleNotFoundError: - events_queue = FakeUsageMetricsGateway() - events_queue.emit_task_call_metric(idempotency_token=task_id, tags=self._billing_tags) - - class CallbackHook(PostInferenceHook): def __init__( self, @@ -142,15 +94,7 @@ def __init__( # TODO: Ensure that this process gracefully handles errors in # initializing each post-inference hook. hook_lower = hook.lower() - if hook_lower == BILLING_POST_INFERENCE_HOOK: - self._hooks[hook_lower] = BillingHook( - endpoint_name, - bundle_name, - user_id, - billing_queue, - billing_tags, - ) - elif hook_lower == CALLBACK_POST_INFERENCE_HOOK: + if hook_lower == CALLBACK_POST_INFERENCE_HOOK: self._hooks[hook_lower] = CallbackHook( endpoint_name, bundle_name, diff --git a/model-engine/model_engine_server/inference/pytorch_or_tf.base.Dockerfile b/model-engine/model_engine_server/inference/pytorch_or_tf.base.Dockerfile index edac54c9..01cbdf0c 100644 --- a/model-engine/model_engine_server/inference/pytorch_or_tf.base.Dockerfile +++ b/model-engine/model_engine_server/inference/pytorch_or_tf.base.Dockerfile @@ -57,7 +57,6 @@ COPY --chown=modelengine \ RUN pip install -r /app/model-engine/model_engine_server/inference/requirements_base.txt COPY --chown=modelengine model-engine/setup.py /app/model-engine/setup.py -COPY --chown=modelengine model-engine/model_engine_server.egg-info /app/model-engine/model_engine_server.egg-info COPY --chown=modelengine model-engine/model_engine_server/__init__.py /app/model-engine/model_engine_server/__init__.py COPY --chown=modelengine 
model-engine/model_engine_server/common /app/model-engine/model_engine_server/common COPY --chown=modelengine model-engine/model_engine_server/core /app/model-engine/model_engine_server/core diff --git a/model-engine/model_engine_server/inference/requirements_base.txt b/model-engine/model_engine_server/inference/requirements_base.txt index a352a14a..cedabe42 100644 --- a/model-engine/model_engine_server/inference/requirements_base.txt +++ b/model-engine/model_engine_server/inference/requirements_base.txt @@ -1,12 +1,22 @@ -aioredis==2.0.1 +aioredis~=2.0 +boto3>=1.28.38 celery[redis,sqs,tblib]==5.3.1 +datadog-api-client==2.11.0 +datadog~=0.46.0 fastapi==0.78.0 -gunicorn==20.1.0 # Incompatibility between celery 5 and python 3.7 because of importlib-metadata 5, so we pin it importlib-metadata<5.0;python_version<"3.8" -json-log-formatter==0.5.2 +scale-launch>=0.1.0 smart_open==5.1.0 -tqdm==4.65.0 -# Pin typing-extensions so aioitertools doesn't break typing-extensions>=4.1.1 uvicorn==0.17.6 +waitress==2.0.0 + +# HACK: at time of adding, these deps are imported by model-engine/model_engine_server files +# add here to prevent `ModuleNotFoundError` error on container startup, these should be in sync with server reqs +# long term: consider having slimmer deps and separating inference container deps from server container deps +ddtrace==1.8.3 # required for ddtrace-run entrypoint command as well +json-log-formatter~=0.3 # model_engine_server/core/loggers.py +tenacity>=6.0.0,<=6.2.0 # model_engine_server/core/loggers.py +tqdm~=4.64 # model_engine_server/common/service_requests.py +gunicorn~=20.0 diff --git a/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py b/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py index ca5f7469..d2277ab3 100644 --- a/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py +++ b/model-engine/model_engine_server/infra/repositories/ecr_docker_repository.py @@ -4,8 +4,11 @@ 
from model_engine_server.core.config import infra_config from model_engine_server.core.docker.ecr import image_exists as ecr_image_exists from model_engine_server.core.docker.remote_build import build_remote_block +from model_engine_server.core.loggers import logger_name, make_logger from model_engine_server.domain.repositories import DockerRepository +logger = make_logger(logger_name()) + class ECRDockerRepository(DockerRepository): def image_exists( @@ -21,6 +24,7 @@ def get_image_url(self, image_tag: str, repository_name: str) -> str: return f"{infra_config().docker_repo_prefix}/{repository_name}:{image_tag}" def build_image(self, image_params: BuildImageRequest) -> BuildImageResponse: + logger.info(f"build_image args {locals()}") folders_to_include = ["model-engine"] if image_params.requirements_folder: folders_to_include.append(image_params.requirements_folder) diff --git a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py index 61b381e0..eabbf034 100644 --- a/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py +++ b/model-engine/model_engine_server/infra/services/live_endpoint_builder_service.py @@ -172,6 +172,7 @@ async def build_endpoint( base_image_params = self.get_base_image_params( build_endpoint_request, logger_adapter ) + logger.info(f"base_image_params: {base_image_params}") base_image = await self._build_image( base_image_params, build_endpoint_request, @@ -490,6 +491,8 @@ def get_base_image_params( inference_folder = "model-engine/model_engine_server/inference" base_path: str = os.getenv("WORKSPACE") # type: ignore + logger.info(f"inference_folder: {inference_folder}") + logger.info(f"dockerfile: {inference_folder}/{dockerfile}") return BuildImageRequest( repo="launch/inference", image_tag=resulting_image_tag[:MAX_IMAGE_TAG_LEN], diff --git a/model-engine/model_engine_server/service_builder/tasks_v1.py 
b/model-engine/model_engine_server/service_builder/tasks_v1.py index 539b6803..772a5297 100644 --- a/model-engine/model_engine_server/service_builder/tasks_v1.py +++ b/model-engine/model_engine_server/service_builder/tasks_v1.py @@ -90,16 +90,7 @@ async def _build_endpoint( session = SessionAsyncNullPool pool = aioredis.BlockingConnectionPool.from_url(hmi_config.cache_redis_url) redis = aioredis.Redis(connection_pool=pool) - - service: LiveEndpointBuilderService - try: - from plugins.dependencies import ( - get_live_endpoint_builder_service as get_custom_live_endpoint_builder_service, - ) - - service = get_custom_live_endpoint_builder_service(session, redis) - except ModuleNotFoundError: - service = get_live_endpoint_builder_service(session, redis) + service: LiveEndpointBuilderService = get_live_endpoint_builder_service(session, redis) response = await service.build_endpoint(build_endpoint_request) await redis.close() diff --git a/model-engine/requirements.in b/model-engine/requirements.in index 5caed45c..e173eeef 100644 --- a/model-engine/requirements.in +++ b/model-engine/requirements.in @@ -15,7 +15,7 @@ croniter==1.4.1 dataclasses-json>=0.5.7 datadog-api-client==2.11.0 datadog~=0.46.0 -ddtrace~=0.49.2 +ddtrace==1.8.3 deprecation~=2.1 docker~=5.0 fastapi==0.78.0 diff --git a/model-engine/requirements.txt b/model-engine/requirements.txt index 1e37fe0f..c367da1e 100644 --- a/model-engine/requirements.txt +++ b/model-engine/requirements.txt @@ -8,14 +8,14 @@ aiofiles==23.1.0 # via quart aiohttp==3.8.5 # via - # -r requirements.in + # -r model-engine/requirements.in # kubernetes-asyncio aioredis==2.0.1 - # via -r requirements.in + # via -r model-engine/requirements.in aiosignal==1.3.1 # via aiohttp alembic==1.8.1 - # via -r requirements.in + # via -r model-engine/requirements.in amqp==5.1.1 # via kombu anyio==3.7.1 @@ -28,12 +28,20 @@ async-timeout==4.0.2 # via # aiohttp # aioredis + # redis asyncpg==0.27.0 - # via -r requirements.in + # via -r 
model-engine/requirements.in attrs==23.1.0 # via # aiohttp + # cattrs # ddtrace + # jsonschema + # referencing +backports-zoneinfo[tzdata]==0.2.1 + # via + # celery + # kombu billiard==4.1.0 # via celery bleach==6.0.0 @@ -42,24 +50,28 @@ blinker==1.6.2 # via quart boto3==1.28.1 # via - # -r requirements.in + # -r model-engine/requirements.in # celery # kombu boto3-stubs[essential]==1.26.67 - # via -r requirements.in + # via -r model-engine/requirements.in botocore==1.31.1 # via - # -r requirements.in + # -r model-engine/requirements.in # boto3 # s3transfer botocore-stubs==1.29.165 # via boto3-stubs build==0.8.0 - # via -r requirements.in + # via -r model-engine/requirements.in +bytecode==0.14.2 + # via ddtrace cachetools==5.3.1 # via google-auth +cattrs==23.1.2 + # via ddtrace celery[redis,sqs,tblib]==5.3.1 - # via -r requirements.in + # via -r model-engine/requirements.in certifi==2023.7.22 # via # datadog-api-client @@ -74,7 +86,7 @@ charset-normalizer==3.2.0 # requests click==8.1.4 # via - # -r requirements.in + # -r model-engine/requirements.in # celery # click-didyoumean # click-plugins @@ -88,31 +100,39 @@ click-plugins==1.1.1 click-repl==0.3.0 # via celery cloudpickle==2.1.0 - # via -r requirements.in + # via -r model-engine/requirements.in colorama==0.4.6 # via twine commonmark==0.9.1 # via rich croniter==1.4.1 - # via -r requirements.in + # via -r model-engine/requirements.in cryptography==41.0.3 # via secretstorage dataclasses-json==0.5.9 - # via -r requirements.in + # via -r model-engine/requirements.in datadog==0.46.0 - # via -r requirements.in + # via -r model-engine/requirements.in datadog-api-client==2.11.0 - # via -r requirements.in -ddtrace==0.49.2 - # via -r requirements.in + # via -r model-engine/requirements.in +ddsketch==2.0.4 + # via ddtrace +ddtrace==1.8.3 + # via -r model-engine/requirements.in deprecation==2.1.0 - # via -r requirements.in + # via -r model-engine/requirements.in docker==5.0.3 - # via -r requirements.in + # via -r 
model-engine/requirements.in docutils==0.20.1 # via readme-renderer +envier==0.4.0 + # via ddtrace +exceptiongroup==1.1.3 + # via + # anyio + # cattrs fastapi==0.78.0 - # via -r requirements.in + # via -r model-engine/requirements.in frozenlist==1.3.3 # via # aiohttp @@ -120,15 +140,15 @@ frozenlist==1.3.3 gitdb==4.0.10 # via gitpython gitdb2==2.0.6 - # via -r requirements.in + # via -r model-engine/requirements.in gitpython==3.1.32 - # via -r requirements.in + # via -r model-engine/requirements.in google-auth==2.21.0 # via kubernetes greenlet==2.0.2 # via sqlalchemy gunicorn==20.1.0 - # via -r requirements.in + # via -r model-engine/requirements.in h11==0.14.0 # via # hypercorn @@ -139,7 +159,7 @@ h2==4.1.0 hpack==4.0.0 # via h2 httptools==0.5.0 - # via -r requirements.in + # via -r model-engine/requirements.in hypercorn==0.14.4 # via quart hyperframe==6.0.1 @@ -151,8 +171,16 @@ idna==3.4 # yarl importlib-metadata==6.8.0 # via + # alembic # keyring + # quart # twine +importlib-resources==6.0.1 + # via + # alembic + # jsonschema + # jsonschema-specifications + # keyring itsdangerous==2.1.2 # via quart jaraco-classes==3.3.0 @@ -163,24 +191,28 @@ jeepney==0.8.0 # secretstorage jinja2==3.0.3 # via - # -r requirements.in + # -r model-engine/requirements.in # quart jmespath==1.0.1 # via # boto3 # botocore json-log-formatter==0.5.2 - # via -r requirements.in + # via -r model-engine/requirements.in +jsonschema==4.19.0 + # via ddtrace +jsonschema-specifications==2023.7.1 + # via jsonschema keyring==24.2.0 # via twine kombu[sqs]==5.3.1 # via celery kubeconfig==1.1.1 - # via -r requirements.in + # via -r model-engine/requirements.in kubernetes==25.3.0 - # via -r requirements.in + # via -r model-engine/requirements.in kubernetes-asyncio==24.2.2 - # via -r requirements.in + # via -r model-engine/requirements.in mako==1.2.4 # via alembic markupsafe==2.1.3 @@ -220,7 +252,7 @@ mypy-extensions==1.0.0 oauthlib==3.2.2 # via requests-oauthlib orjson==3.8.6 - # via -r requirements.in 
+ # via -r model-engine/requirements.in packaging==23.1 # via # build @@ -233,18 +265,21 @@ pg8000==1.29.8 # via testing-postgresql pkginfo==1.9.6 # via twine +pkgutil-resolve-name==1.3.10 + # via jsonschema priority==2.0.0 # via hypercorn prompt-toolkit==3.0.39 # via click-repl protobuf==3.20.3 # via - # -r requirements.in + # -r model-engine/requirements.in + # ddsketch # ddtrace psycopg2-binary==2.9.3 - # via -r requirements.in + # via -r model-engine/requirements.in py-xid==0.3.0 - # via -r requirements.in + # via -r model-engine/requirements.in pyasn1==0.5.0 # via # pyasn1-modules @@ -255,12 +290,12 @@ pycparser==2.21 # via cffi pycurl==7.45.2 # via - # -r requirements.in + # -r model-engine/requirements.in # celery # kombu pydantic==1.10.11 # via - # -r requirements.in + # -r model-engine/requirements.in # fastapi pygments==2.15.1 # via @@ -276,21 +311,25 @@ python-dateutil==2.8.2 # kubernetes-asyncio # pg8000 python-multipart==0.0.6 - # via -r requirements.in + # via -r model-engine/requirements.in pyyaml==6.0 # via # kubeconfig # kubernetes # kubernetes-asyncio quart==0.18.3 - # via -r requirements.in + # via -r model-engine/requirements.in readme-renderer==40.0 # via twine redis==4.6.0 # via celery +referencing==0.30.2 + # via + # jsonschema + # jsonschema-specifications requests==2.31.0 # via - # -r requirements.in + # -r model-engine/requirements.in # datadog # docker # kubernetes @@ -299,7 +338,7 @@ requests==2.31.0 # requests-toolbelt # twine requests-auth-aws-sigv4==0.7 - # via -r requirements.in + # via -r model-engine/requirements.in requests-oauthlib==1.3.1 # via kubernetes requests-toolbelt==1.0.0 @@ -307,7 +346,11 @@ requests-toolbelt==1.0.0 rfc3986==2.0.0 # via twine rich==12.6.0 - # via -r requirements.in + # via -r model-engine/requirements.in +rpds-py==0.10.0 + # via + # jsonschema + # referencing rsa==4.9 # via google-auth s3transfer==0.6.1 @@ -317,10 +360,11 @@ scramp==1.4.4 secretstorage==3.3.3 # via keyring sh==1.14.3 - # via -r 
requirements.in + # via -r model-engine/requirements.in six==1.16.0 # via # bleach + # ddsketch # ddtrace # google-auth # kubernetes @@ -328,7 +372,7 @@ six==1.16.0 # python-dateutil # tenacity smart-open==5.2.1 - # via -r requirements.in + # via -r model-engine/requirements.in smmap==5.0.0 # via # gitdb @@ -339,12 +383,12 @@ sniffio==1.3.0 # via anyio sqlalchemy[asyncio]==2.0.4 # via - # -r requirements.in + # -r model-engine/requirements.in # alembic sse-starlette==1.6.1 - # via -r requirements.in + # via -r model-engine/requirements.in sseclient-py==1.7.2 - # via -r requirements.in + # via -r model-engine/requirements.in starlette==0.19.1 # via # fastapi @@ -353,18 +397,23 @@ tblib==2.0.0 # via celery tenacity==6.2.0 # via - # -r requirements.in + # -r model-engine/requirements.in # ddtrace testing-common-database==2.0.3 # via testing-postgresql testing-postgresql==1.3.0 - # via -r requirements.in + # via -r model-engine/requirements.in +tomli==2.0.1 + # via + # build + # hypercorn + # pep517 tqdm==4.65.0 # via - # -r requirements.in + # -r model-engine/requirements.in # twine twine==3.7.1 - # via -r requirements.in + # via -r model-engine/requirements.in types-awscrt==0.16.23 # via # botocore-stubs @@ -374,15 +423,32 @@ types-s3transfer==0.6.1 typing-extensions==4.7.1 # via # aioredis + # asgiref # boto3-stubs + # botocore-stubs + # bytecode + # cattrs # datadog-api-client + # ddtrace + # kombu + # mypy-boto3-cloudformation + # mypy-boto3-dynamodb + # mypy-boto3-ec2 + # mypy-boto3-lambda + # mypy-boto3-rds + # mypy-boto3-s3 + # mypy-boto3-sqs # pydantic + # rich # sqlalchemy + # starlette # typing-inspect typing-inspect==0.9.0 # via dataclasses-json tzdata==2023.3 - # via celery + # via + # backports-zoneinfo + # celery urllib3==1.26.16 # via # botocore @@ -394,9 +460,9 @@ urllib3==1.26.16 # kubernetes-asyncio # requests uvicorn==0.17.6 - # via -r requirements.in + # via -r model-engine/requirements.in uvloop==0.17.0 - # via -r requirements.in + # via -r 
model-engine/requirements.in vine==5.0.0 # via # amqp @@ -414,12 +480,16 @@ werkzeug==2.3.6 # via quart wsproto==1.2.0 # via hypercorn +xmltodict==0.13.0 + # via ddtrace yarl==1.9.2 # via - # -r requirements.in + # -r model-engine/requirements.in # aiohttp zipp==3.16.0 - # via importlib-metadata + # via + # importlib-metadata + # importlib-resources # The following packages are considered to be unsafe in a requirements file: setuptools==68.0.0