From a608145832fc9010a96bdb57ca5b2d513b0ba09e Mon Sep 17 00:00:00 2001 From: Yaron Haviv Date: Thu, 24 Feb 2022 14:22:30 +0200 Subject: [PATCH] [Runtimes] Add flag to allow printing build logs only on failure (#1777) --- mlrun/runtimes/base.py | 2 +- mlrun/runtimes/daskjob.py | 15 ++++++++++++++- mlrun/runtimes/kubejob.py | 29 +++++++++++++++++++++-------- mlrun/runtimes/remotesparkjob.py | 15 ++++++++++++++- mlrun/runtimes/sparkjob/abstract.py | 15 ++++++++++++++- 5 files changed, 64 insertions(+), 12 deletions(-) diff --git a/mlrun/runtimes/base.py b/mlrun/runtimes/base.py index c55ca0aeb7b..d30b455cac2 100644 --- a/mlrun/runtimes/base.py +++ b/mlrun/runtimes/base.py @@ -428,7 +428,7 @@ def run( logger.info( "Function is not deployed and auto_build flag is set, starting deploy..." ) - self.deploy(skip_deployed=True) + self.deploy(skip_deployed=True, show_on_failure=True) else: raise RunError( "function image is not built/ready, use .deploy() method first" diff --git a/mlrun/runtimes/daskjob.py b/mlrun/runtimes/daskjob.py index 4f11629de47..82fb05cfb16 100644 --- a/mlrun/runtimes/daskjob.py +++ b/mlrun/runtimes/daskjob.py @@ -378,14 +378,27 @@ def deploy( skip_deployed=False, is_kfp=False, mlrun_version_specifier=None, + show_on_failure: bool = False, ): - """deploy function, build container with dependencies""" + """deploy function, build container with dependencies + + :param watch: wait for the deploy to complete (and print build logs) + :param with_mlrun: add the current mlrun package to the container build + :param skip_deployed: skip the build if we already have an image for the function + :param mlrun_version_specifier: which mlrun package version to include (if not current) + :param builder_env: Kaniko builder pod env vars dict (for config/credentials) + e.g. builder_env={"GIT_TOKEN": token} + :param show_on_failure: show logs only in case of build failure + + :return True if the function is ready (deployed) + """ return super().deploy( watch, with_mlrun, skip_deployed, is_kfp=is_kfp, mlrun_version_specifier=mlrun_version_specifier, + show_on_failure=show_on_failure, ) def gpus(self, gpus, gpu_type="nvidia.com/gpu"): diff --git a/mlrun/runtimes/kubejob.py b/mlrun/runtimes/kubejob.py index 89f68d0de9c..24840b529a0 100644 --- a/mlrun/runtimes/kubejob.py +++ b/mlrun/runtimes/kubejob.py @@ -126,6 +126,7 @@ def deploy( is_kfp=False, mlrun_version_specifier=None, builder_env: dict = None, + show_on_failure: bool = False, ) -> bool: """deploy function, build container with dependencies @@ -135,6 +136,7 @@ def deploy( :param mlrun_version_specifier: which mlrun package version to include (if not current) :param builder_env: Kaniko builder pod env vars dict (for config/credentials) e.g. builder_env={"GIT_TOKEN": token} + :param show_on_failure: show logs only in case of build failure :return True if the function is ready (deployed) """ @@ -177,7 +179,7 @@ def deploy( f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}" ) if watch and not ready: - state = self._build_watch(watch) + state = self._build_watch(watch, show_on_failure=show_on_failure) ready = state == "ready" self.status.state = state else: @@ -196,7 +198,7 @@ def deploy( raise mlrun.errors.MLRunRuntimeError("Deploy failed") return ready - def _build_watch(self, watch=True, logs=True): + def _build_watch(self, watch=True, logs=True, show_on_failure=False): db = self._get_db() offset = 0 try: @@ -204,16 +206,27 @@ def _build_watch(self, watch=True, logs=True): except RunDBError: raise ValueError("function or build process not found") - if text: - print(text) + def print_log(text): + if text and (not show_on_failure or self.status.state == "error"): + print(text, end="") + + print_log(text) + offset += len(text) if watch: while self.status.state in ["pending", "running"]: - offset += len(text) time.sleep(2) - text, _ = db.get_builder_status(self, offset, logs=logs) - if text: - print(text, end="") + if show_on_failure: + text = "" + db.get_builder_status(self, 0, logs=False) + if self.status.state == "error": + # re-read the full log on failure + text, _ = db.get_builder_status(self, offset, logs=logs) + else: + text, _ = db.get_builder_status(self, offset, logs=logs) + print_log(text) + offset += len(text) + print() return self.status.state def builder_status(self, watch=True, logs=True): diff --git a/mlrun/runtimes/remotesparkjob.py b/mlrun/runtimes/remotesparkjob.py index 2b154253cca..137698d21e7 100644 --- a/mlrun/runtimes/remotesparkjob.py +++ b/mlrun/runtimes/remotesparkjob.py @@ -157,8 +157,20 @@ def deploy( skip_deployed=False, is_kfp=False, mlrun_version_specifier=None, + show_on_failure: bool = False, ): - """deploy function, build container with dependencies""" + """deploy function, build container with dependencies + + :param watch: wait for the deploy to complete (and print build logs) + :param with_mlrun: add the current mlrun package to the container build + :param skip_deployed: skip the build if we already have an image for the function + :param mlrun_version_specifier: which mlrun package version to include (if not current) + :param builder_env: Kaniko builder pod env vars dict (for config/credentials) + e.g. builder_env={"GIT_TOKEN": token} + :param show_on_failure: show logs only in case of build failure + + :return True if the function is ready (deployed) + """ # connect will populate the config from the server config if not self.spec.build.base_image: self.spec.build.base_image = self._resolve_default_base_image @@ -168,6 +180,7 @@ def deploy( skip_deployed=skip_deployed, is_kfp=is_kfp, mlrun_version_specifier=mlrun_version_specifier, + show_on_failure=show_on_failure, ) diff --git a/mlrun/runtimes/sparkjob/abstract.py b/mlrun/runtimes/sparkjob/abstract.py index 10d8053dc3e..052b0cea132 100644 --- a/mlrun/runtimes/sparkjob/abstract.py +++ b/mlrun/runtimes/sparkjob/abstract.py @@ -236,8 +236,20 @@ def deploy( skip_deployed=False, is_kfp=False, mlrun_version_specifier=None, + show_on_failure: bool = False, ): - """deploy function, build container with dependencies""" + """deploy function, build container with dependencies + + :param watch: wait for the deploy to complete (and print build logs) + :param with_mlrun: add the current mlrun package to the container build + :param skip_deployed: skip the build if we already have an image for the function + :param mlrun_version_specifier: which mlrun package version to include (if not current) + :param builder_env: Kaniko builder pod env vars dict (for config/credentials) + e.g. builder_env={"GIT_TOKEN": token} + :param show_on_failure: show logs only in case of build failure + + :return True if the function is ready (deployed) + """ # connect will populate the config from the server config get_run_db() if not self.spec.build.base_image: @@ -248,6 +260,7 @@ def deploy( skip_deployed=skip_deployed, is_kfp=is_kfp, mlrun_version_specifier=mlrun_version_specifier, + show_on_failure=show_on_failure, ) @staticmethod