From a608145832fc9010a96bdb57ca5b2d513b0ba09e Mon Sep 17 00:00:00 2001
From: Yaron Haviv <yhaviv@gmail.com>
Date: Thu, 24 Feb 2022 14:22:30 +0200
Subject: [PATCH] [Runtimes] Add flag to allow printing build logs only on
 failure (#1777)

---
 mlrun/runtimes/base.py              |  2 +-
 mlrun/runtimes/daskjob.py           | 15 ++++++++++++++-
 mlrun/runtimes/kubejob.py           | 29 +++++++++++++++++++++--------
 mlrun/runtimes/remotesparkjob.py    | 15 ++++++++++++++-
 mlrun/runtimes/sparkjob/abstract.py | 15 ++++++++++++++-
 5 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/mlrun/runtimes/base.py b/mlrun/runtimes/base.py
index c55ca0aeb7b..d30b455cac2 100644
--- a/mlrun/runtimes/base.py
+++ b/mlrun/runtimes/base.py
@@ -428,7 +428,7 @@ def run(
                 logger.info(
                     "Function is not deployed and auto_build flag is set, starting deploy..."
                 )
-                self.deploy(skip_deployed=True)
+                self.deploy(skip_deployed=True, show_on_failure=True)
             else:
                 raise RunError(
                     "function image is not built/ready, use .deploy() method first"
diff --git a/mlrun/runtimes/daskjob.py b/mlrun/runtimes/daskjob.py
index 4f11629de47..82fb05cfb16 100644
--- a/mlrun/runtimes/daskjob.py
+++ b/mlrun/runtimes/daskjob.py
@@ -378,14 +378,27 @@ def deploy(
         skip_deployed=False,
         is_kfp=False,
         mlrun_version_specifier=None,
+        show_on_failure: bool = False,
     ):
-        """deploy function, build container with dependencies"""
+        """deploy function, build container with dependencies
+
+        :param watch:      wait for the deploy to complete (and print build logs)
+        :param with_mlrun: add the current mlrun package to the container build
+        :param skip_deployed: skip the build if we already have an image for the function
+        :param mlrun_version_specifier:  which mlrun package version to include (if not current)
+        :param builder_env:   Kaniko builder pod env vars dict (for config/credentials)
+                              e.g. builder_env={"GIT_TOKEN": token}
+        :param show_on_failure:  show logs only in case of build failure
+
+        :return: True if the function is ready (deployed)
+        """
         return super().deploy(
             watch,
             with_mlrun,
             skip_deployed,
             is_kfp=is_kfp,
             mlrun_version_specifier=mlrun_version_specifier,
+            show_on_failure=show_on_failure,
         )
 
     def gpus(self, gpus, gpu_type="nvidia.com/gpu"):
diff --git a/mlrun/runtimes/kubejob.py b/mlrun/runtimes/kubejob.py
index 89f68d0de9c..24840b529a0 100644
--- a/mlrun/runtimes/kubejob.py
+++ b/mlrun/runtimes/kubejob.py
@@ -126,6 +126,7 @@ def deploy(
         is_kfp=False,
         mlrun_version_specifier=None,
         builder_env: dict = None,
+        show_on_failure: bool = False,
     ) -> bool:
         """deploy function, build container with dependencies
 
@@ -135,6 +136,7 @@ def deploy(
         :param mlrun_version_specifier:  which mlrun package version to include (if not current)
         :param builder_env:   Kaniko builder pod env vars dict (for config/credentials)
                               e.g. builder_env={"GIT_TOKEN": token}
+        :param show_on_failure:  show logs only in case of build failure
 
         :return True if the function is ready (deployed)
         """
@@ -177,7 +179,7 @@ def deploy(
                     f"Started building image: {data.get('data', {}).get('spec', {}).get('build', {}).get('image')}"
                 )
             if watch and not ready:
-                state = self._build_watch(watch)
+                state = self._build_watch(watch, show_on_failure=show_on_failure)
                 ready = state == "ready"
                 self.status.state = state
         else:
@@ -196,7 +198,7 @@ def deploy(
             raise mlrun.errors.MLRunRuntimeError("Deploy failed")
         return ready
 
-    def _build_watch(self, watch=True, logs=True):
+    def _build_watch(self, watch=True, logs=True, show_on_failure=False):
         db = self._get_db()
         offset = 0
         try:
@@ -204,16 +206,27 @@ def _build_watch(self, watch=True, logs=True):
         except RunDBError:
             raise ValueError("function or build process not found")
 
-        if text:
-            print(text)
+        def print_log(text):
+            if text and (not show_on_failure or self.status.state == "error"):
+                print(text, end="")
+
+        print_log(text)
+        offset += len(text)
         if watch:
             while self.status.state in ["pending", "running"]:
-                offset += len(text)
                 time.sleep(2)
-                text, _ = db.get_builder_status(self, offset, logs=logs)
-                if text:
-                    print(text, end="")
+                if show_on_failure:
+                    text = ""
+                    db.get_builder_status(self, 0, logs=False)
+                    if self.status.state == "error":
+                        # on failure fetch the log; NOTE(review): starts at offset, not 0
+                        text, _ = db.get_builder_status(self, offset, logs=logs)
+                else:
+                    text, _ = db.get_builder_status(self, offset, logs=logs)
+                print_log(text)
+                offset += len(text)
 
+        print()
         return self.status.state
 
     def builder_status(self, watch=True, logs=True):
diff --git a/mlrun/runtimes/remotesparkjob.py b/mlrun/runtimes/remotesparkjob.py
index 2b154253cca..137698d21e7 100644
--- a/mlrun/runtimes/remotesparkjob.py
+++ b/mlrun/runtimes/remotesparkjob.py
@@ -157,8 +157,20 @@ def deploy(
         skip_deployed=False,
         is_kfp=False,
         mlrun_version_specifier=None,
+        show_on_failure: bool = False,
     ):
-        """deploy function, build container with dependencies"""
+        """deploy function, build container with dependencies
+
+        :param watch:      wait for the deploy to complete (and print build logs)
+        :param with_mlrun: add the current mlrun package to the container build
+        :param skip_deployed: skip the build if we already have an image for the function
+        :param mlrun_version_specifier:  which mlrun package version to include (if not current)
+        :param builder_env:   Kaniko builder pod env vars dict (for config/credentials)
+                              e.g. builder_env={"GIT_TOKEN": token}
+        :param show_on_failure:  show logs only in case of build failure
+
+        :return: True if the function is ready (deployed)
+        """
         # connect will populate the config from the server config
         if not self.spec.build.base_image:
             self.spec.build.base_image = self._resolve_default_base_image
@@ -168,6 +180,7 @@ def deploy(
             skip_deployed=skip_deployed,
             is_kfp=is_kfp,
             mlrun_version_specifier=mlrun_version_specifier,
+            show_on_failure=show_on_failure,
         )
 
 
diff --git a/mlrun/runtimes/sparkjob/abstract.py b/mlrun/runtimes/sparkjob/abstract.py
index 10d8053dc3e..052b0cea132 100644
--- a/mlrun/runtimes/sparkjob/abstract.py
+++ b/mlrun/runtimes/sparkjob/abstract.py
@@ -236,8 +236,20 @@ def deploy(
         skip_deployed=False,
         is_kfp=False,
         mlrun_version_specifier=None,
+        show_on_failure: bool = False,
     ):
-        """deploy function, build container with dependencies"""
+        """deploy function, build container with dependencies
+
+        :param watch:      wait for the deploy to complete (and print build logs)
+        :param with_mlrun: add the current mlrun package to the container build
+        :param skip_deployed: skip the build if we already have an image for the function
+        :param mlrun_version_specifier:  which mlrun package version to include (if not current)
+        :param builder_env:   Kaniko builder pod env vars dict (for config/credentials)
+                              e.g. builder_env={"GIT_TOKEN": token}
+        :param show_on_failure:  show logs only in case of build failure
+
+        :return: True if the function is ready (deployed)
+        """
         # connect will populate the config from the server config
         get_run_db()
         if not self.spec.build.base_image:
@@ -248,6 +260,7 @@ def deploy(
             skip_deployed=skip_deployed,
             is_kfp=is_kfp,
             mlrun_version_specifier=mlrun_version_specifier,
+            show_on_failure=show_on_failure,
         )
 
     @staticmethod