From e6486c5765ff005f78e1f930f9493f727dfd29e0 Mon Sep 17 00:00:00 2001
From: Lun-Kai Hsu
Date: Thu, 22 Mar 2018 16:47:05 -0700
Subject: [PATCH] Enhance tf serving gpu test (#485)

* debug gpu test
* raise timeout for tf deployment
* use different log name per deployment
* add more retry for prediction
* cleanup
---
 testing/test_deploy.py | 7 ++++---
 testing/test_tf_serving.py | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/testing/test_deploy.py b/testing/test_deploy.py
index 18e256b5ff9..9393eb1ca44 100644
--- a/testing/test_deploy.py
+++ b/testing/test_deploy.py
@@ -221,7 +221,7 @@ def deploy_model(args):
   if not cluster_ip:
     raise ValueError("inception service wasn't assigned a cluster ip.")
 
-  util.wait_for_deployment(api_client, namespace, args.deploy_name, timeout_minutes=6)
+  util.wait_for_deployment(api_client, namespace, args.deploy_name, timeout_minutes=10)
   logging.info("Verified TF serving started.")
 
 def teardown(args):
@@ -555,8 +555,9 @@ def main(): # pylint: disable=too-many-locals
   if not args.artifacts_dir:
     args.artifacts_dir = args.test_dir
 
-  test_log = os.path.join(args.artifacts_dir, "logs",
-                          "test_deploy." + args.func.__name__ + ".log.txt")
+  test_log = os.path.join(
+    args.artifacts_dir, "logs",
+    "test_deploy." + args.func.__name__ + args.deploy_name + ".log.txt")
   if not os.path.exists(os.path.dirname(test_log)):
     os.makedirs(os.path.dirname(test_log))
 
diff --git a/testing/test_tf_serving.py b/testing/test_tf_serving.py
index 77feed374ba..0c6cd64b6c9 100644
--- a/testing/test_tf_serving.py
+++ b/testing/test_tf_serving.py
@@ -94,7 +94,7 @@ def main():
       result = str(stub.Predict(request, 10.0)) # 10 secs timeout
     except Exception as e:
       num_try += 1
-      if num_try > 3:
+      if num_try > 10:
         raise
       logging.info('prediction failed: {}. Retrying...'.format(e))
       time.sleep(5)