Skip to content

Commit

Permalink
Enhance tf serving gpu test (#485)
Browse files: browse the repository at this point in the history
* debug gpu test

* raise timeout for tf deployment

* use different log name per deployment

* add more retries for prediction

* cleanup
  • Loading branch information
lluunn authored and k8s-ci-robot committed Mar 22, 2018
1 parent febb21d commit e6486c5
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
7 changes: 4 additions & 3 deletions testing/test_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def deploy_model(args):

if not cluster_ip:
raise ValueError("inception service wasn't assigned a cluster ip.")
util.wait_for_deployment(api_client, namespace, args.deploy_name, timeout_minutes=6)
util.wait_for_deployment(api_client, namespace, args.deploy_name, timeout_minutes=10)
logging.info("Verified TF serving started.")

def teardown(args):
Expand Down Expand Up @@ -555,8 +555,9 @@ def main(): # pylint: disable=too-many-locals
if not args.artifacts_dir:
args.artifacts_dir = args.test_dir

test_log = os.path.join(args.artifacts_dir, "logs",
"test_deploy." + args.func.__name__ + ".log.txt")
test_log = os.path.join(
args.artifacts_dir, "logs",
"test_deploy." + args.func.__name__ + args.deploy_name + ".log.txt")
if not os.path.exists(os.path.dirname(test_log)):
os.makedirs(os.path.dirname(test_log))

Expand Down
2 changes: 1 addition & 1 deletion testing/test_tf_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def main():
result = str(stub.Predict(request, 10.0)) # 10 secs timeout
except Exception as e:
num_try += 1
if num_try > 3:
if num_try > 10:
raise
logging.info('prediction failed: {}. Retrying...'.format(e))
time.sleep(5)
Expand Down

0 comments on commit e6486c5

Please sign in to comment.