Fix issue #113: Make train_files and eval_files passed to trainer user module have the same type (List[str]).

Also remove the unused output_dir from the params passed to the user module. PiperOrigin-RevId: 248586432
tensorflow · May 17, 2019 · cb0404d · cb0404d
1 parent a9e2c3d
commit cb0404d
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 4 deletions.
diff --git a/RELEASE.md b/RELEASE.md
@@ -13,11 +13,12 @@
 *   Declared 'cmle_training_args' on trainer and 'cmle_serving_args' on
     pusher deprecated. User should use the `trainer/pusher` executors in
     tfx.extensions.google_cloud_ai_platform module instead.
-
 *   Update components and code samples to use `tft.TFTransformOutput` (
     introduced in tensorflow_transform 0.8).  This avoids directly accessing the
     DatasetSchema object which may be removed in tensorflow_transform 0.14 or
     0.15.
+*   Fixes issue #113 to have consistent type of train_files and eval_files
+    passed to trainer user module.
 
 ## Breaking changes
 

diff --git a/tfx/components/trainer/executor.py b/tfx/components/trainer/executor.py
@@ -126,8 +126,10 @@ def Do(self, input_dict: Dict[Text, List[types.TfxType]],
             types.get_split_uri(input_dict['transformed_examples'], 'train'))
     ]
     transform_output = types.get_single_uri(input_dict['transform_output'])
-    eval_files = _all_files_pattern(
-        types.get_split_uri(input_dict['transformed_examples'], 'eval'))
+    eval_files = [
+        _all_files_pattern(
+            types.get_split_uri(input_dict['transformed_examples'], 'eval'))
+    ]
     schema_file = io_utils.get_only_uri_in_dir(
         types.get_single_uri(input_dict['schema']))
 
@@ -159,14 +161,21 @@ def Do(self, input_dict: Dict[Text, List[types.TfxType]],
 
     # TODO(b/126242806) Use PipelineInputs when it is available in third_party.
     hparams = tf.contrib.training.HParams(
+        # A list of uris for train files.
         train_files=train_files,
+        # A single uri for transform graph produced by TFT.
         transform_output=transform_output,
-        output_dir=output_path,
+        # A single uri for the output directory of the serving model.
         serving_model_dir=serving_model_dir,
+        # A list of uris for eval files.
         eval_files=eval_files,
+        # A single uri for schema file.
         schema_file=schema_file,
+        # Number of train steps.
         train_steps=train_steps,
+        # Number of eval steps.
         eval_steps=eval_steps,
+        # A single uri for the model directory to warm start from.
         warm_start_from=warm_start_from)
 
     schema = io_utils.parse_pbtxt_file(schema_file, schema_pb2.Schema())