pytorch · Jack-Khuu · Aug 3, 2024 · Jul 31, 2024 · Aug 1, 2024 · Aug 1, 2024
diff --git a/build/builder.py b/build/builder.py
@@ -103,15 +103,18 @@ def from_args(cls, args):  # -> BuilderArgs:
                 model_config.transformer_params_key or model_config.name.split("/")[-1]
             )
 
+        dso_path = getattr(args, "dso_path", None)
+        pte_path = getattr(args, "pte_path", None)
+
         is_chat_model = False
         if args.is_chat_model:
             is_chat_model = True
         else:
             for path in [
                 checkpoint_path,
                 checkpoint_dir,
-                args.dso_path,
-                args.pte_path,
+                dso_path,
+                pte_path,
                 args.gguf_path,
             ]:
                 if path is not None:
@@ -125,7 +128,10 @@ def from_args(cls, args):  # -> BuilderArgs:
                     if "chat" in path_basename or "instruct" in path_basename:
                         is_chat_model = True
 
-        if args.output_pte_path and args.dtype.startswith("fast"):
+
+        output_pte_path = getattr(args, "output_pte_path", None)
+        output_dso_path = getattr(args, "output_dso_path", None)
+        if output_pte_path and args.dtype.startswith("fast"):
             if args.dtype == "fast":
                 # As per Kimish, float32 should be faster on ET XNNPACK
                 # (because fp16 is implemented as upcast to fp32 for several
@@ -144,11 +150,11 @@ def from_args(cls, args):  # -> BuilderArgs:
             params_table=params_table,
             gguf_path=args.gguf_path,
             gguf_kwargs=None,
-            dso_path=args.dso_path,
-            pte_path=args.pte_path,
+            dso_path=dso_path,
+            pte_path=pte_path,
             device=args.device,
             precision=dtype,
-            setup_caches=(args.output_dso_path or args.output_pte_path),
+            setup_caches=(output_dso_path or output_pte_path),
             use_distributed=args.distributed,
             is_chat_model=is_chat_model,
         )
@@ -355,27 +361,27 @@ def _maybe_init_distributed(
     builder_args: BuilderArgs,
 ) -> Tuple[Optional[DeviceMesh], Optional[ParallelDims]]:
     """
-    Initialize distributed related setups if the user specified 
+    Initialize distributed related setups if the user specified
     using distributed inference. If not, this is a no-op.
 
     Args:
         builder_args (:class:`BuilderArgs`):
             Command args for model building.
     Returns:
-        Tuple[Optional[DeviceMesh], Optional[ParallelDims]]: 
-            - The first element is an optional DeviceMesh object, 
+        Tuple[Optional[DeviceMesh], Optional[ParallelDims]]:
+            - The first element is an optional DeviceMesh object,
             which which describes the mesh topology of devices for the DTensor.
-            - The second element is an optional ParallelDims object, 
+            - The second element is an optional ParallelDims object,
             which represents the parallel dimensions configuration.
     """
     if not builder_args.use_distributed:
         return None, None
     dist_config = 'llama3_8B.toml'  # TODO - integrate with chat cmd line
-    
-    world_mesh, parallel_dims = launch_distributed(dist_config) 
-    
+
+    world_mesh, parallel_dims = launch_distributed(dist_config)
+
     assert world_mesh is not None and parallel_dims is not None, f"failed to launch distributed using {dist_config}"
-    
+
     return world_mesh, parallel_dims
 
 

diff --git a/cli.py b/cli.py
@@ -29,7 +29,7 @@
 INVENTORY_VERBS = ["download", "list", "remove", "where"]
 
 # Subcommands related to generating inference output based on user prompts
-GENERATION_VERBS = ["browser", "chat", "generate", "server"] 
+GENERATION_VERBS = ["browser", "chat", "generate", "server"]
 
 # List of all supported subcommands in torchchat
 KNOWN_VERBS = GENERATION_VERBS + ["eval", "export"] + INVENTORY_VERBS
@@ -49,9 +49,6 @@ def check_args(args, verb: str) -> None:
 
 # Given a arg parser and a subcommand (verb), add the appropriate arguments
 # for that subcommand.
-#
-# Note the use of argparse.SUPPRESS to hide arguments from --help due to 
-# legacy CLI arg parsing. See https://github.com/pytorch/torchchat/issues/932
 def add_arguments_for_verb(parser, verb: str) -> None:
     # Argument closure for inventory related subcommands
     if verb in INVENTORY_VERBS:
@@ -62,17 +59,17 @@ def add_arguments_for_verb(parser, verb: str) -> None:
     # Add argument groups for model specification (what base model to use)
     _add_model_specification_args(parser)
 
-    # Add argument groups for exported model path IO
-    _add_exported_input_path_args(parser, verb)
-    _add_export_output_path_args(parser, verb)
-
     # Add argument groups for model configuration (compilation, quant, etc)
     _add_model_config_args(parser, verb)
 
     # Add thematic argument groups based on the subcommand
-    if verb in ["browser", "chat", "generate", "server"]:
+    if verb in GENERATION_VERBS:
+        _add_exported_input_path_args(parser)
         _add_generation_args(parser, verb)
+    if verb == "export":
+        _add_export_output_path_args(parser)
     if verb == "eval":
+        _add_exported_input_path_args(parser)
         _add_evaluation_args(parser)
 
     # Add CLI Args related to downloading of model artifacts (if not already downloaded)
@@ -89,8 +86,13 @@ def add_arguments_for_verb(parser, verb: str) -> None:
 
 # Add CLI Args related to model specification (what base model to use)
 def _add_model_specification_args(parser) -> None:
-    model_specification_parser = parser.add_argument_group("Model Specification", "(REQUIRED) Specify the base model. Args are mutually exclusive.")
-    exclusive_parser = model_specification_parser.add_mutually_exclusive_group(required=True)
+    model_specification_parser = parser.add_argument_group(
+        "Model Specification",
+        "(REQUIRED) Specify the base model. Args are mutually exclusive.",
+    )
+    exclusive_parser = model_specification_parser.add_mutually_exclusive_group(
+        required=True
+    )
     exclusive_parser.add_argument(
         "model",
         type=str,
@@ -120,20 +122,26 @@ def _add_model_specification_args(parser) -> None:
         help=argparse.SUPPRESS,
     )
 
+
 # Add CLI Args related to model configuration (compilation, quant, etc)
+# Excludes compile args if subcommand is export
 def _add_model_config_args(parser, verb: str) -> None:
-    is_not_export = verb != "export"
-    model_config_parser = parser.add_argument_group("Model Configuration", "Specify model configurations")
-    model_config_parser.add_argument(
-        "--compile",
-        action="store_true",
-        help="Whether to compile the model with torch.compile" if is_not_export else argparse.SUPPRESS,
-    )
-    model_config_parser.add_argument(
-        "--compile-prefill",
-        action="store_true",
-        help="Whether to compile the prefill. Improves prefill perf, but has higher compile times." if is_not_export else argparse.SUPPRESS,
+    model_config_parser = parser.add_argument_group(
+        "Model Configuration", "Specify model configurations"
     )
+
+    if verb != "export":
+        model_config_parser.add_argument(
+            "--compile",
+            action="store_true",
+            help="Whether to compile the model with torch.compile",
+        )
+        model_config_parser.add_argument(
+            "--compile-prefill",
+            action="store_true",
+            help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
+        )
+
     model_config_parser.add_argument(
         "--dtype",
         default="fast",
@@ -157,54 +165,55 @@ def _add_model_config_args(parser, verb: str) -> None:
         help="Hardware device to use. Options: cpu, cuda, mps",
     )
 
-# Add CLI Args representing output paths of exported model files
-def _add_export_output_path_args(parser, verb: str) -> None:
-    is_export = verb == "export"
 
+# Add CLI Args representing output paths of exported model files
+def _add_export_output_path_args(parser) -> None:
     output_path_parser = parser.add_argument_group(
-        "Export Output Path" if is_export else None,
-        "Specify the output path for the exported model files" if is_export else None,
+        "Export Output Path",
+        "Specify the output path for the exported model files",
     )
     exclusive_parser = output_path_parser.add_mutually_exclusive_group()
     exclusive_parser.add_argument(
         "--output-pte-path",
         type=str,
         default=None,
-        help="Output to the specified ExecuTorch .pte model file" if is_export else argparse.SUPPRESS,
+        help="Output to the specified ExecuTorch .pte model file",
     )
     exclusive_parser.add_argument(
         "--output-dso-path",
         type=str,
         default=None,
-        help="Output to the specified AOT Inductor .dso model file" if is_export else argparse.SUPPRESS,
+        help="Output to the specified AOT Inductor .dso model file",
     )
 
 
 # Add CLI Args representing user provided exported model files
-def _add_exported_input_path_args(parser, verb: str) -> None:
-    is_generation_verb = verb in GENERATION_VERBS
-
+def _add_exported_input_path_args(parser) -> None:
     exported_model_path_parser = parser.add_argument_group(
-        "Exported Model Path" if is_generation_verb else None,
-        "Specify the path of the exported model files to ingest" if is_generation_verb else None,
+        "Exported Model Path",
+        "Specify the path of the exported model files to ingest",
     )
     exclusive_parser = exported_model_path_parser.add_mutually_exclusive_group()
     exclusive_parser.add_argument(
         "--dso-path",
         type=Path,
         default=None,
-        help="Use the specified AOT Inductor .dso model file" if is_generation_verb else argparse.SUPPRESS,
+        help="Use the specified AOT Inductor .dso model file",
     )
     exclusive_parser.add_argument(
         "--pte-path",
         type=Path,
         default=None,
-        help="Use the specified ExecuTorch .pte model file" if is_generation_verb else argparse.SUPPRESS,
+        help="Use the specified ExecuTorch .pte model file",
     )
 
+
 # Add CLI Args related to JIT downloading of model artifacts
 def _add_jit_downloading_args(parser) -> None:
-    jit_downloading_parser = parser.add_argument_group("Model Downloading", "Specify args for model downloading (if model is not downloaded)",)
+    jit_downloading_parser = parser.add_argument_group(
+        "Model Downloading",
+        "Specify args for model downloading (if model is not downloaded)",
+    )
     jit_downloading_parser.add_argument(
         "--hf-token",
         type=str,
@@ -217,7 +226,8 @@ def _add_jit_downloading_args(parser) -> None:
         default=default_model_dir,
         help=f"The directory to store downloaded model artifacts. Default: {default_model_dir}",
     )
-
+
+
 # Add CLI Args that are general to subcommand cli execution
 def _add_cli_metadata_args(parser) -> None:
     parser.add_argument(
@@ -270,16 +280,26 @@ def _configure_artifact_inventory_args(parser, verb: str) -> None:
 
 
 # Add CLI Args specific to user prompted generation
+# The --prompt and --num-samples args are only added when the subcommand is generate
 def _add_generation_args(parser, verb: str) -> None:
     generator_parser = parser.add_argument_group(
         "Generation", "Configs for generating output based on provided prompt"
     )
-    generator_parser.add_argument(
-        "--prompt",
-        type=str,
-        default="Hello, my name is",
-        help="Input prompt for manual output generation" if verb == "generate" else argparse.SUPPRESS,
-    )
+
+    if verb == "generate":
+        generator_parser.add_argument(
+            "--prompt",
+            type=str,
+            default="Hello, my name is",
+            help="Input prompt for manual output generation",
+        )
+        generator_parser.add_argument(
+            "--num-samples",
+            type=int,
+            default=1,
+            help="Number of samples",
+        )
+
     generator_parser.add_argument(
         "--chat",
         action="store_true",
@@ -292,12 +312,6 @@ def _add_generation_args(parser, verb: str) -> None:
         # help="Whether to use a web UI for an interactive chat session",
         help=argparse.SUPPRESS,
     )
-    generator_parser.add_argument(
-        "--num-samples",
-        type=int,
-        default=1,
-        help="Number of samples" if verb == "generate" else argparse.SUPPRESS,
-    )
     generator_parser.add_argument(
         "--max-new-tokens",
         type=int,
@@ -441,7 +455,7 @@ def arg_init(args):
     # if we specify dtype in quantization recipe, replicate it as args.dtype
     args.dtype = args.quantize.get("precision", {}).get("dtype", args.dtype)
 
-    if args.output_pte_path:
+    if getattr(args, "output_pte_path", None):
         if args.device not in ["cpu", "fast"]:
             raise RuntimeError("Device not supported by ExecuTorch")
         args.device = "cpu"
@@ -451,12 +465,14 @@ def arg_init(args):
         )
 
     if "mps" in args.device:
-        if args.compile or args.compile_prefill:
+        # The compile flags are not registered for every subcommand (e.g. export),
+        # so read them defensively and only warn when compilation was requested.
+        if getattr(args, "compile", False) or getattr(args, "compile_prefill", False):
             print(
                 "Warning: compilation is not available with device MPS, ignoring option to engage compilation"
             )
             args.compile = False
             args.compile_prefill = False
 
     if hasattr(args, "seed") and args.seed:
         torch.manual_seed(args.seed)

diff --git a/generate.py b/generate.py
@@ -103,8 +103,10 @@ def validate_build(
 
     @classmethod
     def from_args(cls, args):
+        dso_path = getattr(args, "dso_path", None)
+        pte_path = getattr(args, "pte_path", None)
         sequential_prefill = (
-            args.sequential_prefill or bool(args.dso_path) or bool(args.pte_path)
+            args.sequential_prefill or bool(dso_path) or bool(pte_path)
         )
 
         return cls(