From 3e3cf55b049c83022ca108f2196229fc464d7f8a Mon Sep 17 00:00:00 2001
From: Jack Khuu
Date: Tue, 9 Jul 2024 20:43:18 -0700
Subject: [PATCH 1/6] Fixing the help mode of the download subcommand

---
 cli.py       | 62 ++++++++++++++++++++++++++++++++++++++++++++--------
 download.py  |  7 ------
 torchchat.py | 17 +++++++++-----
 3 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/cli.py b/cli.py
index 1841a533f..a06ff5de1 100644
--- a/cli.py
+++ b/cli.py
@@ -24,25 +24,35 @@
 ).expanduser()
 
 
-KNOWN_VERBS = ["chat", "browser", "download", "generate", "eval", "export", "list", "remove", "where"]
+# Subcommands related to downloading and managing model artifacts
+INVENTORY_VERBS = ["download", "list", "remove", "where"]
+
+# List of all supported subcommands in torchchat
+KNOWN_VERBS = ["chat", "browser", "generate", "eval", "export"] + INVENTORY_VERBS
 
 # Handle CLI arguments that are common to a majority of subcommands.
 def check_args(args, verb: str) -> None:
     # Handle model download. Skip this for download, since it has slightly
     # different semantics.
     if (
-        verb not in ["download", "list", "remove"]
+        verb not in INVENTORY_VERBS
         and args.model
         and not is_model_downloaded(args.model, args.model_directory)
     ):
         download_and_convert(args.model, args.model_directory, args.hf_token)
 
 
-def add_arguments_for_verb(parser, verb: str):
+def add_arguments_for_verb(parser, verb: str) -> None:
     # Model specification. TODO Simplify this.
     # A model can be specified using a positional model name or HuggingFace
     # path. Alternatively, the model can be specified via --gguf-path or via
     # an explicit --checkpoint-dir, --checkpoint-path, or --tokenizer-path.
+
+    if verb in INVENTORY_VERBS:
+        _configure_artifact_inventory_args(parser, verb)
+        _add_cli_metadata_args(parser)
+        return
+
     parser.add_argument(
         "model",
         type=str,
@@ -191,12 +201,6 @@ def add_arguments_for_verb(parser, verb: str):
         choices=allowable_dtype_names(),
         help="Override the dtype of the model (default is the checkpoint dtype). Options: bf16, fp16, fp32, fast16, fast",
     )
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action="store_true",
-        help="Verbose output",
-    )
     parser.add_argument(
         "--quantize",
         type=str,
@@ -252,6 +256,46 @@ def add_arguments_for_verb(parser, verb: str):
         default=5000,
         help="Port for the web server in browser mode",
     )
+    _add_cli_metadata_args(parser)
+
+
+# Add CLI Args that are relevant to any subcommand execution
+def _add_cli_metadata_args(parser) -> None:
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Verbose output",
+    )
+
+
+# Configure CLI Args specific to Model Artifact Management
+def _configure_artifact_inventory_args(parser, verb: str) -> None:
+    if verb in ["download", "remove", "where"]:
+        parser.add_argument(
+            "model",
+            type=str,
+            nargs="?",
+            default=None,
+            help="Model name for well-known models",
+        )
+
+    if verb in INVENTORY_VERBS:
+        parser.add_argument(
+            "--model-directory",
+            type=Path,
+            default=default_model_dir,
+            help=f"The directory to store downloaded model artifacts. Default: {default_model_dir}",
+        )
+
+    if verb == "download":
+        parser.add_argument(
+            "--hf-token",
+            type=str,
+            default=None,
+            help="A HuggingFace API token to use when downloading model artifacts",
+        )
+
 
 # Add CLI Args specific to Model Evaluation
 def _add_evaluation_args(parser) -> None:
diff --git a/download.py b/download.py
index 41008de3d..9b00638e8 100644
--- a/download.py
+++ b/download.py
@@ -125,13 +125,6 @@ def is_model_downloaded(model: str, models_dir: Path) -> bool:
 
 # Subcommand to list available models.
 def list_main(args) -> None:
-    # TODO It would be nice to have argparse validate this. However, we have
-    # model as an optional named parameter for all subcommands, so we'd
-    # probably need to move it to be registered per-command.
-    if args.model:
-        print("Usage: torchchat.py list")
-        return
-
     model_configs = load_model_configs()
 
     # Build the table in-memory so that we can align the text nicely.
diff --git a/torchchat.py b/torchchat.py
index 693ef31a1..136dcb271 100644
--- a/torchchat.py
+++ b/torchchat.py
@@ -12,6 +12,7 @@
 from cli import (
     add_arguments_for_verb,
     KNOWN_VERBS,
+    INVENTORY_VERBS,
     arg_init,
     check_args,
 )
@@ -49,7 +50,11 @@
 
     # Now parse the arguments
     args = parser.parse_args()
-    args = arg_init(args)
+
+    # Don't initialize for Inventory management subcommands
+    # TODO: Remove when arg_init is refactored
+    if args.command not in INVENTORY_VERBS:
+        args = arg_init(args)
     logging.basicConfig(
         format="%(message)s", level=logging.DEBUG if args.verbose else logging.INFO
     )
@@ -70,11 +75,6 @@
         from browser.browser import main as browser_main
 
         browser_main(args)
-    elif args.command == "download":
-        check_args(args, "download")
-        from download import download_main
-
-        download_main(args)
     elif args.command == "generate":
         check_args(args, "generate")
         from generate import main as generate_main
@@ -89,6 +89,11 @@
         from export import main as export_main
 
         export_main(args)
+    elif args.command == "download":
+        check_args(args, "download")
+        from download import download_main
+
+        download_main(args)
     elif args.command == "list":
         check_args(args, "list")
         from download import list_main
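The heart of this patch is the early return in add_arguments_for_verb:
inventory verbs get only the artifact-management flags, so
"torchchat.py download --help" now shows just the options that verb
actually accepts. A minimal runnable sketch of the same pattern, using
illustrative verb and flag names rather than torchchat's full argument
set:

    import argparse

    INVENTORY_VERBS = ["download", "list", "remove", "where"]
    KNOWN_VERBS = ["chat", "generate"] + INVENTORY_VERBS

    def add_arguments_for_verb(parser, verb):
        if verb in INVENTORY_VERBS:
            # Inventory verbs carry only artifact-management flags.
            parser.add_argument("model", nargs="?", default=None, help="Model name")
            return
        # Every other verb gets the full generation surface.
        parser.add_argument("--prompt", default="Hello, my name is", help="Input prompt")

    parser = argparse.ArgumentParser(prog="torchchat.py")
    subparsers = parser.add_subparsers(dest="command")
    for verb in KNOWN_VERBS:
        add_arguments_for_verb(subparsers.add_parser(verb), verb)

    print(parser.parse_args(["download", "stories15M"]))
    # Namespace(command='download', model='stories15M'); "download --help"
    # lists only the slim inventory flags.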
From 6a870c4fc3bd57b462f72494722b7a0aa0a0964a Mon Sep 17 00:00:00 2001
From: Jack Khuu
Date: Tue, 9 Jul 2024 22:11:47 -0700
Subject: [PATCH 2/6] Initial Addition of subparsers for generation

---
 cli.py | 193 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 108 insertions(+), 85 deletions(-)

diff --git a/cli.py b/cli.py
index a06ff5de1..df389ed39 100644
--- a/cli.py
+++ b/cli.py
@@ -61,27 +61,14 @@ def add_arguments_for_verb(parser, verb: str) -> None:
         help="Model name for well-known models",
     )
 
-    parser.add_argument(
-        "--chat",
-        action="store_true",
-        help="Whether to start an interactive chat session",
-    )
+    if verb in ["browser", "chat", "generate"]:
+        _add_generation_args(parser)
+
     parser.add_argument(
         "--distributed",
         action="store_true",
         help="Whether to enable distributed inference",
     )
-    parser.add_argument(
-        "--gui",
-        action="store_true",
-        help="Whether to use a web UI for an interactive chat session",
-    )
-    parser.add_argument(
-        "--prompt",
-        type=str,
-        default="Hello, my name is",
-        help="Input prompt",
-    )
     parser.add_argument(
         "--is-chat-model",
         action="store_true",
@@ -93,54 +80,12 @@ def add_arguments_for_verb(parser, verb: str) -> None:
         default=None,
         help="Initialize torch seed",
     )
-    parser.add_argument(
-        "--num-samples",
-        type=int,
-        default=1,
-        help="Number of samples",
-    )
-    parser.add_argument(
-        "--max-new-tokens",
-        type=int,
-        default=200,
-        help="Maximum number of new tokens",
-    )
-    parser.add_argument(
-        "--top-k",
-        type=int,
-        default=200,
-        help="Top-k for sampling",
-    )
-    parser.add_argument(
-        "--temperature", type=float, default=0.8, help="Temperature for sampling"
-    )
-    parser.add_argument(
-        "--compile",
-        action="store_true",
-        help="Whether to compile the model with torch.compile",
-    )
-    parser.add_argument(
-        "--compile-prefill",
-        action="store_true",
-        help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
-    )
-    parser.add_argument(
-        "--sequential-prefill",
-        action="store_true",
-        help="Whether to perform prefill sequentially. Only used for model debug.",
-    )
     parser.add_argument(
         "--profile",
         type=Path,
         default=None,
         help="Profile path.",
     )
-    parser.add_argument(
-        "--speculate-k",
-        type=int,
-        default=5,
-        help="Speculative execution depth",
-    )
     parser.add_argument(
         "--draft-checkpoint-path",
         type=Path,
@@ -171,30 +116,10 @@ def add_arguments_for_verb(parser, verb: str) -> None:
         default=None,
         help="Use the specified model tokenizer file",
     )
-    parser.add_argument(
-        "--output-pte-path",
-        type=str,
-        default=None,
-        help="Output to the specified ExecuTorch .pte model file",
-    )
-    parser.add_argument(
-        "--output-dso-path",
-        type=str,
-        default=None,
-        help="Output to the specified AOT Inductor .dso model file",
-    )
-    parser.add_argument(
-        "--dso-path",
-        type=Path,
-        default=None,
-        help="Use the specified AOT Inductor .dso model file",
-    )
-    parser.add_argument(
-        "--pte-path",
-        type=Path,
-        default=None,
-        help="Use the specified ExecuTorch .pte model file",
-    )
+
+    _add_exported_model_input_args(parser)
+    _add_export_output_path_args(parser)
+
     parser.add_argument(
         "--dtype",
         default="fast",
@@ -259,6 +184,40 @@ def add_arguments_for_verb(parser, verb: str) -> None:
     _add_cli_metadata_args(parser)
 
 
+# Add CLI Args representing user provided exported model files
+def _add_export_output_path_args(parser) -> None:
+    output_path_parser = parser.add_argument_group("Export Output Path Args", "Specify the output path for the exported model files")
+    output_path_parser.add_argument(
+        "--output-pte-path",
+        type=str,
+        default=None,
+        help="Output to the specified ExecuTorch .pte model file",
+    )
+    output_path_parser.add_argument(
+        "--output-dso-path",
+        type=str,
+        default=None,
+        help="Output to the specified AOT Inductor .dso model file",
+    )
+
+
+# Add CLI Args representing user provided exported model files
+def _add_exported_model_input_args(parser) -> None:
+    exported_model_path_parser = parser.add_argument_group("Exported Model Path Args", "Specify the path of the exported model files to ingest")
+    exported_model_path_parser.add_argument(
+        "--dso-path",
+        type=Path,
+        default=None,
+        help="Use the specified AOT Inductor .dso model file",
+    )
+    exported_model_path_parser.add_argument(
+        "--pte-path",
+        type=Path,
+        default=None,
+        help="Use the specified ExecuTorch .pte model file",
+    )
+
+
 # Add CLI Args that are relevant to any subcommand execution
 def _add_cli_metadata_args(parser) -> None:
     parser.add_argument(
@@ -297,22 +256,86 @@ def _configure_artifact_inventory_args(parser, verb: str) -> None:
     )
 
 
+# Add CLI Args specific to user prompted generation
+def _add_generation_args(parser) -> None:
+    generater_parser = parser.add_argument_group("Generation Args", "Configs for generating output based on provided prompt")
+    generater_parser.add_argument(
+        "--prompt",
+        type=str,
+        default="Hello, my name is",
+        help="Input prompt for manual output generation",
+    )
+    generater_parser.add_argument(
+        "--chat",
+        action="store_true",
+        help="Whether to start an interactive chat session",
+    )
+    generater_parser.add_argument(
+        "--gui",
+        action="store_true",
+        help="Whether to use a web UI for an interactive chat session",
+    )
+    generater_parser.add_argument(
+        "--num-samples",
+        type=int,
+        default=1,
+        help="Number of samples",
+    )
+    generater_parser.add_argument(
+        "--max-new-tokens",
+        type=int,
+        default=200,
+        help="Maximum number of new tokens",
+    )
+    generater_parser.add_argument(
+        "--top-k",
+        type=int,
+        default=200,
+        help="Top-k for sampling",
+    )
+    generater_parser.add_argument(
+        "--temperature", type=float, default=0.8, help="Temperature for sampling"
+    )
+    generater_parser.add_argument(
+        "--compile",
+        action="store_true",
+        help="Whether to compile the model with torch.compile",
+    )
+    generater_parser.add_argument(
+        "--compile-prefill",
+        action="store_true",
+        help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
+    )
+    generater_parser.add_argument(
+        "--sequential-prefill",
+        action="store_true",
+        help="Whether to perform prefill sequentially. Only used for model debug.",
+    )
+    generater_parser.add_argument(
+        "--speculate-k",
+        type=int,
+        default=5,
+        help="Speculative execution depth",
+    )
+
+
 # Add CLI Args specific to Model Evaluation
 def _add_evaluation_args(parser) -> None:
-    parser.add_argument(
+    eval_parser = parser.add_argument_group("Evaluation Args", "Configs for evaluating model performance")
+    eval_parser.add_argument(
         "--tasks",
         nargs="+",
         type=str,
         default=["wikitext"],
         help="List of lm-eluther tasks to evaluate. Usage: --tasks task1 task2",
     )
-    parser.add_argument(
+    eval_parser.add_argument(
         "--limit",
         type=int,
         default=None,
         help="Number of samples to evaluate",
     )
-    parser.add_argument(
+    eval_parser.add_argument(
         "--max-seq-length",
         type=int,
         default=None,
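A note on the mechanism this patch relies on: argparse argument groups
only change how --help output is organized; parsed values still land in
the same flat namespace, which is why code reading args.max_new_tokens
and friends needs no changes. A small sketch (hypothetical parser, not
cli.py itself):

    import argparse

    parser = argparse.ArgumentParser(prog="generate")
    generation_args = parser.add_argument_group(
        "Generation Args", "Configs for generating output based on provided prompt"
    )
    generation_args.add_argument("--max-new-tokens", type=int, default=200)
    generation_args.add_argument("--temperature", type=float, default=0.8)

    args = parser.parse_args(["--max-new-tokens", "64"])
    # Grouping affects --help layout only; the namespace stays flat.
    print(args.max_new_tokens, args.temperature)  # 64 0.8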
help="Maximum number of new tokens", - ) - parser.add_argument( - "--top-k", - type=int, - default=200, - help="Top-k for sampling", - ) - parser.add_argument( - "--temperature", type=float, default=0.8, help="Temperature for sampling" - ) - parser.add_argument( - "--compile", - action="store_true", - help="Whether to compile the model with torch.compile", - ) - parser.add_argument( - "--compile-prefill", - action="store_true", - help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.", - ) - parser.add_argument( - "--sequential-prefill", - action="store_true", - help="Whether to perform prefill sequentially. Only used for model debug.", - ) parser.add_argument( "--profile", type=Path, default=None, help="Profile path.", ) - parser.add_argument( - "--speculate-k", - type=int, - default=5, - help="Speculative execution depth", - ) parser.add_argument( "--draft-checkpoint-path", type=Path, @@ -171,30 +116,10 @@ def add_arguments_for_verb(parser, verb: str) -> None: default=None, help="Use the specified model tokenizer file", ) - parser.add_argument( - "--output-pte-path", - type=str, - default=None, - help="Output to the specified ExecuTorch .pte model file", - ) - parser.add_argument( - "--output-dso-path", - type=str, - default=None, - help="Output to the specified AOT Inductor .dso model file", - ) - parser.add_argument( - "--dso-path", - type=Path, - default=None, - help="Use the specified AOT Inductor .dso model file", - ) - parser.add_argument( - "--pte-path", - type=Path, - default=None, - help="Use the specified ExecuTorch .pte model file", - ) + + _add_exported_model_input_args(parser) + _add_export_output_path_args(parser) + parser.add_argument( "--dtype", default="fast", @@ -259,6 +184,40 @@ def add_arguments_for_verb(parser, verb: str) -> None: _add_cli_metadata_args(parser) +# Add CLI Args representing user provided exported model files +def _add_export_output_path_args(parser) -> None: + output_path_parser = parser.add_argument_group("Export Output Path Args", "Specify the output path for the exported model files") + output_path_parser.add_argument( + "--output-pte-path", + type=str, + default=None, + help="Output to the specified ExecuTorch .pte model file", + ) + output_path_parser.add_argument( + "--output-dso-path", + type=str, + default=None, + help="Output to the specified AOT Inductor .dso model file", + ) + + +# Add CLI Args representing user provided exported model files +def _add_exported_model_input_args(parser) -> None: + exported_model_path_parser = parser.add_argument_group("Exported Model Path Args", "Specify the path of the exported model files to ingest") + exported_model_path_parser.add_argument( + "--dso-path", + type=Path, + default=None, + help="Use the specified AOT Inductor .dso model file", + ) + exported_model_path_parser.add_argument( + "--pte-path", + type=Path, + default=None, + help="Use the specified ExecuTorch .pte model file", + ) + + # Add CLI Args that are relevant to any subcommand execution def _add_cli_metadata_args(parser) -> None: parser.add_argument( @@ -297,22 +256,86 @@ def _configure_artifact_inventory_args(parser, verb: str) -> None: ) +# Add CLI Args specific to user prompted generation +def _add_generation_args(parser) -> None: + generater_parser = parser.add_argument_group("Generation Args", "Configs for generating output based on provided prompt") + generater_parser.add_argument( + "--prompt", + type=str, + default="Hello, my name is", + help="Input prompt for manual output generation", + ) + 
From 76325da78589515901f155b0385d29e457b99007 Mon Sep 17 00:00:00 2001
From: Jack Khuu
Date: Wed, 10 Jul 2024 01:10:09 -0700
Subject: [PATCH 5/6] Fix test by removing temperature, which is a field eval
 doesn't use or expect

---
 .ci/scripts/validate.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/scripts/validate.sh b/.ci/scripts/validate.sh
index bc8146d03..12107f8d3 100644
--- a/.ci/scripts/validate.sh
+++ b/.ci/scripts/validate.sh
@@ -284,7 +284,7 @@ function eval_model_sanity_check() {
         echo "*************************************************"
         if [ "$DTYPE" != "float16" ]; then
             python3 -W ignore export.py --dtype ${DTYPE} --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
-            python3 -W ignore eval.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/output_eval_aoti" || exit 1
+            python3 -W ignore eval.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/output_eval_aoti" || exit 1
             cat "$MODEL_DIR/output_eval_aoti"
         fi;
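Context for this CI fix: once --temperature moved into the
generation-only argument set in patch 2, the eval flow no longer
defines that flag, and argparse exits on unrecognized arguments. A
sketch with a hypothetical slimmed-down parser (not eval.py's real
argument set):

    import argparse

    eval_parser = argparse.ArgumentParser(prog="eval.py")
    eval_parser.add_argument("--limit", type=int, default=None)

    try:
        eval_parser.parse_args(["--temperature", "0", "--limit", "5"])
    except SystemExit:
        # argparse printed: eval.py: error: unrecognized arguments: --temperature 0
        pass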
From d71edb6d292b9fbccd08a9f9f58141f1673ffb6b Mon Sep 17 00:00:00 2001
From: Jack Khuu
Date: Wed, 10 Jul 2024 01:56:47 -0700
Subject: [PATCH 6/6] Typo Generater => Generator

---
 cli.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/cli.py b/cli.py
index d4f2faf3a..f36d93346 100644
--- a/cli.py
+++ b/cli.py
@@ -263,55 +263,55 @@ def _configure_artifact_inventory_args(parser, verb: str) -> None:
 
 # Add CLI Args specific to user prompted generation
 def _add_generation_args(parser) -> None:
-    generater_parser = parser.add_argument_group("Generation Args", "Configs for generating output based on provided prompt")
-    generater_parser.add_argument(
+    generator_parser = parser.add_argument_group("Generation Args", "Configs for generating output based on provided prompt")
+    generator_parser.add_argument(
         "--prompt",
         type=str,
         default="Hello, my name is",
         help="Input prompt for manual output generation",
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--chat",
         action="store_true",
         help="Whether to start an interactive chat session",
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--gui",
         action="store_true",
         help="Whether to use a web UI for an interactive chat session",
    )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--num-samples",
         type=int,
         default=1,
         help="Number of samples",
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--max-new-tokens",
         type=int,
         default=200,
         help="Maximum number of new tokens",
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--top-k",
         type=int,
         default=200,
         help="Top-k for sampling",
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--temperature", type=float, default=0.8, help="Temperature for sampling"
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--compile-prefill",
         action="store_true",
         help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--sequential-prefill",
         action="store_true",
         help="Whether to perform prefill sequentially. Only used for model debug.",
     )
-    generater_parser.add_argument(
+    generator_parser.add_argument(
         "--speculate-k",
         type=int,
         default=5,