This repository was archived by the owner on Sep 10, 2025. It is now read-only.
Merged
2 changes: 1 addition & 1 deletion .ci/scripts/validate.sh
@@ -284,7 +284,7 @@ function eval_model_sanity_check() {
echo "*************************************************"
if [ "$DTYPE" != "float16" ]; then
python3 -W ignore export.py --dtype ${DTYPE} --quant "$QUANT_OPTIONS" --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" || exit 1
python3 -W ignore eval.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/output_eval_aoti" || exit 1
Contributor: why are we dropping temperature 0 here? Won't this impact eval?

@Jack-Khuu (Contributor, Author), Jul 10, 2024:
Eval currently doesn't use the field at all...

I have a non-MVP task mentioning this; it seems gptfast doesn't take a temp arg either.

python3 -W ignore eval.py --dtype ${DTYPE} --checkpoint-path "$CHECKPOINT_PATH" --dso-path ${MODEL_DIR}/${MODEL_NAME}.so --device "$TARGET_DEVICE" --limit 5 > "$MODEL_DIR/output_eval_aoti" || exit 1
cat "$MODEL_DIR/output_eval_aoti"
fi;
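
As context for the exchange above: an argparse flag can be accepted on the command line yet never read anywhere in the program, in which case dropping it from an invocation changes nothing. A minimal standalone sketch of that situation (hypothetical script, not torchchat's actual eval.py):

import argparse

parser = argparse.ArgumentParser(description="toy eval driver")
parser.add_argument("--temperature", type=float, default=0.8, help="parsed, but never read below")
parser.add_argument("--limit", type=int, default=None, help="number of samples to evaluate")
args = parser.parse_args()

# Only args.limit affects the result; args.temperature is accepted and then ignored,
# which is why removing --temperature 0 from the eval command leaves its output unchanged.
print(f"evaluating limit={args.limit} samples")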

183 changes: 103 additions & 80 deletions cli.py
@@ -61,27 +61,14 @@ def add_arguments_for_verb(parser, verb: str) -> None:
help="Model name for well-known models",
)

parser.add_argument(
"--chat",
action="store_true",
help="Whether to start an interactive chat session",
)
if verb in ["browser", "chat", "generate"]:
_add_generation_args(parser)

parser.add_argument(
"--distributed",
action="store_true",
help="Whether to enable distributed inference",
)
parser.add_argument(
"--gui",
action="store_true",
help="Whether to use a web UI for an interactive chat session",
)
parser.add_argument(
"--prompt",
type=str,
default="Hello, my name is",
help="Input prompt",
)
parser.add_argument(
"--is-chat-model",
action="store_true",
@@ -93,54 +80,17 @@ def add_arguments_for_verb(parser, verb: str) -> None:
default=None,
help="Initialize torch seed",
)
parser.add_argument(
"--num-samples",
type=int,
default=1,
help="Number of samples",
)
parser.add_argument(
"--max-new-tokens",
type=int,
default=200,
help="Maximum number of new tokens",
)
parser.add_argument(
"--top-k",
type=int,
default=200,
help="Top-k for sampling",
)
parser.add_argument(
"--temperature", type=float, default=0.8, help="Temperature for sampling"
)
parser.add_argument(
"--compile",
action="store_true",
help="Whether to compile the model with torch.compile",
)
parser.add_argument(
"--compile-prefill",
action="store_true",
help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
)
parser.add_argument(
"--sequential-prefill",
action="store_true",
help="Whether to perform prefill sequentially. Only used for model debug.",
)
parser.add_argument(
"--profile",
type=Path,
default=None,
help="Profile path.",
)
parser.add_argument(
"--speculate-k",
type=int,
default=5,
help="Speculative execution depth",
)
parser.add_argument(
"--draft-checkpoint-path",
type=Path,
@@ -171,30 +121,10 @@ def add_arguments_for_verb(parser, verb: str) -> None:
default=None,
help="Use the specified model tokenizer file",
)
parser.add_argument(
"--output-pte-path",
type=str,
default=None,
help="Output to the specified ExecuTorch .pte model file",
)
parser.add_argument(
"--output-dso-path",
type=str,
default=None,
help="Output to the specified AOT Inductor .dso model file",
)
parser.add_argument(
"--dso-path",
type=Path,
default=None,
help="Use the specified AOT Inductor .dso model file",
)
parser.add_argument(
"--pte-path",
type=Path,
default=None,
help="Use the specified ExecuTorch .pte model file",
)

_add_exported_model_input_args(parser)
_add_export_output_path_args(parser)

parser.add_argument(
"--dtype",
default="fast",
@@ -259,6 +189,40 @@ def add_arguments_for_verb(parser, verb: str) -> None:
_add_cli_metadata_args(parser)


# Add CLI Args representing user provided exported model files
def _add_export_output_path_args(parser) -> None:
output_path_parser = parser.add_argument_group("Export Output Path Args", "Specify the output path for the exported model files")
output_path_parser.add_argument(
"--output-pte-path",
type=str,
default=None,
help="Output to the specified ExecuTorch .pte model file",
)
output_path_parser.add_argument(
"--output-dso-path",
type=str,
default=None,
help="Output to the specified AOT Inductor .dso model file",
)


# Add CLI Args representing user provided exported model files
def _add_exported_model_input_args(parser) -> None:
exported_model_path_parser = parser.add_argument_group("Exported Model Path Args", "Specify the path of the exported model files to ingest")
exported_model_path_parser.add_argument(
"--dso-path",
type=Path,
default=None,
help="Use the specified AOT Inductor .dso model file",
)
exported_model_path_parser.add_argument(
"--pte-path",
type=Path,
default=None,
help="Use the specified ExecuTorch .pte model file",
)


# Add CLI Args that are relevant to any subcommand execution
def _add_cli_metadata_args(parser) -> None:
parser.add_argument(
@@ -297,22 +261,81 @@ def _configure_artifact_inventory_args(parser, verb: str) -> None:
)


# Add CLI Args specific to user prompted generation
def _add_generation_args(parser) -> None:
generator_parser = parser.add_argument_group("Generation Args", "Configs for generating output based on provided prompt")
generator_parser.add_argument(
"--prompt",
type=str,
default="Hello, my name is",
help="Input prompt for manual output generation",
)
generator_parser.add_argument(
"--chat",
action="store_true",
help="Whether to start an interactive chat session",
)
generator_parser.add_argument(
"--gui",
action="store_true",
help="Whether to use a web UI for an interactive chat session",
)
generator_parser.add_argument(
"--num-samples",
type=int,
default=1,
help="Number of samples",
)
generator_parser.add_argument(
"--max-new-tokens",
type=int,
default=200,
help="Maximum number of new tokens",
)
generator_parser.add_argument(
"--top-k",
type=int,
default=200,
help="Top-k for sampling",
)
generator_parser.add_argument(
"--temperature", type=float, default=0.8, help="Temperature for sampling"
)
generator_parser.add_argument(
"--compile-prefill",
action="store_true",
help="Whether to compile the prefill. Improves prefill perf, but has higher compile times.",
)
generator_parser.add_argument(
"--sequential-prefill",
action="store_true",
help="Whether to perform prefill sequentially. Only used for model debug.",
)
generator_parser.add_argument(
"--speculate-k",
type=int,
default=5,
help="Speculative execution depth",
)


# Add CLI Args specific to Model Evaluation
def _add_evaluation_args(parser) -> None:
parser.add_argument(
eval_parser = parser.add_argument_group("Evaluation Args", "Configs for evaluating model performance")
eval_parser.add_argument(
"--tasks",
nargs="+",
type=str,
default=["wikitext"],
help="List of lm-eluther tasks to evaluate. Usage: --tasks task1 task2",
)
parser.add_argument(
eval_parser.add_argument(
"--limit",
type=int,
default=None,
help="Number of samples to evaluate",
)
parser.add_argument(
eval_parser.add_argument(
"--max-seq-length",
type=int,
default=None,
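The refactor above leans on argparse argument groups to keep related flags together and to attach them per verb. A condensed sketch of that pattern (simplified, hypothetical names; the actual wiring is the cli.py diff above):

import argparse


def _add_generation_args(parser: argparse.ArgumentParser) -> None:
    # Grouping related flags gives them a shared heading in --help output.
    group = parser.add_argument_group("Generation Args", "Configs for generating output")
    group.add_argument("--prompt", type=str, default="Hello, my name is")
    group.add_argument("--max-new-tokens", type=int, default=200)


def build_parser(verb: str) -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(prog=f"demo {verb}")
    # Mirror the per-verb wiring in the diff: only generation-style verbs get these flags.
    if verb in ["browser", "chat", "generate"]:
        _add_generation_args(parser)
    return parser


if __name__ == "__main__":
    # "demo generate --help" lists the Generation Args group; "demo export --help" would not.
    args = build_parser("generate").parse_args(["--max-new-tokens", "64"])
    print(args.max_new_tokens)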