diff --git a/.github/scripts/torchao_model_releases/eval.sh b/.github/scripts/torchao_model_releases/eval.sh
index 1b24f26c2c..cfc49c7cc5 100644
--- a/.github/scripts/torchao_model_releases/eval.sh
+++ b/.github/scripts/torchao_model_releases/eval.sh
@@ -110,5 +110,5 @@ done
 
 # Run summarize_results.sh with MODEL_IDS if eval_type is "all"
 if [[ "$EVAL_TYPE" == "all" ]]; then
-    sh summarize_results.sh --model_id "${MODEL_ID_ARRAY[@]}"
+    sh summarize_results.sh --model_ids "${MODEL_ID_ARRAY[@]}"
 fi
diff --git a/.github/scripts/torchao_model_releases/quantize_and_upload.py b/.github/scripts/torchao_model_releases/quantize_and_upload.py
index e118cf8002..312fc2028b 100644
--- a/.github/scripts/torchao_model_releases/quantize_and_upload.py
+++ b/.github/scripts/torchao_model_releases/quantize_and_upload.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import argparse
+from typing import List, Optional
 
 import torch
 from huggingface_hub import ModelCard, get_token, whoami
@@ -617,7 +618,24 @@ def _untie_weights_and_save_locally(model_id):
 
 
 def quantize_and_upload(
-    model_id, quant, tasks, calibration_limit, max_seq_length, push_to_hub
+    model_id: str,
+    quant: str,
+    tasks: List[str],
+    calibration_limit: int,
+    max_seq_length: int,
+    push_to_hub: bool,
+    push_to_user_id: Optional[str] = None,
+    update_model_card: bool = False,
 ):
+    """Quantize ``model_id`` with the ``quant`` recipe, then push or save it.
+
+    Args:
+        push_to_hub: Push the quantized model and tokenizer to the hub
+            instead of saving them with ``save_pretrained``.
+        push_to_user_id: Namespace of the target model id; ``None`` falls
+            back to the name returned by ``_get_username()``.
+        update_model_card: Also push the model card. Only consulted when
+            ``push_to_hub`` is true.
+    """
     _int8_int4_linear_config = Int8DynamicActivationIntxWeightConfig(
         weight_dtype=torch.int4,
@@ -713,7 +731,9 @@ def quantize_and_upload(
     username = _get_username()
 
     MODEL_NAME = model_id.split("/")[-1]
-    save_to = f"{username}/{MODEL_NAME}-{quant}"
+
+    save_to_user_id = username if push_to_user_id is None else push_to_user_id
+    save_to = f"{save_to_user_id}/{MODEL_NAME}-{quant}"
     untied_model_path = 'f"{{MODEL_NAME}}-untied-weights"'
     is_mobile = quant == "INT8-INT4"
     quantized_model_id = save_to
@@ -759,7 +779,8 @@ def quantize_and_upload(
     if push_to_hub:
         quantized_model.push_to_hub(quantized_model_id, safe_serialization=False)
         tokenizer.push_to_hub(quantized_model_id)
-        card.push_to_hub(quantized_model_id)
+        if update_model_card:
+            card.push_to_hub(quantized_model_id)
     else:
         quantized_model.save_pretrained(quantized_model_id, safe_serialization=False)
         tokenizer.save_pretrained(quantized_model_id)
@@ -828,6 +849,18 @@ def quantize_and_upload(
         default=False,
         help="Flag to indicate whether push to huggingface hub or not",
     )
+    parser.add_argument(
+        "--push_to_user_id",
+        type=str,
+        default=None,
+        help="The user_id used as the namespace of the quantized model id (defaults to the current username); it also names the local save path when --push_to_hub is not set",
+    )
+    parser.add_argument(
+        "--update_model_card",
+        action="store_true",
+        default=False,
+        help="Flag to indicate whether to push the model card to huggingface hub or not, only used when --push_to_hub is set",
+    )
     args = parser.parse_args()
     quantize_and_upload(
         args.model_id,
@@ -836,4 +869,6 @@ def quantize_and_upload(
         args.calibration_limit,
         args.max_seq_length,
         args.push_to_hub,
+        args.push_to_user_id,
+        args.update_model_card,
     )
diff --git a/.github/scripts/torchao_model_releases/release.sh b/.github/scripts/torchao_model_releases/release.sh
index 567e9b4d1b..8a9cc478b4 100755
--- a/.github/scripts/torchao_model_releases/release.sh
+++ b/.github/scripts/torchao_model_releases/release.sh
@@ -15,6 +15,8 @@
 # Default quantization options
 default_quants=("FP8" "INT4" "INT8-INT4")
 push_to_hub=""
+push_to_user_id=""
+update_model_card=""
 # Parse arguments
 while [[ $# -gt 0 ]]; do
     case "$1" in
@@ -34,6 +36,18 @@ while [[ $# -gt 0 ]]; do
             push_to_hub="--push_to_hub"
             shift
             ;;
+        --push_to_user_id)
+            if [[ $# -lt 2 ]]; then
+                echo "Error: --push_to_user_id requires a value"
+                exit 1
+            fi
+            push_to_user_id="--push_to_user_id $2"
+            shift 2
+            ;;
+        --update_model_card)
+            update_model_card="--update_model_card"
+            shift
+            ;;
         *)
             echo "Unknown option: $1"
             exit 1
@@ -43,7 +57,7 @@ done
 # Use default quants if none specified
 if [[ -z "$model_id" ]]; then
     echo "Error: --model_id is required"
-    echo "Usage: $0 --model_id [--quants [quant2 ...]] [--push_to_hub]"
+    echo "Usage: $0 --model_id <model_id> [--quants <quant1> [quant2 ...]] [--push_to_hub] [--push_to_user_id <user_id>] [--update_model_card]"
     exit 1
 fi
 if [[ ${#quants[@]} -eq 0 ]]; then
@@ -51,6 +65,6 @@ if [[ ${#quants[@]} -eq 0 ]]; then
 fi
 # Run the python command for each quantization option
 for quant in "${quants[@]}"; do
-    echo "Running: python quantize_and_upload.py --model_id $model_id --quant $quant $push_to_hub"
-    python quantize_and_upload.py --model_id "$model_id" --quant "$quant" $push_to_hub
+    echo "Running: python quantize_and_upload.py --model_id $model_id --quant $quant $push_to_hub $push_to_user_id $update_model_card"
+    python quantize_and_upload.py --model_id "$model_id" --quant "$quant" $push_to_hub $push_to_user_id $update_model_card
 done