From e05a478d53865ab67cdd017dd226e1ae28b4811d Mon Sep 17 00:00:00 2001 From: abhishek-singh591 Date: Tue, 9 Sep 2025 09:51:36 +0000 Subject: [PATCH 1/6] Resolved issue related custom_io in CLI Signed-off-by: abhishek-singh591 --- QEfficient/cloud/custom_yaml.py | 76 ++++++++++++++++++++++++++++ QEfficient/cloud/export.py | 5 +- QEfficient/compile/compile_helper.py | 22 +++++--- 3 files changed, 95 insertions(+), 8 deletions(-) create mode 100644 QEfficient/cloud/custom_yaml.py diff --git a/QEfficient/cloud/custom_yaml.py b/QEfficient/cloud/custom_yaml.py new file mode 100644 index 000000000..23c041633 --- /dev/null +++ b/QEfficient/cloud/custom_yaml.py @@ -0,0 +1,76 @@ +from pathlib import Path +import warnings + +def dump_custom_io(custom_io, cache_dir, dtype_suffix): + custom_io_yaml = Path(cache_dir) / f"custom_io_{dtype_suffix}.yaml" + with open(custom_io_yaml, "w") as fp: + for io_name, dtype in custom_io.items(): + fp.write(f" - IOName: {io_name}\n Precision: {dtype}\n\n") + +def generate_custom_io(qeff_model, cache_dir=".", mxint8_kv_cache=False): + model_class_name = type(qeff_model).__name__ + if not model_class_name == "QEFFAutoModelForCausalLM": + output_names = qeff_model.model.get_output_names() + kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16" + dtype_suffix = "int8" if mxint8_kv_cache else "fp16" + + custom_io = {} + + # if model_class_name in [ + # "QEffCausalLMForTextImageToTextModel", + # "QEffVisionEncoderForTextImageToTextModel" + # ]: + # dump_custom_io(custom_io, cache_dir, dtype_suffix) + # warnings.warn( + # f"custom_io generated for these '{model_class_name}' class is empty.", + # UserWarning + # ) + + # Dual QPC: generate two YAML files + if model_class_name == "_QEFFAutoModelForImageTextToTextDualQPC": + custom_io_vision = {} + for output_name in output_names.get("vision", []): + custom_io_vision[output_name] = kv_cache_dtype if output_name.startswith("past_") else "float16" + + custom_io_lang = {} + for output_name in 
output_names.get("lang", []): + if output_name.endswith("_RetainedState"): + base_name = output_name[: -len("_RetainedState")] + custom_io_lang[base_name] = "float16" if "vision_embeds" in output_name else kv_cache_dtype + custom_io_lang[output_name] = "float16" if "vision_embeds" in output_name else kv_cache_dtype + + dump_custom_io(custom_io_vision, cache_dir, f'{dtype_suffix}_vision') + dump_custom_io(custom_io_lang, cache_dir, f'{dtype_suffix}_lang') + return {**custom_io_vision, **custom_io_lang} + + # Single QPC + elif model_class_name == "_QEFFAutoModelForImageTextToTextSingleQPC": + for input_name in output_names: + if input_name.endswith("_RetainedState"): + custom_io[input_name[: -len("_RetainedState")]] = ( + "float16" if "pixel_values" in input_name else kv_cache_dtype + ) + for output_name in output_names: + if output_name.endswith("_RetainedState"): + custom_io[output_name] = "float16" if "pixel_values" in output_name else kv_cache_dtype + + # Causal LM + elif model_class_name == "QEFFAutoModelForCausalLM": + for suffix in ["", "_RetainedState"]: + num_layers = getattr(qeff_model, "num_layers", 12) + for i in range(num_layers): + for kv in ["key", "value"]: + custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype + + # Speech Seq2Seq + elif model_class_name == "QEFFAutoModelForSpeechSeq2Seq": + custom_io["input_features"] = kv_cache_dtype + for output_name in output_names: + if output_name.endswith("_RetainedState"): + custom_io[output_name[: -len("_RetainedState")]] = kv_cache_dtype + custom_io[output_name] = kv_cache_dtype + else: + warnings.warn(f"Unsupported model class: {model_class_name}", UserWarning) + + dump_custom_io(custom_io, cache_dir, dtype_suffix) + return custom_io \ No newline at end of file diff --git a/QEfficient/cloud/export.py b/QEfficient/cloud/export.py index 849325c9d..11c78c902 100644 --- a/QEfficient/cloud/export.py +++ b/QEfficient/cloud/export.py @@ -12,7 +12,7 @@ from QEfficient.base.common import QEFFCommonLoader from 
QEfficient.utils import check_and_assign_cache_dir from QEfficient.utils.logging_utils import logger - +from .custom_yaml import generate_custom_io # Specifically for Docker images. ROOT_DIR = os.path.dirname(os.path.abspath("")) @@ -45,6 +45,7 @@ def get_onnx_model_path( full_batch_size=full_batch_size, local_model_dir=local_model_dir, ) + generate_custom_io(qeff_model, cache_dir=".", mxint8_kv_cache=False) onnx_model_path = qeff_model.export() logger.info(f"Generated onnx_path: {onnx_model_path}") return onnx_model_path @@ -107,4 +108,4 @@ def main( help="Set full batch size to enable continuous batching mode, default is None", ) args = parser.parse_args() - main(**args.__dict__) + main(**args.__dict__) \ No newline at end of file diff --git a/QEfficient/compile/compile_helper.py b/QEfficient/compile/compile_helper.py index 56177cce9..e61e6112e 100644 --- a/QEfficient/compile/compile_helper.py +++ b/QEfficient/compile/compile_helper.py @@ -171,6 +171,7 @@ def compile( Returns: :str: Path to compiled ``qpc`` package. """ + if full_batch_size and batch_size != 1: raise ValueError("Only either batch_size or full_batch_size should be greater than one") @@ -184,12 +185,21 @@ def compile( path=specialization_json_path, full_batch_size=full_batch_size, ) - - # Select the customIO config based on the mx flag. 
- custom_io_file_name = "custom_io_int8.yaml" if mxint8 else "custom_io_fp16.yaml" - + + dtype_suffix = "int8" if mxint8 else "fp16" + source_path = f"./custom_io_{dtype_suffix}.yaml" + destination_path = os.path.join(os.path.dirname(qpc_path), f"custom_io_{dtype_suffix}.yaml") + + # Move the custom YAML file to the cache/qeff_model directory + try: + shutil.move(source_path, destination_path) + print(f"Successfully moved '{source_path}' to '{destination_path}'.") + except Exception as e: + print(f"Error while moving file '{source_path}': {e}") + + custom_io_file_name = f"custom_io_{dtype_suffix}.yaml" if custom_io_file_path is None: - custom_io_file_path = os.path.join(os.path.dirname(onnx_path), custom_io_file_name) + custom_io_file_path = os.path.join(os.path.dirname(qpc_path), custom_io_file_name) if not os.path.isfile(custom_io_file_path): raise FileNotFoundError( @@ -234,4 +244,4 @@ def compile( else: logger.info(f"Compiled QPC files can be found here: {qpc_path}") - return qpc_path + return qpc_path \ No newline at end of file From 294d97fcc683f335b02a237c11f135a64ae970c4 Mon Sep 17 00:00:00 2001 From: abhishek-singh591 Date: Wed, 10 Sep 2025 05:05:02 +0000 Subject: [PATCH 2/6] Fixed issue of No custom_IO file found during compile through CLI Signed-off-by: abhishek-singh591 --- QEfficient/cloud/custom_yaml.py | 264 ++++++++++++++++++++++++-------- 1 file changed, 196 insertions(+), 68 deletions(-) diff --git a/QEfficient/cloud/custom_yaml.py b/QEfficient/cloud/custom_yaml.py index 23c041633..ba6da26e2 100644 --- a/QEfficient/cloud/custom_yaml.py +++ b/QEfficient/cloud/custom_yaml.py @@ -1,76 +1,204 @@ from pathlib import Path import warnings -def dump_custom_io(custom_io, cache_dir, dtype_suffix): - custom_io_yaml = Path(cache_dir) / f"custom_io_{dtype_suffix}.yaml" - with open(custom_io_yaml, "w") as fp: - for io_name, dtype in custom_io.items(): - fp.write(f" - IOName: {io_name}\n Precision: {dtype}\n\n") - -def generate_custom_io(qeff_model, 
cache_dir=".", mxint8_kv_cache=False): - model_class_name = type(qeff_model).__name__ - if not model_class_name == "QEFFAutoModelForCausalLM": - output_names = qeff_model.model.get_output_names() - kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16" - dtype_suffix = "int8" if mxint8_kv_cache else "fp16" - - custom_io = {} - - # if model_class_name in [ - # "QEffCausalLMForTextImageToTextModel", - # "QEffVisionEncoderForTextImageToTextModel" - # ]: - # dump_custom_io(custom_io, cache_dir, dtype_suffix) - # warnings.warn( - # f"custom_io generated for these '{model_class_name}' class is empty.", - # UserWarning - # ) - - # Dual QPC: generate two YAML files - if model_class_name == "_QEFFAutoModelForImageTextToTextDualQPC": - custom_io_vision = {} - for output_name in output_names.get("vision", []): - custom_io_vision[output_name] = kv_cache_dtype if output_name.startswith("past_") else "float16" +class CustomIOGenerator: + """ + Abstract base class for generating custom IO mappings for different model types. - custom_io_lang = {} - for output_name in output_names.get("lang", []): - if output_name.endswith("_RetainedState"): - base_name = output_name[: -len("_RetainedState")] - custom_io_lang[base_name] = "float16" if "vision_embeds" in output_name else kv_cache_dtype - custom_io_lang[output_name] = "float16" if "vision_embeds" in output_name else kv_cache_dtype - - dump_custom_io(custom_io_vision, cache_dir, f'{dtype_suffix}_vision') - dump_custom_io(custom_io_lang, cache_dir, f'{dtype_suffix}_lang') - return {**custom_io_vision, **custom_io_lang} + Args: + model (object): The model instance for which IO mappings are to be generated. + cache_dir (str): Directory path where the generated YAML files will be saved. + mxint8_kv_cache (bool): If True, use 'mxint8' precision for KV cache; otherwise, use 'float16'. 
+ """ + + def __init__(self, model, cache_dir=".", mxint8_kv_cache=False): + self.model = model + self.cache_dir = Path(cache_dir) + self.kv_cache_dtype = "mxint8" if mxint8_kv_cache else "float16" + self.dtype_suffix = "int8" if mxint8_kv_cache else "fp16" + + def dump(self, custom_io: dict, suffix: str): + """ + Writes the custom IO mapping to a YAML file. + + Args: + custom_io (dict): Dictionary containing IO names and their precision types. + suffix (str): Suffix to append to the output filename. + """ + custom_io_yaml = self.cache_dir / f"custom_io_{suffix}.yaml" + with open(custom_io_yaml, "w") as fp: + for io_name, dtype in custom_io.items(): + fp.write(f" - IOName: {io_name}\n Precision: {dtype}\n\n") + + def generate(self) -> dict: + """ + Abstract method to generate custom IO mappings. + + Returns: + dict: A dictionary of IO names and their precision types. + + Raises: + NotImplementedError: Must be implemented by subclasses. + """ + raise NotImplementedError("Subclasses must implement this method") + + +class CausalLMIOGenerator(CustomIOGenerator): + """ + IO generator for causal language models. + """ - # Single QPC - elif model_class_name == "_QEFFAutoModelForImageTextToTextSingleQPC": - for input_name in output_names: - if input_name.endswith("_RetainedState"): - custom_io[input_name[: -len("_RetainedState")]] = ( - "float16" if "pixel_values" in input_name else kv_cache_dtype - ) - for output_name in output_names: - if output_name.endswith("_RetainedState"): - custom_io[output_name] = "float16" if "pixel_values" in output_name else kv_cache_dtype - - # Causal LM - elif model_class_name == "QEFFAutoModelForCausalLM": + def generate(self) -> dict: + """ + Generates IO mappings for past key/value states in causal language models. + + Returns: + dict: Mapping of IO names to precision types. 
+ """ + custom_io = {} + num_layers = getattr(self.model, "num_layers", 12) for suffix in ["", "_RetainedState"]: - num_layers = getattr(qeff_model, "num_layers", 12) for i in range(num_layers): for kv in ["key", "value"]: - custom_io[f"past_{kv}.{i}{suffix}"] = kv_cache_dtype - - # Speech Seq2Seq - elif model_class_name == "QEFFAutoModelForSpeechSeq2Seq": - custom_io["input_features"] = kv_cache_dtype - for output_name in output_names: - if output_name.endswith("_RetainedState"): - custom_io[output_name[: -len("_RetainedState")]] = kv_cache_dtype - custom_io[output_name] = kv_cache_dtype - else: - warnings.warn(f"Unsupported model class: {model_class_name}", UserWarning) - - dump_custom_io(custom_io, cache_dir, dtype_suffix) - return custom_io \ No newline at end of file + custom_io[f"past_{kv}.{i}{suffix}"] = self.kv_cache_dtype + self.dump(custom_io, self.dtype_suffix) + return custom_io + + +class DualQPCIOGenerator(CustomIOGenerator): + """ + IO generator for dual QPC models (e.g., vision-language models). + """ + + def generate(self) -> dict: + """ + Generates IO mappings for both vision and language components. + + Returns: + dict: Combined mapping of IO names to precision types for vision and language outputs. + """ + output_names = self.model.model.get_output_names() + custom_io_vision = { + name: self.kv_cache_dtype if name.startswith("past_") else "float16" + for name in output_names.get("vision", []) + } + + custom_io_lang = {} + for name in output_names.get("lang", []): + if name.endswith("_RetainedState"): + base = name[:-len("_RetainedState")] + dtype = "float16" if "vision_embeds" in name else self.kv_cache_dtype + custom_io_lang[base] = dtype + custom_io_lang[name] = dtype + + self.dump(custom_io_vision, f"{self.dtype_suffix}_vision") + self.dump(custom_io_lang, f"{self.dtype_suffix}_lang") + return {**custom_io_vision, **custom_io_lang} + + +class SingleQPCIOGenerator(CustomIOGenerator): + """ + IO generator for single QPC models. 
+ """ + + def generate(self) -> dict: + """ + Generates IO mappings for retained states in single QPC models. + + Returns: + dict: Mapping of IO names to precision types. + """ + output_names = self.model.model.get_output_names() + custom_io = {} + for name in output_names: + if name.endswith("_RetainedState"): + base = name[:-len("_RetainedState")] + dtype = "float16" if "pixel_values" in name else self.kv_cache_dtype + custom_io[base] = dtype + custom_io[name] = dtype + self.dump(custom_io, self.dtype_suffix) + return custom_io + + +class SpeechSeq2SeqIOGenerator(CustomIOGenerator): + """ + IO generator for speech sequence-to-sequence models. + """ + + def generate(self) -> dict: + """ + Generates IO mappings for input features and retained states in speech models. + + Returns: + dict: Mapping of IO names to precision types. + """ + output_names = self.model.model.get_output_names() + custom_io = {"input_features": self.kv_cache_dtype} + for name in output_names: + if name.endswith("_RetainedState"): + base = name[:-len("_RetainedState")] + custom_io[base] = self.kv_cache_dtype + custom_io[name] = self.kv_cache_dtype + self.dump(custom_io, self.dtype_suffix) + return custom_io + + +class UnsupportedModelIOGenerator(CustomIOGenerator): + """ + Fallback IO generator for unsupported model types. + """ + + def generate(self) -> dict: + """ + Emits a warning for unsupported model types. + + Returns: + dict: Empty dictionary. + """ + warnings.warn(f"Unsupported model class: {type(self.model).__name__}", UserWarning) + return {} + + +class CustomIOFactory: + """ + Factory class to instantiate the appropriate IO generator based on model type. + """ + + @staticmethod + def get_generator(model, cache_dir=".", mxint8_kv_cache=False) -> CustomIOGenerator: + """ + Returns the appropriate IO generator instance for the given model. + + Args: + model (object): The model instance. + cache_dir (str): Directory to store YAML files. 
+ mxint8_kv_cache (bool): Flag to use 'mxint8' precision. + + Returns: + CustomIOGenerator: An instance of the appropriate subclass. + """ + model_class_name = type(model).__name__ + mapping = { + "QEFFAutoModelForCausalLM": CausalLMIOGenerator, + "_QEFFAutoModelForImageTextToTextDualQPC": DualQPCIOGenerator, + "_QEFFAutoModelForImageTextToTextSingleQPC": SingleQPCIOGenerator, + "QEFFAutoModelForSpeechSeq2Seq": SpeechSeq2SeqIOGenerator, + } + generator_class = mapping.get(model_class_name, UnsupportedModelIOGenerator) + return generator_class(model, cache_dir, mxint8_kv_cache) + + +def generate_custom_io(qeff_model, cache_dir=".", mxint8_kv_cache=False) -> dict: + """ + Generates and returns custom IO mappings for the given QEFF model. + + Args: + qeff_model (object): The model instance. + cache_dir (str): Directory to store YAML files. + mxint8_kv_cache (bool): Flag to use 'mxint8' precision. + + Returns: + dict: Custom IO mapping generated by the appropriate generator. + """ + generator = CustomIOFactory.get_generator(qeff_model, cache_dir, mxint8_kv_cache) + return generator.generate() From 33e4b1ac31c106e58336391e395e4f6addcc9654 Mon Sep 17 00:00:00 2001 From: abhishek-singh591 Date: Wed, 10 Sep 2025 05:06:58 +0000 Subject: [PATCH 3/6] Fixed issue of No custom_IO file found during compile through CLI Signed-off-by: abhishek-singh591 --- QEfficient/cloud/custom_yaml.py | 9 +++++---- QEfficient/cloud/export.py | 4 +++- QEfficient/compile/compile_helper.py | 6 +++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/QEfficient/cloud/custom_yaml.py b/QEfficient/cloud/custom_yaml.py index ba6da26e2..364f61320 100644 --- a/QEfficient/cloud/custom_yaml.py +++ b/QEfficient/cloud/custom_yaml.py @@ -1,5 +1,6 @@ -from pathlib import Path import warnings +from pathlib import Path + class CustomIOGenerator: """ @@ -86,7 +87,7 @@ def generate(self) -> dict: custom_io_lang = {} for name in output_names.get("lang", []): if name.endswith("_RetainedState"): - base = 
name[:-len("_RetainedState")] + base = name[: -len("_RetainedState")] dtype = "float16" if "vision_embeds" in name else self.kv_cache_dtype custom_io_lang[base] = dtype custom_io_lang[name] = dtype @@ -112,7 +113,7 @@ def generate(self) -> dict: custom_io = {} for name in output_names: if name.endswith("_RetainedState"): - base = name[:-len("_RetainedState")] + base = name[: -len("_RetainedState")] dtype = "float16" if "pixel_values" in name else self.kv_cache_dtype custom_io[base] = dtype custom_io[name] = dtype @@ -136,7 +137,7 @@ def generate(self) -> dict: custom_io = {"input_features": self.kv_cache_dtype} for name in output_names: if name.endswith("_RetainedState"): - base = name[:-len("_RetainedState")] + base = name[: -len("_RetainedState")] custom_io[base] = self.kv_cache_dtype custom_io[name] = self.kv_cache_dtype self.dump(custom_io, self.dtype_suffix) diff --git a/QEfficient/cloud/export.py b/QEfficient/cloud/export.py index 11c78c902..218dc7c93 100644 --- a/QEfficient/cloud/export.py +++ b/QEfficient/cloud/export.py @@ -12,7 +12,9 @@ from QEfficient.base.common import QEFFCommonLoader from QEfficient.utils import check_and_assign_cache_dir from QEfficient.utils.logging_utils import logger + from .custom_yaml import generate_custom_io + # Specifically for Docker images. ROOT_DIR = os.path.dirname(os.path.abspath("")) @@ -108,4 +110,4 @@ def main( help="Set full batch size to enable continuous batching mode, default is None", ) args = parser.parse_args() - main(**args.__dict__) \ No newline at end of file + main(**args.__dict__) diff --git a/QEfficient/compile/compile_helper.py b/QEfficient/compile/compile_helper.py index e61e6112e..8f79b39c4 100644 --- a/QEfficient/compile/compile_helper.py +++ b/QEfficient/compile/compile_helper.py @@ -171,7 +171,7 @@ def compile( Returns: :str: Path to compiled ``qpc`` package. 
""" - + if full_batch_size and batch_size != 1: raise ValueError("Only either batch_size or full_batch_size should be greater than one") @@ -185,7 +185,7 @@ def compile( path=specialization_json_path, full_batch_size=full_batch_size, ) - + dtype_suffix = "int8" if mxint8 else "fp16" source_path = f"./custom_io_{dtype_suffix}.yaml" destination_path = os.path.join(os.path.dirname(qpc_path), f"custom_io_{dtype_suffix}.yaml") @@ -244,4 +244,4 @@ def compile( else: logger.info(f"Compiled QPC files can be found here: {qpc_path}") - return qpc_path \ No newline at end of file + return qpc_path From 5d9c60f2ebe69ff207621b1043d29803ae2a8594 Mon Sep 17 00:00:00 2001 From: abhishek-singh591 Date: Fri, 12 Sep 2025 05:48:13 +0000 Subject: [PATCH 4/6] Moved custom_yaml.py in utils and other appropriate changes Signed-off-by: abhishek-singh591 --- QEfficient/cloud/export.py | 24 +++++++++++++++------- QEfficient/{cloud => utils}/custom_yaml.py | 8 ++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) rename QEfficient/{cloud => utils}/custom_yaml.py (95%) diff --git a/QEfficient/cloud/export.py b/QEfficient/cloud/export.py index 218dc7c93..e0c6a9b59 100644 --- a/QEfficient/cloud/export.py +++ b/QEfficient/cloud/export.py @@ -11,23 +11,23 @@ from QEfficient.base.common import QEFFCommonLoader from QEfficient.utils import check_and_assign_cache_dir +from QEfficient.utils.custom_yaml import generate_custom_io from QEfficient.utils.logging_utils import logger -from .custom_yaml import generate_custom_io - # Specifically for Docker images. 
ROOT_DIR = os.path.dirname(os.path.abspath("")) -def get_onnx_model_path( +def get_onnx_path_and_setup_customIO( model_name: str, cache_dir: Optional[str] = None, hf_token: Optional[str] = None, full_batch_size: Optional[int] = None, local_model_dir: Optional[str] = None, + mxint8_kv_cache: Optional[int] = False, ): """ - exports the model to onnx if pre-exported file is not found and returns onnx_model_path + exports the model to onnx if pre-exported file is not found and returns onnx_model_path and generates custom_io file. ``Mandatory`` Args: :model_name (str): Hugging Face Model Card name, Example: ``gpt2``. @@ -47,9 +47,11 @@ def get_onnx_model_path( full_batch_size=full_batch_size, local_model_dir=local_model_dir, ) - generate_custom_io(qeff_model, cache_dir=".", mxint8_kv_cache=False) onnx_model_path = qeff_model.export() logger.info(f"Generated onnx_path: {onnx_model_path}") + + # Generating Custom IO for the compile. + generate_custom_io(qeff_model, mxint8_kv_cache=mxint8_kv_cache) return onnx_model_path @@ -59,6 +61,7 @@ def main( hf_token: Optional[str] = None, local_model_dir: Optional[str] = None, full_batch_size: Optional[int] = None, + mxint8_kv_cache: Optional[bool] = False, ) -> None: """ Helper function used by export CLI app for exporting to ONNX Model. @@ -71,19 +74,20 @@ def main( :hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.`` :local_model_dir (str): Path to custom model weights and config files. ``Defaults to None.`` :full_batch_size (int): Set full batch size to enable continuous batching mode. ``Defaults to None.`` - + :mxint8_kv_cache (bool): Whether to export int8 model or not. ``Defaults to False.`` ..
code-block:: bash python -m QEfficient.cloud.export OPTIONS """ cache_dir = check_and_assign_cache_dir(local_model_dir, cache_dir) - get_onnx_model_path( + get_onnx_path_and_setup_customIO( model_name=model_name, cache_dir=cache_dir, hf_token=hf_token, full_batch_size=full_batch_size, local_model_dir=local_model_dir, + mxint8_kv_cache=mxint8_kv_cache, ) @@ -109,5 +113,11 @@ def main( default=None, help="Set full batch size to enable continuous batching mode, default is None", ) + parser.add_argument( + "--mxint8_kv_cache", + "--mxint8-kv-cache", + required=False, + help="Compress Present/Past KV to MXINT8 using CustomIO config, default is False", + ) args = parser.parse_args() main(**args.__dict__) diff --git a/QEfficient/cloud/custom_yaml.py b/QEfficient/utils/custom_yaml.py similarity index 95% rename from QEfficient/cloud/custom_yaml.py rename to QEfficient/utils/custom_yaml.py index 364f61320..2adb656b5 100644 --- a/QEfficient/cloud/custom_yaml.py +++ b/QEfficient/utils/custom_yaml.py @@ -1,3 +1,10 @@ +# ----------------------------------------------------------------------------- +# +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 
+# SPDX-License-Identifier: BSD-3-Clause +# +# ---------------------------------------------------------------------------- + import warnings from pathlib import Path @@ -94,6 +101,7 @@ def generate(self) -> dict: self.dump(custom_io_vision, f"{self.dtype_suffix}_vision") self.dump(custom_io_lang, f"{self.dtype_suffix}_lang") + warnings.warn(f"Unsupported model class via CLI: {type(self.model).__name__}", UserWarning) return {**custom_io_vision, **custom_io_lang} From d23558e3798824992128e79101ac318aa3f86edb Mon Sep 17 00:00:00 2001 From: abhishek-singh591 Date: Fri, 10 Oct 2025 05:14:37 +0000 Subject: [PATCH 5/6] Minor fixes function name error Signed-off-by: abhishek-singh591 --- examples/cpp_execution/text_inference_using_cpp.py | 4 ++-- tests/cloud/test_export_compile_execute.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/cpp_execution/text_inference_using_cpp.py b/examples/cpp_execution/text_inference_using_cpp.py index 9b0d59c73..072f2c57c 100644 --- a/examples/cpp_execution/text_inference_using_cpp.py +++ b/examples/cpp_execution/text_inference_using_cpp.py @@ -14,7 +14,7 @@ from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast import QEfficient -from QEfficient.cloud.export import get_onnx_model_path +from QEfficient.cloud.export import get_onnx_path_and_setup_customIO from QEfficient.generation.text_generation_inference import fix_prompts, get_compilation_dims, get_input_prompts from QEfficient.utils import check_and_assign_cache_dir, get_qpc_dir_path, load_hf_tokenizer, qpc_exists from QEfficient.utils.logging_utils import logger @@ -103,7 +103,7 @@ def main( logger.info(f"Pre-compiled qpc found at {qpc_dir_path}!
Executing with given prompt") else: # Handle onnx model generation - onnx_model_path = get_onnx_model_path( + onnx_model_path = get_onnx_path_and_setup_customIO( model_name, cache_dir, tokenizer, hf_token, local_model_dir, full_batch_size ) _ = QEfficient.compile( diff --git a/tests/cloud/test_export_compile_execute.py b/tests/cloud/test_export_compile_execute.py index 7cac59da7..f1c80a6b0 100644 --- a/tests/cloud/test_export_compile_execute.py +++ b/tests/cloud/test_export_compile_execute.py @@ -18,7 +18,7 @@ def check_export_compile_execute(mocker, model_name, full_batch_size=None, enable_qnn=False): check_and_assign_cache_dir_spy = mocker.spy(QEfficient.cloud.export, "check_and_assign_cache_dir") - get_onnx_model_path_spy = mocker.spy(QEfficient.cloud.export, "get_onnx_model_path") + get_onnx_path_and_setup_customIO_spy = mocker.spy(QEfficient.cloud.export, "get_onnx_path_and_setup_customIO") load_hf_tokenizer_spy = mocker.spy(QEfficient.cloud.execute, "load_hf_tokenizer") cloud_ai_100_exec_kv_spy = mocker.spy(QEfficient.cloud.execute, "cloud_ai_100_exec_kv") @@ -29,9 +29,9 @@ def check_export_compile_execute(mocker, model_name, full_batch_size=None, enabl ) check_and_assign_cache_dir_spy.assert_called_once() - get_onnx_model_path_spy.assert_called_once() + get_onnx_path_and_setup_customIO_spy.assert_called_once() - onnx_model_path = get_onnx_model_path_spy.spy_return + onnx_model_path = get_onnx_path_and_setup_customIO_spy.spy_return assert os.path.isfile(onnx_model_path) From a018c373e818ed4f4d017b1a56c17ab76d522082 Mon Sep 17 00:00:00 2001 From: abhishek-singh591 Date: Fri, 10 Oct 2025 05:23:20 +0000 Subject: [PATCH 6/6] Minor fixes function name error Signed-off-by: abhishek-singh591 --- QEfficient/cloud/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/QEfficient/cloud/export.py b/QEfficient/cloud/export.py index a36d5a60d..a5e0b6e19 100644 --- a/QEfficient/cloud/export.py +++ b/QEfficient/cloud/export.py @@ -84,7 +84,7 @@ def
main( This function serves as the entry point for exporting a PyTorch model, loaded via QEFFCommonLoader, to the ONNX format. It prepares the necessary - paths and calls `get_onnx_model_path`. + paths and calls `get_onnx_path_and_setup_customIO`. Parameters ----------