diff --git a/examples/resnet/resnet_ptq_cpu.json b/examples/resnet/resnet_ptq_cpu.json
index e66b5652fb..922882b08b 100644
--- a/examples/resnet/resnet_ptq_cpu.json
+++ b/examples/resnet/resnet_ptq_cpu.json
@@ -24,7 +24,7 @@
                         {
                             "name": "accuracy_custom",
                             "priority": 1, "higher_is_better": true,
-                            "goal": {"type": "max-degradation", "value": 0.01}
+                            "goal": {"type": "max-degradation", "value": 0.1}
                         }
                     ],
                     "user_config":{
@@ -41,7 +41,7 @@
                         {
                             "name": "avg",
                             "priority": 2,
-                            "goal": {"type": "percent-min-improvement", "value": 20}
+                            "goal": {"type": "percent-min-improvement", "value": 10}
                         }
                     ],
                     "user_config":{
diff --git a/examples/test/test_resnet_vitis_ai_ptq_cpu.py b/examples/test/test_resnet_vitis_ai_ptq_cpu.py
index 8c7a0d3e56..fff11065eb 100644
--- a/examples/test/test_resnet_vitis_ai_ptq_cpu.py
+++ b/examples/test/test_resnet_vitis_ai_ptq_cpu.py
@@ -33,7 +33,7 @@ def setup():
 @pytest.mark.parametrize("system", ["local_system", "aml_system"])
 @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"])
 @pytest.mark.skipif(
-    version.parse(OrtVersion) == version.parse("1.16.0"),
-    reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
+    version.parse(OrtVersion) in (version.parse("1.16.0"), version.parse("1.16.1")),
+    reason="VitisAIQuantization is not supported in ORT 1.16.0 and 1.16.1 with TensorsData",
 )
 def test_resnet(search_algorithm, execution_order, system, olive_json):
diff --git a/olive/passes/onnx/quant_pre_process.py b/olive/passes/onnx/quant_pre_process.py
deleted file mode 100644
index f324c47deb..0000000000
--- a/olive/passes/onnx/quant_pre_process.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# --------------------------------------------------------------------------
-# Copyright (c) Microsoft, Intel Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for
-# license information.
-# --------------------------------------------------------------------------
-
-import logging
-import shutil
-import tempfile
-import traceback
-from contextlib import contextmanager
-from pathlib import Path
-from typing import Optional
-
-import onnx
-import onnxruntime
-from onnxruntime.quantization.quant_utils import add_pre_process_metadata
-from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
-
-logger = logging.getLogger(__name__)
-
-# ruff: noqa: N802
-
-
-def quant_pre_process(
-    input_model_path: str,
-    output_model_path: str,
-    skip_optimization: bool = False,
-    skip_onnx_shape: bool = False,
-    skip_symbolic_shape: bool = False,
-    auto_merge: bool = False,
-    int_max: int = 2**31 - 1,
-    guess_output_rank: bool = False,
-    verbose: int = 0,
-    save_as_external_data: bool = False,
-    all_tensors_to_one_file: bool = False,
-    external_data_location: Optional[str] = None,
-    external_data_size_threshold: int = 1024,
-) -> None:
-    """Shape inference and model optimization, in preparation for quantization.
-
-    Args:
-        input_model_path: Path to the input model file")
-        output_model_path: Path to the output model file
-        skip_optimization: Skip model optimization step if true. This may result in ONNX shape
-            inference failure for some models.
-        skip_onnx_shape: Skip ONNX shape inference. Symbolic shape inference is most effective
-            with transformer based models. Skipping all shape inferences may
-            reduce the effectiveness of quantization, as a tensor with unknown
-            shape can not be quantized.
-        skip_symbolic_shape: Skip symbolic shape inference. Symbolic shape inference is most
-            effective with transformer based models. Skipping all shape
-            inferences may reduce the effectiveness of quantization, as a tensor
-            with unknown shape can not be quantized.
-        auto_merge: For symbolic shape inference, automatically merge symbolic dims when
-            conflict happens.
-        int_max: For symbolic shape inference, specify the maximum value for integer to be
-            treated as boundless for ops like slice
-        guess_output_rank: Guess output rank to be the same as input 0 for unknown ops
-        verbose: Logs detailed info of inference, 0: turn off, 1: warnings, 3: detailed
-        save_as_external_data: Saving an ONNX model to external data
-        all_tensors_to_one_file: Saving all the external data to one file
-        external_data_location: The file location to save the external file
-        external_data_size_threshold: The size threshold for external data
-    """
-    with TemporaryDirectory(prefix="pre.quant.") as quant_tmp_dir:
-        temp_path = Path(quant_tmp_dir)
-        model = None
-
-        if not skip_symbolic_shape:
-            logger.info("Performing symbolic shape inference...")
-            model = SymbolicShapeInference.infer_shapes(
-                onnx.load(input_model_path),
-                int_max,
-                auto_merge,
-                guess_output_rank,
-                verbose,
-            )
-
-        if not skip_optimization:
-            # Use ORT optimizers (native code) to optimize model
-            if not skip_symbolic_shape:
-                # Need to save the inferenced model to file so as to run the optimizer
-                input_model_path = str(temp_path / "symbolic_shape_inferred.onnx")
-                if save_as_external_data:
-                    onnx.save_model(
-                        model,
-                        input_model_path,
-                        save_as_external_data=True,
-                        all_tensors_to_one_file=all_tensors_to_one_file,
-                        size_threshold=external_data_size_threshold,
-                        convert_attribute=False,
-                    )
-                else:
-                    onnx.save(model, input_model_path)
-                model = None
-
-            opt_model_path = str(temp_path / "optimized.onnx")
-            try:
-                sess_option = onnxruntime.SessionOptions()
-                sess_option.optimized_model_filepath = opt_model_path
-                sess_option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC
-                _ = onnxruntime.InferenceSession(input_model_path, sess_option, providers=["CPUExecutionProvider"])
-            except Exception:
-                logger.error(
-                    "ONNX Runtime Model Optimization Failed! Consider rerun with option `--skip_optimization'."
-                )
-                logger.error(traceback.format_exc())
-
-            input_model_path = opt_model_path
-
-        if not skip_onnx_shape:
-            # ONNX shape inference.
-            # According to docs, infer_shapes_path should be used for 2G+ models.
-            # If the skip optimization is specified, we could be dealing with a
-            # large model. So be on the safe side, save the model
-            if model is not None:
-                input_model_path = str(temp_path / "symbolic_shape_inferred.onnx")
-                if save_as_external_data:
-                    onnx.save_model(
-                        model,
-                        input_model_path,
-                        save_as_external_data=True,
-                        all_tensors_to_one_file=all_tensors_to_one_file,
-                        size_threshold=external_data_size_threshold,
-                        convert_attribute=False,
-                    )
-                else:
-                    onnx.save(model, input_model_path)
-                model = None
-
-            inferred_model_path = str(temp_path / "onnx_shape_inferred.onnx")
-            onnx.shape_inference.infer_shapes_path(input_model_path, inferred_model_path)
-            model = onnx.load(inferred_model_path)
-
-    if model is None:
-        model = onnx.load(input_model_path)
-
-    add_pre_process_metadata(model)
-
-    if save_as_external_data:
-        onnx.save_model(
-            model,
-            output_model_path,
-            save_as_external_data=True,
-            all_tensors_to_one_file=all_tensors_to_one_file,
-            location=external_data_location,
-            size_threshold=external_data_size_threshold,
-            convert_attribute=False,
-        )
-    else:
-        onnx.save(model, output_model_path)
-
-
-@contextmanager
-def TemporaryDirectory(**kwargs):
-    # TODO(myguo): this is a workaround for issue https://github.com/microsoft/onnxruntime/issues/17627
-    # on Windows.
-    name = tempfile.mkdtemp(**kwargs)
-    try:
-        yield name
-    finally:
-        try:
-            shutil.rmtree(name)
-        except OSError:
-            logger.warning(f"Failed to remove: {name}", exc_info=True)
diff --git a/olive/passes/onnx/quantization.py b/olive/passes/onnx/quantization.py
index 256f3c6b85..4273f0b243 100644
--- a/olive/passes/onnx/quantization.py
+++ b/olive/passes/onnx/quantization.py
@@ -434,10 +434,9 @@ def _run_for_config(
         return model_proto_to_olive_model(onnx_model, output_model_path, config)
 
     def _quant_preprocess(self, model: ONNXModel, output_model_path: Union[str, Path]) -> ONNXModel:
-        from olive.passes.onnx.quant_pre_process import quant_pre_process
+        from onnxruntime.quantization.preprocess import quant_pre_process
 
         try:
-            # TODO(myguo): use ORT version once the Windows issue is fixed
             quant_pre_process(
                 input_model_path=model.model_path,
                 output_model_path=str(output_model_path),
diff --git a/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py b/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
index 169ce43746..cbd34ab00a 100644
--- a/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
+++ b/test/unit_test/passes/vitis_ai/test_vitis_ai_quantization.py
@@ -37,7 +37,7 @@ def dummy_calibration_reader(data_dir=None, batch_size=1, *args, **kwargs):
 
 
 @pytest.mark.skipif(
-    version.parse(OrtVersion) == version.parse("1.16.0"),
-    reason="VitisAIQuantization is not supported in ORT 1.16.0 with TensorsData",
+    version.parse(OrtVersion) in (version.parse("1.16.0"), version.parse("1.16.1")),
+    reason="VitisAIQuantization is not supported in ORT 1.16.0 and 1.16.1 with TensorsData",
 )
 def test_vitis_ai_quantization_pass(tmp_path):