Skip to content

Commit

Permalink
Olive SDXL optimization.
Browse files Browse the repository at this point in the history
  • Loading branch information
lshqqytiger committed Aug 20, 2023
1 parent 58b9fb5 commit 469daf9
Show file tree
Hide file tree
Showing 14 changed files with 738 additions and 550 deletions.
126 changes: 126 additions & 0 deletions configs/olive_optimize_sdxl/config_text_encoder.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "stabilityai/stable-diffusion-xl-base-1.0",
"model_loader": "text_encoder_load",
"model_script": "modules/sd_olive_scripts.py",
"io_config": {
"input_names": ["input_ids", "output_hidden_states"],
"output_names": [
"last_hidden_state",
"pooler_output",
"hidden_states.0",
"hidden_states.1",
"hidden_states.2",
"hidden_states.3",
"hidden_states.4",
"hidden_states.5",
"hidden_states.6",
"hidden_states.7",
"hidden_states.8",
"hidden_states.9",
"hidden_states.10",
"hidden_states.11",
"hidden_states.12"
],
"dynamic_axes": {
"input_ids": { "0": "batch_size", "1": "sequence_length" },
"last_hidden_state": { "0": "batch_size", "1": "sequence_length" },
"pooler_output": { "0": "batch_size" },
"hidden_states.0": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.1": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.2": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.3": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.4": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.5": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.6": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.7": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.8": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.9": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.10": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.11": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.12": { "0": "batch_size", "1": "sequence_length" }
}
},
"dummy_inputs_func": "text_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/sd_olive_scripts.py",
"dataloader_func": "text_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"float16": true,
"use_gpu": true,
"keep_io_types": true,
"optimization_options": {
"enable_gelu": true,
"enable_layer_norm": true,
"enable_attention": true,
"use_multi_head_attention": true,
"enable_skip_layer_norm": false,
"enable_embed_layer_norm": true,
"enable_bias_skip_layer_norm": false,
"enable_bias_gelu": true,
"enable_gelu_approximation": false,
"enable_qordered_matmul": false,
"enable_shape_inference": true,
"enable_gemm_fast_gelu": false,
"enable_nhwc_conv": false,
"enable_group_norm": true,
"enable_bias_splitgelu": false,
"enable_packed_qkv": true,
"enable_packed_kv": true,
"enable_bias_add": false
},
"force_fp32_ops": ["RandomNormalLike"]
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}
166 changes: 166 additions & 0 deletions configs/olive_optimize_sdxl/config_text_encoder_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "stabilityai/stable-diffusion-xl-base-1.0",
"model_loader": "text_encoder_2_load",
"model_script": "modules/sd_olive_scripts.py",
"io_config": {
"input_names": ["input_ids", "output_hidden_states"],
"output_names": [
"text_embeds",
"last_hidden_state",
"hidden_states.0",
"hidden_states.1",
"hidden_states.2",
"hidden_states.3",
"hidden_states.4",
"hidden_states.5",
"hidden_states.6",
"hidden_states.7",
"hidden_states.8",
"hidden_states.9",
"hidden_states.10",
"hidden_states.11",
"hidden_states.12",
"hidden_states.13",
"hidden_states.14",
"hidden_states.15",
"hidden_states.16",
"hidden_states.17",
"hidden_states.18",
"hidden_states.19",
"hidden_states.20",
"hidden_states.21",
"hidden_states.22",
"hidden_states.23",
"hidden_states.24",
"hidden_states.25",
"hidden_states.26",
"hidden_states.27",
"hidden_states.28",
"hidden_states.29",
"hidden_states.30",
"hidden_states.31",
"hidden_states.32"
],
"dynamic_axes": {
"input_ids": { "0": "batch_size", "1": "sequence_length" },
"text_embeds": { "0": "batch_size" },
"last_hidden_state": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.0": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.1": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.2": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.3": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.4": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.5": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.6": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.7": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.8": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.9": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.10": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.11": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.12": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.13": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.14": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.15": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.16": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.17": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.18": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.19": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.20": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.21": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.22": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.23": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.24": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.25": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.26": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.27": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.28": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.29": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.30": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.31": { "0": "batch_size", "1": "sequence_length" },
"hidden_states.32": { "0": "batch_size", "1": "sequence_length" }
}
},
"dummy_inputs_func": "text_encoder_2_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": ["gpu"]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/sd_olive_scripts.py",
"dataloader_func": "text_encoder_2_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"convert": {
"type": "OnnxConversion",
"config": {
"target_opset": 14
}
},
"optimize": {
"type": "OrtTransformersOptimization",
"disable_search": true,
"config": {
"model_type": "clip",
"float16": true,
"use_gpu": true,
"keep_io_types": true,
"optimization_options": {
"enable_gelu": true,
"enable_layer_norm": true,
"enable_attention": true,
"use_multi_head_attention": true,
"enable_skip_layer_norm": false,
"enable_embed_layer_norm": true,
"enable_bias_skip_layer_norm": false,
"enable_bias_gelu": true,
"enable_gelu_approximation": false,
"enable_qordered_matmul": false,
"enable_shape_inference": true,
"enable_gemm_fast_gelu": false,
"enable_nhwc_conv": false,
"enable_group_norm": true,
"enable_bias_splitgelu": false,
"enable_packed_qkv": true,
"enable_packed_kv": true,
"enable_bias_add": false
},
"force_fp32_ops": ["RandomNormalLike"]
}
}
},
"engine": {
"search_strategy": {
"execution_order": "joint",
"search_algorithm": "exhaustive"
},
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder_2",
"output_dir": "footprints",
"execution_providers": ["DmlExecutionProvider"]
}
}
Loading

3 comments on commit 469daf9

@hisham-hchowdhu
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @lshqqytiger, this commit seems to break the ONNX path with this error:
File "\stable-diffusion-webui-directml\venv\lib\site-packages\transformers\utils\versions.py", line 44, in _compare_versions
raise ImportError(
ImportError: accelerate>=0.20.3 is required for a normal functioning of this module, but found accelerate==0.18.0.
Try: pip install transformers -U or pip install -e '.[dev]' if you're working with git main

If I go back one commit, it works fine. Are you aware of this issue, and is there a fix for it?
Thanks,

@lshqqytiger
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be fixed in 45d7cc1.

@hisham-hchowdhu
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it is fixed, thanks! I had to change the requirements.txt file manually to make it work.

Please sign in to comment.