Commit 416c91b

Swap mha
Move to extension/llm/modules
Lint
Add tests
1 parent b4c6fe1 commit 416c91b

File tree

4 files changed: +554 −10 lines changed

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
import torch
import torchtune.modules.attention as TorchTuneAttention
from executorch.examples.models.llama2.source_transformation.torchtune.modules.mha import (
    MultiHeadAttention,
)


def _replace_mha_with_inference_mha(module: torch.nn.Module) -> None:
    for name, child in module.named_children():
        if isinstance(child, TorchTuneAttention.MultiHeadAttention):
            setattr(
                module,
                name,
                MultiHeadAttention(
                    embed_dim=child.embed_dim,
                    num_heads=child.num_heads,
                    num_kv_heads=child.num_kv_heads,
                    head_dim=child.head_dim,
                    q_proj=child.q_proj,
                    k_proj=child.k_proj,
                    v_proj=child.v_proj,
                    output_proj=child.output_proj,
                    pos_embeddings=child.pos_embeddings,
                    q_norm=child.q_norm,
                    k_norm=child.k_norm,
                    kv_cache=child.kv_cache,
                    max_seq_len=child.max_seq_len,
                    is_causal=child.is_causal,
                    attn_dropout=child.attn_dropout,
                ),
            )
        else:
            # Recurse with the private helper; the public wrapper's return
            # value is not needed at intermediate levels.
            _replace_mha_with_inference_mha(child)


def replace_mha_with_inference_mha(module: torch.nn.Module) -> torch.nn.Module:
    """
    Replace TorchTune's MHA with an inference-friendly version of MHA that
    separates out the inference-related parts for further optimization.
    """
    _replace_mha_with_inference_mha(module)
    return module
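The transformation above walks the module tree with `named_children()` and swaps matching children in place via `setattr`. The same recursive-swap pattern can be sketched without any torch or torchtune dependency; everything below (`Module`, `TrainingMHA`, `InferenceMHA`, `swap_mha`) is a hypothetical, minimal stand-in, not the actual ExecuTorch code.

```python
class Module:
    """Minimal stand-in for torch.nn.Module's child-module registry."""

    def __init__(self):
        self._children = {}

    def set_child(self, name, child):
        # Register the child under `name`, mirroring attribute assignment
        # on a torch.nn.Module.
        self._children[name] = child
        setattr(self, name, child)

    def named_children(self):
        # Return a copy so callers can replace children while iterating.
        return list(self._children.items())


class TrainingMHA(Module):
    """Stands in for torchtune's MultiHeadAttention."""

    def __init__(self, embed_dim):
        super().__init__()
        self.embed_dim = embed_dim


class InferenceMHA(Module):
    """Stands in for the inference-friendly replacement."""

    def __init__(self, embed_dim):
        super().__init__()
        self.embed_dim = embed_dim


def swap_mha(module):
    """Recursively replace every TrainingMHA child with an InferenceMHA
    built from the child's existing configuration."""
    for name, child in module.named_children():
        if isinstance(child, TrainingMHA):
            module.set_child(name, InferenceMHA(embed_dim=child.embed_dim))
        else:
            swap_mha(child)
    return module


model = Module()
block = Module()
block.set_child("attn", TrainingMHA(embed_dim=64))
model.set_child("layer0", block)

swap_mha(model)
print(type(model.layer0.attn).__name__)  # → InferenceMHA
```

Because the replacement is constructed from the child's own projections and config, the swap preserves trained weights while changing only the attention implementation.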

extension/llm/modules/README.md

Lines changed: 13 additions & 10 deletions
@@ -1,14 +1,17 @@
-## Export Friendly Modules
+## Export-friendly Modules

-Modules in this directory are:
-* Extending `torch.nn.Module`.
-* Guranteed to work out of the box with `torch.export.export()` and `torch.aot_compile()`.
-* Guranteed to be able to work with ExecuTorch.
+Modules in this directory:
+* Extend `torch.nn.Module`.
+* Are guaranteed to work out of the box with `torch.export.export()`.
+* Should work out of the box with `torch.aot_compile()`.
+* Should work with ExecuTorch.

 All modules should be covered by unit tests to make sure they are:
-1. giving the same output as the reference implementation in PyTorch or torchtune
-2. export friendly
-3. AOTI friendly
-4. ExecuTorch friendly
+1. Giving the same output as the reference eager model in PyTorch or TorchTune
+2. Export-friendly

-Notice that these modules are subject to change (may upstream to torchtune) so proceed with caution.
+Additionally, we aim to make these modules:
+3. AOTI-friendly
+4. ExecuTorch-friendly
+
+These modules are subject to change (may upstream to TorchTune) so proceed with caution.
