From 28fd54281ddfe6edbc3a4038d5426e428712e82e Mon Sep 17 00:00:00 2001
From: Min Guo
Date: Fri, 29 Aug 2025 17:11:48 -0700
Subject: [PATCH] export static llama with masked softmax (#13832)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/13832

Export the model with masked softmax attention support.

Reviewed By: limintang, sxu

Differential Revision: D81248691
---
 examples/models/llama/static_attention.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index ffcdb6cb9ce..395fce85613 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -1044,5 +1044,5 @@ def transfer_weight(linear, conv2d):
 
 @register_attention("static_mha")
 class StaticAttentionMHA(StaticAttention):
-    def __init__(self, config: ModelArgs, layer_id: int, rope: Rope):
-        super().__init__(config, layer_id, rope, split_mha=False)
+    def __init__(self, config: ModelArgs, layer_id: int, rope: Rope, **kwargs: Any):
+        super().__init__(config, layer_id, rope, split_mha=False, **kwargs)
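
For context, a minimal usage sketch of what this change enables: keyword options accepted by the base StaticAttention.__init__ can now be forwarded through the StaticAttentionMHA wrapper instead of being rejected. The keyword name use_masked_softmax below is a hypothetical placeholder (the real option name is defined by StaticAttention and is not shown in this diff), and the import paths assume the usual executorch example-module layout.

# Illustrative sketch only, not part of the patch. "use_masked_softmax" is a
# hypothetical stand-in for whatever masked-softmax option
# StaticAttention.__init__ actually accepts.
from executorch.examples.models.llama.model_args import ModelArgs
from executorch.examples.models.llama.rope import Rope
from executorch.examples.models.llama.static_attention import StaticAttentionMHA

config = ModelArgs()   # default example hyperparameters
rope = Rope(config)    # rotary position embedding helper shared by the layers

# Before this patch, any extra keyword argument raised a TypeError here;
# with the patch it is forwarded to StaticAttention.__init__ via **kwargs.
attn = StaticAttentionMHA(config, layer_id=0, rope=rope, use_masked_softmax=True)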