From 28fd54281ddfe6edbc3a4038d5426e428712e82e Mon Sep 17 00:00:00 2001
From: Min Guo
Date: Fri, 29 Aug 2025 17:11:48 -0700
Subject: [PATCH] export static llama with masked softmax (#13832)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/13832

Export the model with masked softmax attention support.

Reviewed By: limintang, sxu

Differential Revision: D81248691
---
 examples/models/llama/static_attention.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index ffcdb6cb9ce..395fce85613 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -1044,5 +1044,5 @@ def transfer_weight(linear, conv2d):
 
 @register_attention("static_mha")
 class StaticAttentionMHA(StaticAttention):
-    def __init__(self, config: ModelArgs, layer_id: int, rope: Rope):
-        super().__init__(config, layer_id, rope, split_mha=False)
+    def __init__(self, config: ModelArgs, layer_id: int, rope: Rope, **kwargs: Any):
+        super().__init__(config, layer_id, rope, split_mha=False, **kwargs)
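
For context, a minimal usage sketch of what this change enables: keyword options accepted by the base StaticAttention.__init__ can now be forwarded through the StaticAttentionMHA wrapper instead of being rejected. The keyword name use_masked_softmax below is a hypothetical placeholder (the real option name is defined by StaticAttention and is not shown in this diff), and the import paths assume the usual executorch example-module layout.

# Illustrative sketch only, not part of the patch. "use_masked_softmax" is a
# hypothetical stand-in for whatever masked-softmax option
# StaticAttention.__init__ actually accepts.
from executorch.examples.models.llama.model_args import ModelArgs
from executorch.examples.models.llama.rope import Rope
from executorch.examples.models.llama.static_attention import StaticAttentionMHA

config = ModelArgs()   # default example hyperparameters
rope = Rope(config)    # rotary position embedding helper shared by the layers

# Before this patch, any extra keyword argument raised a TypeError here;
# with the patch it is forwarded to StaticAttention.__init__ via **kwargs.
attn = StaticAttentionMHA(config, layer_id=0, rope=rope, use_masked_softmax=True)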