diff --git a/torch/nn/modules/transformer.py b/torch/nn/modules/transformer.py index 6d70c51772f9..9cb67365935c 100644 --- a/torch/nn/modules/transformer.py +++ b/torch/nn/modules/transformer.py @@ -500,7 +500,7 @@ def forward(self, src: Tensor, src_mask: Optional[Tensor] = None, "input/output projection weights or biases requires_grad") if not why_not_sparsity_fast_path: - merged_mask, mask_type = F.merge_masks(src_mask, src_key_padding_mask, src) + merged_mask, mask_type = F._merge_masks(src_mask, src_key_padding_mask, src) return torch._transformer_encoder_layer_fwd( src, self.self_attn.embed_dim,