From eb5335292ef47b9abbb54c807f689d4ab9f47627 Mon Sep 17 00:00:00 2001
From: Manuel Candales <mcandales@meta.com>
Date: Thu, 11 Sep 2025 06:30:48 -0700
Subject: [PATCH] Add Bfloat16 support to op_convolution (#14166)

Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/14166

Reviewed By: SS-JIA

Differential Revision: D82131166
---
 kernels/portable/cpu/op_convolution.cpp |  2 +-
 kernels/test/op_convolution_test.cpp    | 56 +++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/kernels/portable/cpu/op_convolution.cpp b/kernels/portable/cpu/op_convolution.cpp
index f598ac99444..81dae4a96e9 100644
--- a/kernels/portable/cpu/op_convolution.cpp
+++ b/kernels/portable/cpu/op_convolution.cpp
@@ -412,7 +412,7 @@ Tensor& convolution_out(
   // @lint-ignore CLANGTIDY facebook-hte-CArray
   static constexpr const char name[] = "convolution.out";
 
-  ET_SWITCH_REALH_TYPES(in.scalar_type(), ctx, name, CTYPE, [&]() {
+  ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, name, CTYPE, [&]() {
     const auto load_bias = bias.has_value()
         ? utils::internal::get_load_to_compute_fn<CTYPE, name>(
               ctx, bias.value(), utils::SupportedTensorDtypes::REALHBF16)
diff --git a/kernels/test/op_convolution_test.cpp b/kernels/test/op_convolution_test.cpp
index 070268bd436..1e0e406af44 100644
--- a/kernels/test/op_convolution_test.cpp
+++ b/kernels/test/op_convolution_test.cpp
@@ -728,3 +728,59 @@ TEST_F(OpConvCorrectnessTest, InvalidOutputPadding) {
           groups,
           out));
 }
+
+TEST_F(OpConvCorrectnessTest, HalfTypeSmokeTest) {  // Half dtype smoke check
+  TensorFactory<ScalarType::Half> tf;  // every tensor below is built as Half
+  // Expected = window sums {12, 16} scaled by 0.5 / 1.0 (assumes N,C,L input).
+  auto input = tf.make({1, 2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+  auto weight = tf.make({2, 2, 2}, {0.5, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0});
+  optional<Tensor> bias;  // default-constructed: no bias tensor is supplied
+  auto expected = tf.make({1, 2, 2}, {6.0, 8.0, 12.0, 16.0});
+  auto out = tf.zeros({1, 2, 2});  // zero-filled, same sizes as `expected`
+
+  int64_t stride[1] = {1};
+  int64_t padding[1] = {0};
+  int64_t dilation[1] = {1};
+  int64_t output_padding[1] = {0};
+
+  op_convolution_out(
+      input,
+      weight,
+      bias,
+      executorch::aten::ArrayRef<int64_t>{stride, 1},
+      executorch::aten::ArrayRef<int64_t>{padding, 1},
+      executorch::aten::ArrayRef<int64_t>{dilation, 1},
+      false,  // NOTE(review): presumably the `transposed` flag - confirm
+      executorch::aten::ArrayRef<int64_t>{output_padding, 1},
+      int64_t(1),  // NOTE(review): presumably `groups` - confirm
+      out);
+  EXPECT_TENSOR_CLOSE(out, expected);
+}
+
+TEST_F(OpConvCorrectnessTest, BFloat16TypeSmokeTest) {  // BFloat16 smoke check
+  TensorFactory<ScalarType::BFloat16> tf;  // every tensor below is BFloat16
+  // Expected = window sums {12, 16} scaled by 0.5 / 1.0 (assumes N,C,L input).
+  auto input = tf.make({1, 2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+  auto weight = tf.make({2, 2, 2}, {0.5, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0});
+  optional<Tensor> bias;  // default-constructed: no bias tensor is supplied
+  auto expected = tf.make({1, 2, 2}, {6.0, 8.0, 12.0, 16.0});
+  auto out = tf.zeros({1, 2, 2});  // zero-filled, same sizes as `expected`
+
+  int64_t stride[1] = {1};
+  int64_t padding[1] = {0};
+  int64_t dilation[1] = {1};
+  int64_t output_padding[1] = {0};
+
+  op_convolution_out(
+      input,
+      weight,
+      bias,
+      executorch::aten::ArrayRef<int64_t>{stride, 1},
+      executorch::aten::ArrayRef<int64_t>{padding, 1},
+      executorch::aten::ArrayRef<int64_t>{dilation, 1},
+      false,  // NOTE(review): presumably the `transposed` flag - confirm
+      executorch::aten::ArrayRef<int64_t>{output_padding, 1},
+      int64_t(1),  // NOTE(review): presumably `groups` - confirm
+      out);
+  EXPECT_TENSOR_CLOSE(out, expected);
+}