From 1082137561bbb545100b05a76dfbf30bbb376e0f Mon Sep 17 00:00:00 2001
From: Matthias Cremon <matthiascremon@meta.com>
Date: Wed, 17 Sep 2025 14:13:49 -0700
Subject: [PATCH] Remove generic versions of aten ops and use portable instead

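Summary: As titled. We don't need to maintain Cadence-specific generic reference implementations of these ATen ops (add, embedding, full, view_copy), since the portable kernels already provide them; the build now uses the portable versions instead.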

Differential Revision: D82667318
---
 backends/cadence/aot/TARGETS                  |  4 --
 .../cadence/generic/operators/CMakeLists.txt  |  8 +--
 backends/cadence/generic/operators/op_add.cpp | 61 -------------------
 .../generic/operators/op_embedding.cpp        | 41 -------------
 .../cadence/generic/operators/op_full.cpp     | 50 ---------------
 .../generic/operators/op_view_copy.cpp        | 29 ---------
 .../cadence/generic/operators/targets.bzl     | 58 ------------------
 7 files changed, 4 insertions(+), 247 deletions(-)
 delete mode 100644 backends/cadence/generic/operators/op_add.cpp
 delete mode 100644 backends/cadence/generic/operators/op_embedding.cpp
 delete mode 100644 backends/cadence/generic/operators/op_full.cpp
 delete mode 100644 backends/cadence/generic/operators/op_view_copy.cpp

diff --git a/backends/cadence/aot/TARGETS b/backends/cadence/aot/TARGETS
index b54f1ac3ba6..49a7c966b1c 100644
--- a/backends/cadence/aot/TARGETS
+++ b/backends/cadence/aot/TARGETS
@@ -144,11 +144,7 @@ executorch_generated_lib(
     deps = [
         "//executorch/backends/cadence/generic/kernels:cadence_kernels",
         # Individual operator targets instead of combined cadence_generic_ops
-        "//executorch/backends/cadence/generic/operators:op_add",
-        "//executorch/backends/cadence/generic/operators:op_embedding",
-        "//executorch/backends/cadence/generic/operators:op_full",
         "//executorch/backends/cadence/generic/operators:op_requantize_out",
-        "//executorch/backends/cadence/generic/operators:op_view_copy",
         "//executorch/backends/cadence/generic/operators:im2row_out",
         "//executorch/backends/cadence/generic/operators:dequantize_per_tensor",
         "//executorch/backends/cadence/generic/operators:quantize_per_tensor",
diff --git a/backends/cadence/generic/operators/CMakeLists.txt b/backends/cadence/generic/operators/CMakeLists.txt
index d88701007f9..b74ead7eddc 100644
--- a/backends/cadence/generic/operators/CMakeLists.txt
+++ b/backends/cadence/generic/operators/CMakeLists.txt
@@ -16,10 +16,6 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake)
 
 # ATen compliant ops that are needed to run this model.
 set(_aten_ops__srcs
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_add.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_embedding.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_full.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/op_view_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/activation_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/copy_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/broadcast_util.cpp"
@@ -31,10 +27,13 @@ set(_aten_ops__srcs
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/repeat_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/slice_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/pattern/unary_ufunc_realhbbf16_to_floathbf16.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_add.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_bmm.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_cat.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_clone.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_div.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_embedding.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_full.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_hardtanh.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_max_pool2d_with_indices.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_mean.cpp"
@@ -58,6 +57,7 @@ set(_aten_ops__srcs
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_native_group_norm.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_sum.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_select_copy.cpp"
+    "${EXECUTORCH_ROOT}/kernels/portable/cpu/op_view_copy.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/dtype_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/normalization_ops_util.cpp"
     "${EXECUTORCH_ROOT}/kernels/portable/cpu/util/select_copy_util.cpp"
diff --git a/backends/cadence/generic/operators/op_add.cpp b/backends/cadence/generic/operators/op_add.cpp
deleted file mode 100644
index 89b67467605..00000000000
--- a/backends/cadence/generic/operators/op_add.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include <executorch/kernels/portable/cpu/scalar_utils.h>
-#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
-#include <executorch/runtime/kernel/kernel_includes.h>
-#include <executorch/runtime/platform/assert.h>
-
-namespace torch {
-namespace executor {
-namespace native {
-
-Tensor& add_out(
-    KernelRuntimeContext& ctx,
-    const Tensor& a,
-    const Tensor& b,
-    const Scalar& alpha,
-    Tensor& out) {
-  (void)ctx;
-
-  ScalarType a_type = a.scalar_type();
-  ScalarType b_type = b.scalar_type();
-  ScalarType common_type = promoteTypes(a_type, b_type);
-  ScalarType out_type = out.scalar_type();
-
-  ET_CHECK_MSG(a_type == ScalarType::Float, "Input tensor not a float.\n");
-  ET_CHECK_MSG(b_type == ScalarType::Float, "Input tensor not a float.\n");
-  ET_CHECK_MSG(out_type == ScalarType::Float, "Output tensor not a float.\n");
-
-  ET_CHECK(canCast(common_type, out_type));
-
-  using CTYPE_A = float;
-  using CTYPE_B = float;
-  using CTYPE_IN = float;
-  using CTYPE_OUT = float;
-  CTYPE_IN alpha_val;
-  ET_EXTRACT_SCALAR(alpha, alpha_val);
-
-  apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
-      [alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) {
-        CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
-        CTYPE_IN b_casted = static_cast<CTYPE_IN>(val_b);
-        CTYPE_IN value = a_casted + alpha_val * b_casted;
-
-        return static_cast<CTYPE_OUT>(value);
-      },
-      a,
-      b,
-      out);
-
-  return out;
-}
-
-} // namespace native
-} // namespace executor
-} // namespace torch
diff --git a/backends/cadence/generic/operators/op_embedding.cpp b/backends/cadence/generic/operators/op_embedding.cpp
deleted file mode 100644
index ce28789a156..00000000000
--- a/backends/cadence/generic/operators/op_embedding.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include <executorch/runtime/kernel/kernel_includes.h>
-
-namespace torch {
-namespace executor {
-namespace native {
-
-using executorch::aten::Tensor;
-using executorch::runtime::KernelRuntimeContext;
-
-void embedding_out(
-    KernelRuntimeContext& ctx,
-    const Tensor& weight,
-    const Tensor& indices,
-    int64_t padding_idx,
-    bool scale_grad_by_freq,
-    bool sparse,
-    Tensor& out) {
-  int64_t nbytes_per_entry = weight.size(1) * weight.element_size();
-  const char* w_data = weight.const_data_ptr<char>();
-  char* out_data = out.mutable_data_ptr<char>();
-  const int64_t* indices_ptr = indices.const_data_ptr<int64_t>();
-
-  for (int i = 0, e = indices.numel(); i < e; i++) {
-    // memcpy(dest, src, nbytes);
-    memcpy(
-        out_data, w_data + nbytes_per_entry * indices_ptr[i], nbytes_per_entry);
-    out_data += nbytes_per_entry;
-  }
-}
-
-} // namespace native
-} // namespace executor
-} // namespace torch
diff --git a/backends/cadence/generic/operators/op_full.cpp b/backends/cadence/generic/operators/op_full.cpp
deleted file mode 100644
index 21d5fc56299..00000000000
--- a/backends/cadence/generic/operators/op_full.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include <executorch/kernels/portable/cpu/scalar_utils.h>
-#include <executorch/runtime/kernel/kernel_includes.h>
-
-namespace torch {
-namespace executor {
-namespace native {
-
-using executorch::aten::ScalarType;
-using executorch::aten::Tensor;
-
-Tensor& full_out(
-    KernelRuntimeContext& ctx,
-    const IntArrayRef sizes,
-    const Scalar& fill_value,
-    Tensor& out) {
-  (void)ctx;
-
-  ScalarType val_type = utils::get_scalar_dtype(fill_value);
-  ScalarType out_type = out.scalar_type();
-
-  Error err = resize_tensor(out, sizes);
-  ET_CHECK_MSG(err == Error::Ok, "Could not resize out");
-
-  ET_SWITCH_REAL_TYPES_AND(Bool, val_type, ctx, "full", CTYPE_VAL, [&] {
-    CTYPE_VAL val;
-    ET_EXTRACT_SCALAR(fill_value, val);
-
-    ET_SWITCH_REAL_TYPES_AND(Bool, out_type, ctx, "full", CTYPE_OUT, [&] {
-      CTYPE_OUT val_casted = static_cast<CTYPE_OUT>(val);
-      auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
-      for (size_t i = 0; i < out.numel(); ++i) {
-        data_out[i] = val_casted;
-      }
-    });
-  });
-
-  return out;
-}
-
-} // namespace native
-} // namespace executor
-} // namespace torch
diff --git a/backends/cadence/generic/operators/op_view_copy.cpp b/backends/cadence/generic/operators/op_view_copy.cpp
deleted file mode 100644
index 162e9ee201b..00000000000
--- a/backends/cadence/generic/operators/op_view_copy.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include <executorch/runtime/kernel/kernel_includes.h>
-
-namespace torch {
-namespace executor {
-namespace native {
-
-using executorch::aten::Tensor;
-using executorch::runtime::KernelRuntimeContext;
-
-Tensor& view_copy_out(
-    KernelRuntimeContext& ctx,
-    const Tensor& input,
-    const IntArrayRef size,
-    Tensor& out) {
-  memcpy(out.mutable_data_ptr(), input.const_data_ptr(), input.nbytes());
-  return out;
-}
-
-} // namespace native
-} // namespace executor
-} // namespace torch
diff --git a/backends/cadence/generic/operators/targets.bzl b/backends/cadence/generic/operators/targets.bzl
index b3c305c9c02..193b43c2b6d 100644
--- a/backends/cadence/generic/operators/targets.bzl
+++ b/backends/cadence/generic/operators/targets.bzl
@@ -4,64 +4,6 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 def define_common_targets():
     # Individual operator targets with optimized dependencies
 
-    # Basic operators (need broadcast_util and scalar_utils)
-    runtime.cxx_library(
-        name = "op_add",
-        srcs = ["op_add.cpp"],
-        platforms = CXX,
-        deps = [
-            "//executorch/kernels/portable/cpu/util:broadcast_util",
-            "//executorch/runtime/kernel:kernel_includes",
-            "//executorch/kernels/portable/cpu:scalar_utils",
-        ],
-        visibility = [
-            "//executorch/backends/cadence/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-    )
-
-    runtime.cxx_library(
-        name = "op_full",
-        srcs = ["op_full.cpp"],
-        platforms = CXX,
-        deps = [
-            "//executorch/runtime/kernel:kernel_includes",
-            "//executorch/kernels/portable/cpu:scalar_utils",
-        ],
-        visibility = [
-            "//executorch/backends/cadence/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-    )
-
-    # Simple operators (only need kernel_includes)
-    runtime.cxx_library(
-        name = "op_embedding",
-        srcs = ["op_embedding.cpp"],
-        platforms = CXX,
-        deps = [
-            "//executorch/runtime/kernel:kernel_includes",
-        ],
-        visibility = [
-            "//executorch/backends/cadence/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-    )
-
-    runtime.cxx_library(
-        name = "op_view_copy",
-        srcs = ["op_view_copy.cpp"],
-        platforms = CXX,
-        deps = [
-            "//executorch/runtime/kernel:kernel_includes",
-        ],
-        visibility = [
-            "//executorch/backends/cadence/...",
-            "@EXECUTORCH_CLIENTS",
-        ],
-    )
-
-    # Operators that need the operators.h header and basic runtime
     runtime.cxx_library(
         name = "im2row_out",
         srcs = ["im2row_out.cpp"],