diff --git a/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp b/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp index 7e71df62d54..11c3edbb6a2 100644 --- a/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp +++ b/backends/cadence/fusion_g3/operators/op_native_layer_norm.cpp @@ -221,12 +221,15 @@ std::tuple native_layer_norm_out( num_elm *= normalized_shape[i]; } + constexpr size_t kAlignment = + 16; // 16-byte alignment for vectorized operations + float* weight_data; if (weight.has_value()) { weight_data = weight.value().mutable_data_ptr(); } else { executorch::runtime::Result temp_mem_weight = - ctx.allocate_temp(num_elm * sizeof(float)); + ctx.allocate_temp(num_elm * sizeof(float), kAlignment); weight_data = (float*)(temp_mem_weight.get()); for (int i = 0; i < num_elm; i++) { @@ -238,7 +241,7 @@ std::tuple native_layer_norm_out( bias_data = bias.value().mutable_data_ptr(); } else { executorch::runtime::Result temp_mem_bias = - ctx.allocate_temp(num_elm * sizeof(float)); + ctx.allocate_temp(num_elm * sizeof(float), kAlignment); bias_data = (float*)(temp_mem_bias.get()); for (int i = 0; i < num_elm; i++) { diff --git a/backends/cadence/hifi/kernels/kernels.cpp b/backends/cadence/hifi/kernels/kernels.cpp index d2cf6dd5057..98708349fb1 100644 --- a/backends/cadence/hifi/kernels/kernels.cpp +++ b/backends/cadence/hifi/kernels/kernels.cpp @@ -21,11 +21,19 @@ memcpy(void* dst, const void* src, size_t num_bytes) { } void* allocate_temp_memory(KernelRuntimeContext& ctx, size_t size) { - ET_LOG(Info, "Attempting to allocate %zu bytes of temp memory", size); - Result temp_mem_res = ctx.allocate_temp(size); + constexpr size_t kAlignment = + 16; // 16-byte alignment for vectorized operations + ET_LOG( + Info, + "Attempting to allocate %zu bytes of temp memory (16-byte aligned)", + size); + Result temp_mem_res = ctx.allocate_temp(size, kAlignment); if (temp_mem_res.ok()) { void* ptr = temp_mem_res.get(); - ET_LOG(Info, "Successfully allocated temp memory at %p", ptr); + ET_LOG( + Info, + "Successfully allocated temp memory at %p (16-byte aligned)", + ptr); return ptr; } else { ET_LOG(