diff --git a/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp
index c7c9fd9ef8c..c5ebac73b4b 100644
--- a/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp
+++ b/backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp
@@ -300,7 +300,7 @@ void xa_opt_quantized_conv2d_nhwc(
       return;
     }
 
-    if (groups == input_channels) {
+    if (is_depthwise) {
       WORD32 channels_multiplier = out_channels / input_channels;
 
       scratch_size = xa_nn_conv2d_depthwise_getsize(
@@ -359,6 +359,26 @@ void xa_opt_quantized_conv2d_nhwc(
       return;
     }
   }
+
+  // Fall back to the generic grouped conv for cases not handled by nnlib
+  // (e.g. grouped conv with 4D weight where is_depthwise is false).
+  ::impl::generic::native::quantized_conv2d_nhwc_per_tensor_out(
+      ctx,
+      input,
+      weight,
+      bias,
+      stride,
+      padding,
+      dilation,
+      groups,
+      in_zero_point,
+      weight_zero_point,
+      bias_scale,
+      output_scale,
+      output_zero_point,
+      0, // out_multiplier (unused)
+      0, // out_shift (unused)
+      out);
 }
 
 void quantized_conv2d_nhwc(