Skip to content

Conversation

@sparker-arm
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Oct 22, 2025

@llvm/pr-subscribers-backend-webassembly

Author: Sam Parker (sparker-arm)

Changes

Patch is 121.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164576.diff

3 Files Affected:

  • (modified) llvm/test/CodeGen/WebAssembly/memory-interleave.ll (+1604)
  • (modified) llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll (+39)
  • (modified) llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll (+1003)
diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
index 94efe0f4157f7..c968fa87d027b 100644
--- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
+++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
@@ -5,6 +5,7 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20
 %struct.TwoInts = type { i32, i32 }
 %struct.ThreeInts = type { i32, i32, i32 }
 %struct.FourInts = type { i32, i32, i32, i32 }
+%struct.TwoShorts = type { i16, i16 }
 %struct.ThreeShorts = type { i16, i16, i16 }
 %struct.FourShorts = type { i16, i16, i16, i16 }
 %struct.FiveShorts = type { i16, i16, i16, i16, i16 }
@@ -12,6 +13,8 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20
 %struct.ThreeBytes = type { i8, i8, i8 }
 %struct.FourBytes = type { i8, i8, i8, i8 }
 %struct.EightBytes = type { i8, i8, i8, i8, i8, i8, i8, i8 }
+%struct.TwoFloats = type { float, float }
+%struct.FourFloats = type { float, float, float, float }
 
 ; CHECK-LABEL: two_ints_same_op:
 ; CHECK: loop
@@ -1536,3 +1539,1604 @@ define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0,
 34:                                               ; preds = %6, %4
   ret void
 }
+
+; CHECK-LABEL: two_floats_same_op:
+; CHECK-NOT: f32x4.mul
+define hidden void @two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp21.not = icmp eq i32 %N, 0
+  br i1 %cmp21.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022
+  %1 = load float, ptr %arrayidx1, align 4
+  %mul = fmul float %0, %1
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.022
+  store float %mul, ptr %arrayidx3, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %mul8 = fmul float %2, %3
+  %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4
+  store float %mul8, ptr %y10, align 4
+  %inc = add nuw i32 %i.022, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_vary_op:
+; CHECK-NOT: f32x4
+define hidden void @two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp20.not = icmp eq i32 %N, 0
+  br i1 %cmp20.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.021 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.021
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.021
+  %1 = load float, ptr %arrayidx1, align 4
+  %add = fadd float %0, %1
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.021
+  store float %add, ptr %arrayidx3, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %sub = fsub float %2, %3
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4
+  store float %sub, ptr %y9, align 4
+  %inc = add nuw i32 %i.021, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_bytes_two_floats_same_op:
+; CHECK: loop
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s	
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp24.not = icmp eq i32 %N, 0
+  br i1 %cmp24.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.025
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = sitofp i8 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.025
+  %1 = load i8, ptr %arrayidx1, align 1
+  %conv3 = sitofp i8 %1 to float
+  %mul = fmul float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025
+  store float %mul, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1
+  %2 = load i8, ptr %y, align 1
+  %conv7 = sitofp i8 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1
+  %3 = load i8, ptr %y9, align 1
+  %conv10 = sitofp i8 %3 to float
+  %mul11 = fmul float %conv7, %conv10
+  %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %mul11, ptr %y13, align 4
+  %inc = add nuw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_bytes_two_floats_vary_op:
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.add
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.sub
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp23.not = icmp eq i32 %N, 0
+  br i1 %cmp23.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.024
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = sitofp i8 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.024
+  %1 = load i8, ptr %arrayidx1, align 1
+  %conv3 = sitofp i8 %1 to float
+  %add = fadd float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024
+  store float %add, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1
+  %2 = load i8, ptr %y, align 1
+  %conv7 = sitofp i8 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1
+  %3 = load i8, ptr %y9, align 1
+  %conv10 = sitofp i8 %3 to float
+  %sub = fsub float %conv7, %conv10
+  %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %sub, ptr %y12, align 4
+  %inc = add nuw i32 %i.024, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_two_bytes_same_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: f32x4.mul
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.splat
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: f32x4.mul
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: v128.store64_lane
+define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp22.not = icmp eq i32 %N, 0
+  br i1 %cmp22.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.023
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.023
+  %1 = load float, ptr %arrayidx1, align 4
+  %mul = fmul float %0, %1
+  %conv = fptosi float %mul to i8
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoBytes, ptr %res, i32 %i.023
+  store i8 %conv, ptr %arrayidx3, align 1
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %mul8 = fmul float %2, %3
+  %conv9 = fptosi float %mul8 to i8
+  %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1
+  store i8 %conv9, ptr %y11, align 1
+  %inc = add nuw i32 %i.023, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_two_bytes_vary_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: f32x4.add
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.splat
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: f32x4.sub
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: v128.store64_lane
+define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp21.not = icmp eq i32 %N, 0
+  br i1 %cmp21.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022
+  %1 = load float, ptr %arrayidx1, align 4
+  %add = fadd float %0, %1
+  %conv = fptosi float %add to i8
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoBytes, ptr %res, i32 %i.022
+  store i8 %conv, ptr %arrayidx3, align 1
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %sub = fsub float %2, %3
+  %conv8 = fptosi float %sub to i8
+  %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1
+  store i8 %conv8, ptr %y10, align 1
+  %inc = add nuw i32 %i.022, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_shorts_two_floats_same_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp24.not = icmp eq i32 %N, 0
+  br i1 %cmp24.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.025
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv = sitofp i16 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.025
+  %1 = load i16, ptr %arrayidx1, align 2
+  %conv3 = sitofp i16 %1 to float
+  %mul = fmul float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025
+  store float %mul, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2
+  %2 = load i16, ptr %y, align 2
+  %conv7 = sitofp i16 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2
+  %3 = load i16, ptr %y9, align 2
+  %conv10 = sitofp i16 %3 to float
+  %mul11 = fmul float %conv7, %conv10
+  %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %mul11, ptr %y13, align 4
+  %inc = add nuw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_shorts_two_floats_vary_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.add
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.sub
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp23.not = icmp eq i32 %N, 0
+  br i1 %cmp23.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.024
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv = sitofp i16 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.024
+  %1 = load i16, ptr %arrayidx1, align 2
+  %conv3 = sitofp i16 %1 to float
+  %add = fadd float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024
+  store float %add, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2
+  %2 = load i16, ptr %y, align 2
+  %conv7 = sitofp i16 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2
+  %3 = load i16, ptr %y9, align 2
+  %conv10 = sitofp i16 %3 to float
+  %sub = fsub float %conv7, %conv10
+  %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %sub, ptr %y12, align 4
+  %inc = add nuw i32 %i.024, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_two_shorts_same_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: f32x4.mul
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i16x8.splat
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: f32x4.mul...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Oct 22, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Sam Parker (sparker-arm)

Changes

Patch is 121.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164576.diff

3 Files Affected:

  • (modified) llvm/test/CodeGen/WebAssembly/memory-interleave.ll (+1604)
  • (modified) llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll (+39)
  • (modified) llvm/test/Transforms/LoopVectorize/WebAssembly/memory-interleave.ll (+1003)
diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
index 94efe0f4157f7..c968fa87d027b 100644
--- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
+++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll
@@ -5,6 +5,7 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20
 %struct.TwoInts = type { i32, i32 }
 %struct.ThreeInts = type { i32, i32, i32 }
 %struct.FourInts = type { i32, i32, i32, i32 }
+%struct.TwoShorts = type { i16, i16 }
 %struct.ThreeShorts = type { i16, i16, i16 }
 %struct.FourShorts = type { i16, i16, i16, i16 }
 %struct.FiveShorts = type { i16, i16, i16, i16, i16 }
@@ -12,6 +13,8 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20
 %struct.ThreeBytes = type { i8, i8, i8 }
 %struct.FourBytes = type { i8, i8, i8, i8 }
 %struct.EightBytes = type { i8, i8, i8, i8, i8, i8, i8, i8 }
+%struct.TwoFloats = type { float, float }
+%struct.FourFloats = type { float, float, float, float }
 
 ; CHECK-LABEL: two_ints_same_op:
 ; CHECK: loop
@@ -1536,3 +1539,1604 @@ define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0,
 34:                                               ; preds = %6, %4
   ret void
 }
+
+; CHECK-LABEL: two_floats_same_op:
+; CHECK-NOT: f32x4.mul
+define hidden void @two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp21.not = icmp eq i32 %N, 0
+  br i1 %cmp21.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022
+  %1 = load float, ptr %arrayidx1, align 4
+  %mul = fmul float %0, %1
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.022
+  store float %mul, ptr %arrayidx3, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %mul8 = fmul float %2, %3
+  %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4
+  store float %mul8, ptr %y10, align 4
+  %inc = add nuw i32 %i.022, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_vary_op:
+; CHECK-NOT: f32x4
+define hidden void @two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp20.not = icmp eq i32 %N, 0
+  br i1 %cmp20.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.021 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.021
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.021
+  %1 = load float, ptr %arrayidx1, align 4
+  %add = fadd float %0, %1
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.021
+  store float %add, ptr %arrayidx3, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %sub = fsub float %2, %3
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 4
+  store float %sub, ptr %y9, align 4
+  %inc = add nuw i32 %i.021, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_bytes_two_floats_same_op:
+; CHECK: loop
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s	
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp24.not = icmp eq i32 %N, 0
+  br i1 %cmp24.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.025
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = sitofp i8 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.025
+  %1 = load i8, ptr %arrayidx1, align 1
+  %conv3 = sitofp i8 %1 to float
+  %mul = fmul float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025
+  store float %mul, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1
+  %2 = load i8, ptr %y, align 1
+  %conv7 = sitofp i8 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1
+  %3 = load i8, ptr %y9, align 1
+  %conv10 = sitofp i8 %3 to float
+  %mul11 = fmul float %conv7, %conv10
+  %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %mul11, ptr %y13, align 4
+  %inc = add nuw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_bytes_two_floats_vary_op:
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load64_zero
+; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.add
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK: i16x8.extend_low_i8x16_s
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.sub
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp23.not = icmp eq i32 %N, 0
+  br i1 %cmp23.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoBytes, ptr %a, i32 %i.024
+  %0 = load i8, ptr %arrayidx, align 1
+  %conv = sitofp i8 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoBytes, ptr %b, i32 %i.024
+  %1 = load i8, ptr %arrayidx1, align 1
+  %conv3 = sitofp i8 %1 to float
+  %add = fadd float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024
+  store float %add, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 1
+  %2 = load i8, ptr %y, align 1
+  %conv7 = sitofp i8 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 1
+  %3 = load i8, ptr %y9, align 1
+  %conv10 = sitofp i8 %3 to float
+  %sub = fsub float %conv7, %conv10
+  %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %sub, ptr %y12, align 4
+  %inc = add nuw i32 %i.024, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_two_bytes_same_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: f32x4.mul
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.splat
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: f32x4.mul
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: v128.store64_lane
+define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp22.not = icmp eq i32 %N, 0
+  br i1 %cmp22.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.023 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.023
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.023
+  %1 = load float, ptr %arrayidx1, align 4
+  %mul = fmul float %0, %1
+  %conv = fptosi float %mul to i8
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoBytes, ptr %res, i32 %i.023
+  store i8 %conv, ptr %arrayidx3, align 1
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %mul8 = fmul float %2, %3
+  %conv9 = fptosi float %mul8 to i8
+  %y11 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1
+  store i8 %conv9, ptr %y11, align 1
+  %inc = add nuw i32 %i.023, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_two_bytes_vary_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: f32x4.add
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.splat
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: f32x4.sub
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i8x16.replace_lane
+; CHECK: v128.store64_lane
+define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp21.not = icmp eq i32 %N, 0
+  br i1 %cmp21.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.022 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoFloats, ptr %a, i32 %i.022
+  %0 = load float, ptr %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoFloats, ptr %b, i32 %i.022
+  %1 = load float, ptr %arrayidx1, align 4
+  %add = fadd float %0, %1
+  %conv = fptosi float %add to i8
+  %arrayidx3 = getelementptr inbounds nuw %struct.TwoBytes, ptr %res, i32 %i.022
+  store i8 %conv, ptr %arrayidx3, align 1
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 4
+  %2 = load float, ptr %y, align 4
+  %y7 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 4
+  %3 = load float, ptr %y7, align 4
+  %sub = fsub float %2, %3
+  %conv8 = fptosi float %sub to i8
+  %y10 = getelementptr inbounds nuw i8, ptr %arrayidx3, i32 1
+  store i8 %conv8, ptr %y10, align 1
+  %inc = add nuw i32 %i.022, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_shorts_two_floats_same_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.mul
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp24.not = icmp eq i32 %N, 0
+  br i1 %cmp24.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.025 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.025
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv = sitofp i16 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.025
+  %1 = load i16, ptr %arrayidx1, align 2
+  %conv3 = sitofp i16 %1 to float
+  %mul = fmul float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.025
+  store float %mul, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2
+  %2 = load i16, ptr %y, align 2
+  %conv7 = sitofp i16 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2
+  %3 = load i16, ptr %y9, align 2
+  %conv10 = sitofp i16 %3 to float
+  %mul11 = fmul float %conv7, %conv10
+  %y13 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %mul11, ptr %y13, align 4
+  %inc = add nuw i32 %i.025, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_shorts_two_floats_vary_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.add
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
+; CHECK: i32x4.extend_low_i16x8_s
+; CHECK: f32x4.convert_i32x4_s
+; CHECK: f32x4.sub
+; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31
+; CHECK: v128.store
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23
+; CHECK: v128.store
+define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) {
+entry:
+  %cmp23.not = icmp eq i32 %N, 0
+  br i1 %cmp23.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.024 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds nuw %struct.TwoShorts, ptr %a, i32 %i.024
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv = sitofp i16 %0 to float
+  %arrayidx1 = getelementptr inbounds nuw %struct.TwoShorts, ptr %b, i32 %i.024
+  %1 = load i16, ptr %arrayidx1, align 2
+  %conv3 = sitofp i16 %1 to float
+  %add = fadd float %conv, %conv3
+  %arrayidx4 = getelementptr inbounds nuw %struct.TwoFloats, ptr %res, i32 %i.024
+  store float %add, ptr %arrayidx4, align 4
+  %y = getelementptr inbounds nuw i8, ptr %arrayidx, i32 2
+  %2 = load i16, ptr %y, align 2
+  %conv7 = sitofp i16 %2 to float
+  %y9 = getelementptr inbounds nuw i8, ptr %arrayidx1, i32 2
+  %3 = load i16, ptr %y9, align 2
+  %conv10 = sitofp i16 %3 to float
+  %sub = fsub float %conv7, %conv10
+  %y12 = getelementptr inbounds nuw i8, ptr %arrayidx4, i32 4
+  store float %sub, ptr %y12, align 4
+  %inc = add nuw i32 %i.024, 1
+  %exitcond.not = icmp eq i32 %inc, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: two_floats_two_shorts_same_op:
+; CHECK: loop
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: v128.load
+; CHECK: v128.load
+; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK: f32x4.mul
+; CHECK: f32x4.extract_lane
+; CHECK: i32.trunc_sat_f32_s
+; CHECK: i16x8.splat
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK: f32x4.mul...
[truncated]

@sparker-arm sparker-arm merged commit 20340ac into llvm:main Oct 22, 2025
10 checks passed
dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025
Lukacma pushed a commit to Lukacma/llvm-project that referenced this pull request Oct 29, 2025
aokblast pushed a commit to aokblast/llvm-project that referenced this pull request Oct 30, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants