Skip to content

Commit e45cbf9

Browse files
committed
[ARM,MVE] Update MVE_VMLA_qr for architecture change.
In revision B.q and earlier of the Armv8-M architecture reference manual, the vector/scalar forms of the `vmla` and `vmlas` instructions came in signed and unsigned integer forms, such as `vmla.s8 q0,q1,r2` or `vmlas.u32 q3,q4,r5`.

Revision B.r has changed this. There are no longer signed and unsigned versions of these instructions, since they were functionally identical anyway. Now there is just `vmla.i8` (or `i16` or `i32`, and similarly for `vmlas`). Bit 28 of the instruction encoding, which was previously 0 for signed or 1 for unsigned, is now expected to be 0 always.

This change updates LLVM to the new version of the architecture. The obsoleted encodings for unsigned integers are now decoding errors, and only the still-valid encoding is ever emitted. This shouldn't break any existing assembly code, because the old signed and unsigned versions of the mnemonic are still accepted by the assembler (which is standard practice anyway for all signedness-agnostic MVE integer instructions).

Reviewed By: dmgreen, lenary

Differential Revision: https://reviews.llvm.org/D138827
1 parent a580d2e commit e45cbf9

File tree

15 files changed

+170
-167
lines changed

15 files changed

+170
-167
lines changed

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5625,7 +5625,7 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
56255625

56265626
multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
56275627
bit scalar_addend> {
5628-
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
5628+
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, 0b0, VTI.Size,
56295629
scalar_addend, VTI.Size>;
56305630
defvar Inst = !cast<Instruction>(NAME);
56315631
defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
@@ -5635,10 +5635,7 @@ multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
56355635
defvar s = (i32 rGPR:$s);
56365636
defvar pred = (VTI.Pred VCCR:$pred);
56375637

5638-
// The signed and unsigned variants of this instruction have different
5639-
// encodings, but they're functionally identical. For the sake of
5640-
// determinism, we generate only the unsigned variant.
5641-
if VTI.Unsigned then let Predicates = [HasMVEInt] in {
5638+
let Predicates = [HasMVEInt] in {
56425639
if scalar_addend then {
56435640
def : Pat<(VTI.Vec (add (mul v1, v2), vs)),
56445641
(VTI.Vec (Inst v1, v2, s))>;
@@ -5652,19 +5649,13 @@ multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
56525649
}
56535650
}
56545651

5655-
defm MVE_VMLA_qr_s8 : MVE_VMLA_qr_multi<"vmla", MVE_v16s8, 0b0>;
5656-
defm MVE_VMLA_qr_s16 : MVE_VMLA_qr_multi<"vmla", MVE_v8s16, 0b0>;
5657-
defm MVE_VMLA_qr_s32 : MVE_VMLA_qr_multi<"vmla", MVE_v4s32, 0b0>;
5658-
defm MVE_VMLA_qr_u8 : MVE_VMLA_qr_multi<"vmla", MVE_v16u8, 0b0>;
5659-
defm MVE_VMLA_qr_u16 : MVE_VMLA_qr_multi<"vmla", MVE_v8u16, 0b0>;
5660-
defm MVE_VMLA_qr_u32 : MVE_VMLA_qr_multi<"vmla", MVE_v4u32, 0b0>;
5652+
defm MVE_VMLA_qr_i8 : MVE_VMLA_qr_multi<"vmla", MVE_v16i8, 0b0>;
5653+
defm MVE_VMLA_qr_i16 : MVE_VMLA_qr_multi<"vmla", MVE_v8i16, 0b0>;
5654+
defm MVE_VMLA_qr_i32 : MVE_VMLA_qr_multi<"vmla", MVE_v4i32, 0b0>;
56615655

5662-
defm MVE_VMLAS_qr_s8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16s8, 0b1>;
5663-
defm MVE_VMLAS_qr_s16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8s16, 0b1>;
5664-
defm MVE_VMLAS_qr_s32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4s32, 0b1>;
5665-
defm MVE_VMLAS_qr_u8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16u8, 0b1>;
5666-
defm MVE_VMLAS_qr_u16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8u16, 0b1>;
5667-
defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
5656+
defm MVE_VMLAS_qr_i8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16i8, 0b1>;
5657+
defm MVE_VMLAS_qr_i16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8i16, 0b1>;
5658+
defm MVE_VMLAS_qr_i32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4i32, 0b1>;
56685659

56695660
multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
56705661
bit scalar_addend> {

llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re
2424
; CHECK-NEXT: vpst
2525
; CHECK-NEXT: vldrbt.u32 q2, [r1], #4
2626
; CHECK-NEXT: vmov q1, q0
27-
; CHECK-NEXT: vmla.u32 q0, q2, r0
27+
; CHECK-NEXT: vmla.i32 q0, q2, r0
2828
; CHECK-NEXT: le lr, .LBB0_2
2929
; CHECK-NEXT: @ %bb.3: @ %middle.block
3030
; CHECK-NEXT: vpsel q0, q0, q1
@@ -89,7 +89,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture
8989
; CHECK-NEXT: vpst
9090
; CHECK-NEXT: vldrht.s32 q2, [r1], #8
9191
; CHECK-NEXT: vmov q1, q0
92-
; CHECK-NEXT: vmla.u32 q0, q2, r0
92+
; CHECK-NEXT: vmla.i32 q0, q2, r0
9393
; CHECK-NEXT: le lr, .LBB1_2
9494
; CHECK-NEXT: @ %bb.3: @ %middle.block
9595
; CHECK-NEXT: vpsel q0, q0, q1
@@ -154,7 +154,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r
154154
; CHECK-NEXT: vpst
155155
; CHECK-NEXT: vldrbt.u32 q2, [r1], #4
156156
; CHECK-NEXT: vmov q1, q0
157-
; CHECK-NEXT: vmla.u32 q0, q2, r0
157+
; CHECK-NEXT: vmla.i32 q0, q2, r0
158158
; CHECK-NEXT: le lr, .LBB2_2
159159
; CHECK-NEXT: @ %bb.3: @ %middle.block
160160
; CHECK-NEXT: vpsel q0, q0, q1
@@ -219,7 +219,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur
219219
; CHECK-NEXT: vpst
220220
; CHECK-NEXT: vldrht.u32 q2, [r1], #8
221221
; CHECK-NEXT: vmov q1, q0
222-
; CHECK-NEXT: vmla.u32 q0, q2, r0
222+
; CHECK-NEXT: vmla.i32 q0, q2, r0
223223
; CHECK-NEXT: le lr, .LBB3_2
224224
; CHECK-NEXT: @ %bb.3: @ %middle.block
225225
; CHECK-NEXT: vpsel q0, q0, q1
@@ -284,7 +284,7 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly
284284
; CHECK-NEXT: vpst
285285
; CHECK-NEXT: vldrwt.u32 q2, [r1], #16
286286
; CHECK-NEXT: vmov q1, q0
287-
; CHECK-NEXT: vmla.u32 q0, q2, r0
287+
; CHECK-NEXT: vmla.i32 q0, q2, r0
288288
; CHECK-NEXT: le lr, .LBB4_2
289289
; CHECK-NEXT: @ %bb.3: @ %middle.block
290290
; CHECK-NEXT: vpsel q0, q0, q1
@@ -361,7 +361,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
361361
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
362362
; CHECK-NEXT: vldrb.u32 q0, [r0], #4
363363
; CHECK-NEXT: vldrb.u32 q1, [r1], #4
364-
; CHECK-NEXT: vmlas.u32 q1, q0, r2
364+
; CHECK-NEXT: vmlas.i32 q1, q0, r2
365365
; CHECK-NEXT: vstrw.32 q1, [r3], #16
366366
; CHECK-NEXT: letp lr, .LBB5_5
367367
; CHECK-NEXT: b .LBB5_11
@@ -559,7 +559,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon
559559
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
560560
; CHECK-NEXT: vldrh.s32 q0, [r0], #8
561561
; CHECK-NEXT: vldrh.s32 q1, [r1], #8
562-
; CHECK-NEXT: vmlas.u32 q1, q0, r2
562+
; CHECK-NEXT: vmlas.i32 q1, q0, r2
563563
; CHECK-NEXT: vstrw.32 q1, [r3], #16
564564
; CHECK-NEXT: letp lr, .LBB6_2
565565
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@@ -637,7 +637,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
637637
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
638638
; CHECK-NEXT: vldrb.u32 q0, [r0], #4
639639
; CHECK-NEXT: vldrb.u32 q1, [r1], #4
640-
; CHECK-NEXT: vmlas.u32 q1, q0, r2
640+
; CHECK-NEXT: vmlas.i32 q1, q0, r2
641641
; CHECK-NEXT: vstrw.32 q1, [r3], #16
642642
; CHECK-NEXT: letp lr, .LBB7_5
643643
; CHECK-NEXT: b .LBB7_11
@@ -835,7 +835,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado
835835
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
836836
; CHECK-NEXT: vldrh.u32 q0, [r0], #8
837837
; CHECK-NEXT: vldrh.u32 q1, [r1], #8
838-
; CHECK-NEXT: vmlas.u32 q1, q0, r2
838+
; CHECK-NEXT: vmlas.i32 q1, q0, r2
839839
; CHECK-NEXT: vstrw.32 q1, [r3], #16
840840
; CHECK-NEXT: letp lr, .LBB8_2
841841
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
@@ -913,7 +913,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly
913913
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
914914
; CHECK-NEXT: vldrw.u32 q0, [r0], #16
915915
; CHECK-NEXT: vldrw.u32 q1, [r1], #16
916-
; CHECK-NEXT: vmlas.u32 q1, q0, r2
916+
; CHECK-NEXT: vmlas.i32 q1, q0, r2
917917
; CHECK-NEXT: vstrw.32 q1, [r3], #16
918918
; CHECK-NEXT: letp lr, .LBB9_5
919919
; CHECK-NEXT: b .LBB9_11

llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
6363
; CHECK-NEXT: vshr.u16 q1, q0, #3
6464
; CHECK-NEXT: vand q1, q1, q2
6565
; CHECK-NEXT: vmov q2, q4
66-
; CHECK-NEXT: vmla.u16 q2, q1, r2
66+
; CHECK-NEXT: vmla.i16 q2, q1, r2
6767
; CHECK-NEXT: vshr.u16 q1, q2, #5
6868
; CHECK-NEXT: vshl.i16 q2, q0, #3
6969
; CHECK-NEXT: vand q3, q1, q5
@@ -74,7 +74,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
7474
; CHECK-NEXT: vmov q5, q4
7575
; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload
7676
; CHECK-NEXT: vshr.u16 q0, q0, #9
77-
; CHECK-NEXT: vmla.u16 q4, q2, r2
77+
; CHECK-NEXT: vmla.i16 q4, q2, r2
7878
; CHECK-NEXT: vshr.u16 q2, q4, #11
7979
; CHECK-NEXT: vmov q4, q5
8080
; CHECK-NEXT: vmov q5, q6
@@ -83,7 +83,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur
8383
; CHECK-NEXT: vorr q1, q3, q2
8484
; CHECK-NEXT: vldrw.u32 q2, [sp, #16] @ 16-byte Reload
8585
; CHECK-NEXT: vand q0, q0, q7
86-
; CHECK-NEXT: vmla.u16 q2, q0, r2
86+
; CHECK-NEXT: vmla.i16 q2, q0, r2
8787
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
8888
; CHECK-NEXT: vand q0, q2, q0
8989
; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload
@@ -242,7 +242,7 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
242242
; CHECK-NEXT: vshl.i16 q1, q0, #3
243243
; CHECK-NEXT: vand q1, q1, q2
244244
; CHECK-NEXT: vmov q2, q4
245-
; CHECK-NEXT: vmla.u16 q2, q1, r3
245+
; CHECK-NEXT: vmla.i16 q2, q1, r3
246246
; CHECK-NEXT: vshr.u16 q1, q0, #3
247247
; CHECK-NEXT: vand q1, q1, q5
248248
; CHECK-NEXT: vmov.f64 d14, d10
@@ -251,11 +251,11 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no
251251
; CHECK-NEXT: vmov.f64 d11, d9
252252
; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload
253253
; CHECK-NEXT: vshr.u16 q0, q0, #9
254-
; CHECK-NEXT: vmla.u16 q4, q1, r3
254+
; CHECK-NEXT: vmla.i16 q4, q1, r3
255255
; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload
256256
; CHECK-NEXT: vand q0, q0, q1
257257
; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
258-
; CHECK-NEXT: vmla.u16 q1, q0, r3
258+
; CHECK-NEXT: vmla.i16 q1, q0, r3
259259
; CHECK-NEXT: vshr.u16 q0, q2, #11
260260
; CHECK-NEXT: vshr.u16 q2, q4, #5
261261
; CHECK-NEXT: vand q2, q2, q6

llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,7 @@ body: |
162162
; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 0, $noreg, $noreg :: (load (s128) from %ir.input_1_cast, align 4)
163163
; CHECK: renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, $noreg, undef renamable $q2
164164
; CHECK: renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, $noreg, undef renamable $q3
165-
; CHECK: renamable $q3 = MVE_VMLAS_qr_u32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
165+
; CHECK: renamable $q3 = MVE_VMLAS_qr_i32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
166166
; CHECK: renamable $q2 = MVE_VMAXu32 killed renamable $q3, renamable $q1, 0, $noreg, $noreg, undef renamable $q2
167167
; CHECK: renamable $q3 = MVE_VMINu32 renamable $q2, renamable $q0, 0, $noreg, $noreg, undef renamable $q3
168168
; CHECK: renamable $r12 = MVE_VMLADAVas32 killed renamable $r12, killed renamable $q3, killed renamable $q2, 0, killed $noreg, $noreg
@@ -210,7 +210,7 @@ body: |
210210
renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, $noreg, undef renamable $q2
211211
renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, $noreg, undef renamable $q3
212212
renamable $r4, dead $cpsr = tSUBi8 killed renamable $r4, 4, 14 /* CC::al */, $noreg
213-
renamable $q3 = MVE_VMLAS_qr_u32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
213+
renamable $q3 = MVE_VMLAS_qr_i32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg
214214
MVE_VPST 2, implicit $vpr
215215
renamable $q2 = MVE_VMAXu32 killed renamable $q3, renamable $q1, 1, renamable $vpr, $noreg, undef renamable $q2
216216
renamable $q3 = MVE_VMINu32 renamable $q2, renamable $q0, 1, renamable $vpr, $noreg, undef renamable $q3

llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -339,7 +339,7 @@ define arm_aapcs_vfpcc void @non_gatscat_use1(i32* noalias nocapture readonly %d
339339
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
340340
; CHECK-NEXT: vmov q3, q0
341341
; CHECK-NEXT: vadd.i32 q2, q1, r4
342-
; CHECK-NEXT: vmla.u32 q3, q1, lr
342+
; CHECK-NEXT: vmla.i32 q3, q1, lr
343343
; CHECK-NEXT: vmul.i32 q1, q1, r12
344344
; CHECK-NEXT: vldrw.u32 q4, [q3, #24]
345345
; CHECK-NEXT: subs r2, #4
@@ -401,11 +401,11 @@ define arm_aapcs_vfpcc void @non_gatscat_use2(i32* noalias nocapture readonly %d
401401
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
402402
; CHECK-NEXT: vmov q4, q0
403403
; CHECK-NEXT: vadd.i32 q3, q2, r4
404-
; CHECK-NEXT: vmla.u32 q4, q2, lr
404+
; CHECK-NEXT: vmla.i32 q4, q2, lr
405405
; CHECK-NEXT: subs r2, #4
406406
; CHECK-NEXT: vldrw.u32 q5, [q4, #24]
407407
; CHECK-NEXT: vmov q4, q1
408-
; CHECK-NEXT: vmla.u32 q4, q2, r12
408+
; CHECK-NEXT: vmla.i32 q4, q2, r12
409409
; CHECK-NEXT: vmov q2, q3
410410
; CHECK-NEXT: vstrb.8 q5, [r1], #16
411411
; CHECK-NEXT: vstrw.32 q4, [r3]
@@ -490,7 +490,7 @@ define dso_local void @arm_mat_mult_q31(i32* noalias nocapture readonly %A, i32*
490490
; CHECK-NEXT: dls lr, r10
491491
; CHECK-NEXT: vmov.i32 q4, #0x0
492492
; CHECK-NEXT: vadd.i32 q5, q5, q0
493-
; CHECK-NEXT: vmlas.u32 q6, q2, r5
493+
; CHECK-NEXT: vmlas.i32 q6, q2, r5
494494
; CHECK-NEXT: .LBB9_3: @ %vector.body
495495
; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1
496496
; CHECK-NEXT: @ Parent Loop BB9_2 Depth=2
@@ -696,7 +696,7 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
696696
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
697697
; CHECK-NEXT: vmov q5, q1
698698
; CHECK-NEXT: vmov.i32 q4, #0x0
699-
; CHECK-NEXT: vmlas.u32 q5, q2, r8
699+
; CHECK-NEXT: vmlas.i32 q5, q2, r8
700700
; CHECK-NEXT: dls lr, r0
701701
; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
702702
; CHECK-NEXT: .LBB10_11: @ %vector.body
@@ -909,11 +909,11 @@ define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly
909909
; CHECK-NEXT: vdup.32 q3, r5
910910
; CHECK-NEXT: vdup.32 q2, r7
911911
; CHECK-NEXT: vadd.i32 q4, q1, r4
912-
; CHECK-NEXT: vmla.u32 q3, q4, r2
912+
; CHECK-NEXT: vmla.i32 q3, q4, r2
913913
; CHECK-NEXT: adds r4, #113
914914
; CHECK-NEXT: vadd.i32 q4, q1, r4
915915
; CHECK-NEXT: mov r4, r8
916-
; CHECK-NEXT: vmla.u32 q2, q4, r2
916+
; CHECK-NEXT: vmla.i32 q2, q4, r2
917917
; CHECK-NEXT: .LBB11_5: @ %vector.body
918918
; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1
919919
; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2

llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,7 @@ define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* n
155155
; CHECK-NEXT: vpst
156156
; CHECK-NEXT: vldrwt.u32 q2, [r4], #16
157157
; CHECK-NEXT: vmul.i32 q3, q2, r1
158-
; CHECK-NEXT: vmla.u32 q0, q2, r1
158+
; CHECK-NEXT: vmla.i32 q0, q2, r1
159159
; CHECK-NEXT: vpst
160160
; CHECK-NEXT: vstrwt.32 q3, [q1, #80]!
161161
; CHECK-NEXT: le lr, .LBB2_1
@@ -243,17 +243,17 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur
243243
; CHECK-NEXT: vldrb.u32 q5, [r0, q2]
244244
; CHECK-NEXT: adds r0, #12
245245
; CHECK-NEXT: vmul.i32 q6, q4, r11
246-
; CHECK-NEXT: vmla.u32 q6, q3, r8
247-
; CHECK-NEXT: vmla.u32 q6, q5, r12
246+
; CHECK-NEXT: vmla.i32 q6, q3, r8
247+
; CHECK-NEXT: vmla.i32 q6, q5, r12
248248
; CHECK-NEXT: vadd.i32 q6, q6, r3
249249
; CHECK-NEXT: vshr.u32 q6, q6, #16
250250
; CHECK-NEXT: vstrb.32 q6, [r1, q1]
251251
; CHECK-NEXT: vmul.i32 q6, q4, r4
252252
; CHECK-NEXT: vmul.i32 q4, q4, r10
253-
; CHECK-NEXT: vmla.u32 q6, q3, r5
254-
; CHECK-NEXT: vmla.u32 q4, q3, r7
255-
; CHECK-NEXT: vmla.u32 q6, q5, r6
256-
; CHECK-NEXT: vmla.u32 q4, q5, r9
253+
; CHECK-NEXT: vmla.i32 q6, q3, r5
254+
; CHECK-NEXT: vmla.i32 q4, q3, r7
255+
; CHECK-NEXT: vmla.i32 q6, q5, r6
256+
; CHECK-NEXT: vmla.i32 q4, q5, r9
257257
; CHECK-NEXT: vadd.i32 q6, q6, r3
258258
; CHECK-NEXT: vadd.i32 q3, q4, r3
259259
; CHECK-NEXT: vshr.u32 q6, q6, #16

0 commit comments

Comments
 (0)