678 changes: 339 additions & 339 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll

Large diffs are not rendered by default.

114 changes: 57 additions & 57 deletions llvm/test/CodeGen/RISCV/stack-store-check.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,34 +32,34 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: lw s6, %lo(U)(a0)
; CHECK-NEXT: lw s7, %lo(U+4)(a0)
; CHECK-NEXT: lw s8, %lo(U+8)(a0)
; CHECK-NEXT: lw s0, %lo(U+12)(a0)
; CHECK-NEXT: lw s2, %lo(U+12)(a0)
; CHECK-NEXT: sw zero, 612(sp)
; CHECK-NEXT: sw zero, 608(sp)
; CHECK-NEXT: sw zero, 604(sp)
; CHECK-NEXT: sw zero, 600(sp)
; CHECK-NEXT: sw s0, 596(sp)
; CHECK-NEXT: sw s2, 596(sp)
; CHECK-NEXT: sw s8, 592(sp)
; CHECK-NEXT: sw s7, 588(sp)
; CHECK-NEXT: addi a0, sp, 616
; CHECK-NEXT: addi a1, sp, 600
; CHECK-NEXT: addi a2, sp, 584
; CHECK-NEXT: sw s6, 584(sp)
; CHECK-NEXT: call __subtf3@plt
; CHECK-NEXT: lw s3, 616(sp)
; CHECK-NEXT: lw s4, 620(sp)
; CHECK-NEXT: lw s9, 624(sp)
; CHECK-NEXT: lw s4, 616(sp)
; CHECK-NEXT: lw s5, 620(sp)
; CHECK-NEXT: lw s3, 624(sp)
; CHECK-NEXT: lw s11, 628(sp)
; CHECK-NEXT: sw s0, 548(sp)
; CHECK-NEXT: sw s2, 548(sp)
; CHECK-NEXT: sw s8, 544(sp)
; CHECK-NEXT: sw s7, 540(sp)
; CHECK-NEXT: sw s6, 536(sp)
; CHECK-NEXT: sw s11, 564(sp)
; CHECK-NEXT: sw s9, 560(sp)
; CHECK-NEXT: sw s4, 556(sp)
; CHECK-NEXT: sw s3, 560(sp)
; CHECK-NEXT: sw s5, 556(sp)
; CHECK-NEXT: addi a0, sp, 568
; CHECK-NEXT: addi a1, sp, 552
; CHECK-NEXT: addi a2, sp, 536
; CHECK-NEXT: sw s3, 552(sp)
; CHECK-NEXT: sw s4, 552(sp)
; CHECK-NEXT: call __subtf3@plt
; CHECK-NEXT: lw a0, 568(sp)
; CHECK-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
Expand All @@ -68,64 +68,64 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: lw a0, 576(sp)
; CHECK-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a0, 580(sp)
; CHECK-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw zero, 500(sp)
; CHECK-NEXT: sw zero, 496(sp)
; CHECK-NEXT: sw zero, 492(sp)
; CHECK-NEXT: sw zero, 488(sp)
; CHECK-NEXT: sw s0, 516(sp)
; CHECK-NEXT: sw s2, 516(sp)
; CHECK-NEXT: sw s8, 512(sp)
; CHECK-NEXT: sw s7, 508(sp)
; CHECK-NEXT: addi a0, sp, 520
; CHECK-NEXT: addi a1, sp, 504
; CHECK-NEXT: addi a2, sp, 488
; CHECK-NEXT: sw s6, 504(sp)
; CHECK-NEXT: call __addtf3@plt
; CHECK-NEXT: lw s2, 520(sp)
; CHECK-NEXT: lw s9, 520(sp)
; CHECK-NEXT: lw s10, 524(sp)
; CHECK-NEXT: lw s5, 528(sp)
; CHECK-NEXT: lw s0, 528(sp)
; CHECK-NEXT: sw s0, 20(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw s1, 532(sp)
; CHECK-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s1, 16(sp) # 4-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(Y1)
; CHECK-NEXT: lw a1, %lo(Y1)(a0)
; CHECK-NEXT: sw a1, 48(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a2, %lo(Y1+4)(a0)
; CHECK-NEXT: sw a2, 52(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a3, %lo(Y1+8)(a0)
; CHECK-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a0, %lo(Y1+12)(a0)
; CHECK-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a0, 308(sp)
; CHECK-NEXT: sw a3, 304(sp)
; CHECK-NEXT: sw a2, 300(sp)
; CHECK-NEXT: sw a1, 296(sp)
; CHECK-NEXT: sw s11, 324(sp)
; CHECK-NEXT: sw s9, 320(sp)
; CHECK-NEXT: sw s4, 316(sp)
; CHECK-NEXT: sw s3, 320(sp)
; CHECK-NEXT: sw s5, 316(sp)
; CHECK-NEXT: addi a0, sp, 328
; CHECK-NEXT: addi a1, sp, 312
; CHECK-NEXT: addi a2, sp, 296
; CHECK-NEXT: sw s3, 312(sp)
; CHECK-NEXT: sw s4, 312(sp)
; CHECK-NEXT: call __multf3@plt
; CHECK-NEXT: lw a0, 328(sp)
; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a0, 332(sp)
; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a0, 336(sp)
; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a0, 340(sp)
; CHECK-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s0, 468(sp)
; CHECK-NEXT: lw s4, 340(sp)
; CHECK-NEXT: sw s2, 468(sp)
; CHECK-NEXT: sw s8, 464(sp)
; CHECK-NEXT: sw s7, 460(sp)
; CHECK-NEXT: sw s6, 456(sp)
; CHECK-NEXT: sw s1, 452(sp)
; CHECK-NEXT: sw s5, 448(sp)
; CHECK-NEXT: sw s0, 448(sp)
; CHECK-NEXT: sw s10, 444(sp)
; CHECK-NEXT: addi a0, sp, 472
; CHECK-NEXT: addi a1, sp, 456
; CHECK-NEXT: addi a2, sp, 440
; CHECK-NEXT: sw s2, 440(sp)
; CHECK-NEXT: sw s9, 440(sp)
; CHECK-NEXT: call __addtf3@plt
; CHECK-NEXT: lw a3, 472(sp)
; CHECK-NEXT: lw a0, 476(sp)
Expand All @@ -152,43 +152,44 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: sw a2, %lo(X+8)(a4)
; CHECK-NEXT: sw a3, %lo(X+4)(a4)
; CHECK-NEXT: sw a0, %lo(X)(a4)
; CHECK-NEXT: lw s8, 0(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s8, 4(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s8, 212(sp)
; CHECK-NEXT: lw s7, 4(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s7, 8(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s7, 208(sp)
; CHECK-NEXT: lw s11, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s11, 204(sp)
; CHECK-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 204(sp)
; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 200(sp)
; CHECK-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s6, 228(sp)
; CHECK-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s4, 224(sp)
; CHECK-NEXT: lw s0, 32(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s0, 220(sp)
; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 228(sp)
; CHECK-NEXT: lw s3, 24(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s3, 224(sp)
; CHECK-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s2, 220(sp)
; CHECK-NEXT: addi a0, sp, 232
; CHECK-NEXT: addi a1, sp, 216
; CHECK-NEXT: addi a2, sp, 200
; CHECK-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s1, 216(sp)
; CHECK-NEXT: call __multf3@plt
; CHECK-NEXT: lw a0, 232(sp)
; CHECK-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw s3, 236(sp)
; CHECK-NEXT: lw s9, 240(sp)
; CHECK-NEXT: lw s11, 244(sp)
; CHECK-NEXT: lw s5, 232(sp)
; CHECK-NEXT: lw a0, 236(sp)
; CHECK-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw s6, 240(sp)
; CHECK-NEXT: lw s0, 244(sp)
; CHECK-NEXT: sw zero, 356(sp)
; CHECK-NEXT: sw zero, 352(sp)
; CHECK-NEXT: sw zero, 348(sp)
; CHECK-NEXT: sw zero, 344(sp)
; CHECK-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 372(sp)
; CHECK-NEXT: sw s5, 368(sp)
; CHECK-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 368(sp)
; CHECK-NEXT: sw s10, 364(sp)
; CHECK-NEXT: addi a0, sp, 376
; CHECK-NEXT: addi a1, sp, 360
; CHECK-NEXT: addi a2, sp, 344
; CHECK-NEXT: sw s2, 360(sp)
; CHECK-NEXT: sw s9, 360(sp)
; CHECK-NEXT: call __multf3@plt
; CHECK-NEXT: lw a0, 376(sp)
; CHECK-NEXT: lw a1, 388(sp)
Expand All @@ -199,12 +200,12 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: sw a2, %lo(S+8)(a4)
; CHECK-NEXT: sw a3, %lo(S+4)(a4)
; CHECK-NEXT: sw a0, %lo(S)(a4)
; CHECK-NEXT: sw s6, 260(sp)
; CHECK-NEXT: sw s4, 256(sp)
; CHECK-NEXT: sw s0, 252(sp)
; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 260(sp)
; CHECK-NEXT: sw s3, 256(sp)
; CHECK-NEXT: sw s2, 252(sp)
; CHECK-NEXT: sw s1, 248(sp)
; CHECK-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 276(sp)
; CHECK-NEXT: sw s4, 276(sp)
; CHECK-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 272(sp)
; CHECK-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
Expand All @@ -228,14 +229,14 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: sw zero, 160(sp)
; CHECK-NEXT: sw zero, 156(sp)
; CHECK-NEXT: sw zero, 152(sp)
; CHECK-NEXT: sw s11, 180(sp)
; CHECK-NEXT: sw s9, 176(sp)
; CHECK-NEXT: sw s3, 172(sp)
; CHECK-NEXT: sw s0, 180(sp)
; CHECK-NEXT: sw s6, 176(sp)
; CHECK-NEXT: lw a0, 0(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 172(sp)
; CHECK-NEXT: addi a0, sp, 184
; CHECK-NEXT: addi a1, sp, 168
; CHECK-NEXT: addi a2, sp, 152
; CHECK-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a3, 168(sp)
; CHECK-NEXT: sw s5, 168(sp)
; CHECK-NEXT: call __addtf3@plt
; CHECK-NEXT: lw a0, 184(sp)
; CHECK-NEXT: lw a1, 196(sp)
Expand All @@ -252,12 +253,11 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: sw zero, 104(sp)
; CHECK-NEXT: sw s8, 132(sp)
; CHECK-NEXT: sw s7, 128(sp)
; CHECK-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 124(sp)
; CHECK-NEXT: sw s11, 124(sp)
; CHECK-NEXT: addi a0, sp, 136
; CHECK-NEXT: addi a1, sp, 120
; CHECK-NEXT: addi a2, sp, 104
; CHECK-NEXT: lw a3, 48(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a3, 52(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a3, 120(sp)
; CHECK-NEXT: call __multf3@plt
; CHECK-NEXT: lw a3, 136(sp)
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1028,12 +1028,12 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: vldr d0, .LCPI13_0
; CHECK-NEXT: vmov r9, r4, d5
; CHECK-NEXT: vmov r2, r6, d0
; CHECK-NEXT: vmov.f32 s20, s8
; CHECK-NEXT: vmov.f32 s22, s6
; CHECK-NEXT: vmov.f32 s22, s8
; CHECK-NEXT: vmov.f32 s20, s6
; CHECK-NEXT: vmov.f32 s18, s4
; CHECK-NEXT: vmov.f32 s24, s2
; CHECK-NEXT: vmov.f32 s21, s9
; CHECK-NEXT: vmov.f32 s23, s7
; CHECK-NEXT: vmov.f32 s23, s9
; CHECK-NEXT: vmov.f32 s21, s7
; CHECK-NEXT: vmov.f32 s19, s5
; CHECK-NEXT: vmov.f32 s25, s3
; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill
Expand All @@ -1054,11 +1054,11 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: vmov r8, r0, d11
; CHECK-NEXT: vmov r8, r0, d10
; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: vmov r7, r5, d10
; CHECK-NEXT: vmov r7, r5, d11
; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: vmov r1, r0, d12
; CHECK-NEXT: strd r1, r0, [sp, #12] @ 8-byte Folded Spill
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -691,11 +691,11 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
; CHECK-NEXT: vmov r5, r6, d4
; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: vmov.f32 s18, s6
; CHECK-NEXT: vmov.f32 s20, s4
; CHECK-NEXT: vmov.f32 s20, s6
; CHECK-NEXT: vmov.f32 s18, s4
; CHECK-NEXT: vmov.f32 s22, s2
; CHECK-NEXT: vmov.f32 s19, s7
; CHECK-NEXT: vmov.f32 s21, s5
; CHECK-NEXT: vmov.f32 s21, s7
; CHECK-NEXT: vmov.f32 s19, s5
; CHECK-NEXT: vmov.f32 s23, s3
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
Expand All @@ -716,11 +716,11 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov r8, r1, d11
; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: vmov r6, r9, d9
; CHECK-NEXT: vmov r6, r9, d10
; CHECK-NEXT: csel r0, r0, r11, ne
; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: vmov r2, r1, d10
; CHECK-NEXT: vmov r2, r1, d9
; CHECK-NEXT: strd r2, r1, [sp, #16] @ 8-byte Folded Spill
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
Expand Down Expand Up @@ -859,13 +859,13 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: vldr d0, .LCPI13_0
; CHECK-NEXT: vmov r5, r6, d5
; CHECK-NEXT: vmov r11, r3, d0
; CHECK-NEXT: vmov.f32 s18, s8
; CHECK-NEXT: vmov.f32 s22, s8
; CHECK-NEXT: vmov.f32 s20, s6
; CHECK-NEXT: vmov.f32 s22, s4
; CHECK-NEXT: vmov.f32 s18, s4
; CHECK-NEXT: vmov.f32 s24, s2
; CHECK-NEXT: vmov.f32 s19, s9
; CHECK-NEXT: vmov.f32 s23, s9
; CHECK-NEXT: vmov.f32 s21, s7
; CHECK-NEXT: vmov.f32 s23, s5
; CHECK-NEXT: vmov.f32 s19, s5
; CHECK-NEXT: vmov.f32 s25, s3
; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: mov r0, r5
Expand All @@ -888,7 +888,7 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov r10, r1, d10
; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: vmov r5, r6, d9
; CHECK-NEXT: vmov r5, r6, d11
; CHECK-NEXT: csel r0, r0, r8, ne
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
Expand All @@ -914,7 +914,7 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r6
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov r2, r1, d11
; CHECK-NEXT: vmov r2, r1, d9
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csel r0, r0, r4, ne
; CHECK-NEXT: cmp.w r11, #0
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,13 @@ define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x doub
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: vmov q5, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: vmov r2, r3, d11
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d11
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: bl __aeabi_dadd
; CHECK-NEXT: vmov lr, r12, d8
; CHECK-NEXT: vmov r2, r3, d10
; CHECK-NEXT: vmov lr, r12, d10
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, lr
; CHECK-NEXT: mov r1, r12
Expand Down Expand Up @@ -243,13 +243,13 @@ define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x doub
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: vmov q5, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: vmov r2, r3, d11
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d11
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: bl __aeabi_dsub
; CHECK-NEXT: vmov lr, r12, d8
; CHECK-NEXT: vmov r2, r3, d10
; CHECK-NEXT: vmov lr, r12, d10
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, lr
; CHECK-NEXT: mov r1, r12
Expand Down Expand Up @@ -376,13 +376,13 @@ define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x doub
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: vmov q5, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: vmov r2, r3, d11
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d11
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: bl __aeabi_dmul
; CHECK-NEXT: vmov lr, r12, d8
; CHECK-NEXT: vmov r2, r3, d10
; CHECK-NEXT: vmov lr, r12, d10
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: mov r0, lr
; CHECK-NEXT: mov r1, r12
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -68,39 +68,39 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: and r0, r3, #1
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: movs r2, #9
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: bl __aeabi_ldivmod
; CHECK-NEXT: and r0, r4, #1
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: and r0, r5, #1
; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: rsbs r1, r0, #0
; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: movs r2, #9
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: bl __aeabi_ldivmod
; CHECK-NEXT: ldr r1, [sp, #44]
; CHECK-NEXT: vmov.32 d8[0], r2
; CHECK-NEXT: ldr r0, [sp, #40]
; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: and r1, r1, #1
; CHECK-NEXT: mvn r2, #8
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: vmov.32 d9[0], r6
; CHECK-NEXT: vmov.32 d9[0], r7
; CHECK-NEXT: bl __aeabi_ldivmod
; CHECK-NEXT: vmov.32 d16[0], r2
; CHECK-NEXT: adr r0, .LCPI3_0
; CHECK-NEXT: vmov.32 d9[1], r7
; CHECK-NEXT: vmov.32 d9[1], r4
; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT: adr r0, .LCPI3_1
; CHECK-NEXT: vmov.32 d16[1], r3
; CHECK-NEXT: vmov.32 d8[1], r4
; CHECK-NEXT: vmov.32 d8[1], r5
; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT: adr r0, .LCPI3_2
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,32 @@
define fastcc void @fht(float* %fz, i16 signext %n) {
; CHECK-LABEL: fht:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: xorps %xmm2, %xmm2
; CHECK-NEXT: subss %xmm1, %xmm2
; CHECK-NEXT: movaps %xmm1, %xmm3
; CHECK-NEXT: mulss %xmm0, %xmm3
; CHECK-NEXT: addss %xmm1, %xmm3
; CHECK-NEXT: movaps %xmm1, %xmm4
; CHECK-NEXT: subss %xmm3, %xmm4
; CHECK-NEXT: addss %xmm1, %xmm3
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: subss %xmm3, %xmm1
; CHECK-NEXT: movaps %xmm3, %xmm4
; CHECK-NEXT: mulss %xmm0, %xmm4
; CHECK-NEXT: addss %xmm3, %xmm4
; CHECK-NEXT: movaps %xmm3, %xmm2
; CHECK-NEXT: subss %xmm4, %xmm2
; CHECK-NEXT: addss %xmm3, %xmm4
; CHECK-NEXT: xorps %xmm5, %xmm5
; CHECK-NEXT: subss %xmm2, %xmm5
; CHECK-NEXT: addss %xmm0, %xmm2
; CHECK-NEXT: mulss %xmm0, %xmm3
; CHECK-NEXT: subss %xmm1, %xmm5
; CHECK-NEXT: addss %xmm0, %xmm1
; CHECK-NEXT: mulss %xmm0, %xmm4
; CHECK-NEXT: mulss %xmm0, %xmm5
; CHECK-NEXT: addss %xmm3, %xmm5
; CHECK-NEXT: addss %xmm4, %xmm5
; CHECK-NEXT: addss %xmm0, %xmm5
; CHECK-NEXT: movss %xmm5, 0
; CHECK-NEXT: movss %xmm1, (%ecx)
; CHECK-NEXT: addss %xmm0, %xmm1
; CHECK-NEXT: movss %xmm1, 0
; CHECK-NEXT: movss %xmm3, (%ecx)
; CHECK-NEXT: addss %xmm0, %xmm3
; CHECK-NEXT: movss %xmm3, 0
; CHECK-NEXT: mulss %xmm0, %xmm1
; CHECK-NEXT: mulss %xmm0, %xmm2
; CHECK-NEXT: mulss %xmm0, %xmm4
; CHECK-NEXT: addss %xmm2, %xmm4
; CHECK-NEXT: addss %xmm0, %xmm4
; CHECK-NEXT: movss %xmm4, (%ecx)
; CHECK-NEXT: addss %xmm1, %xmm2
; CHECK-NEXT: addss %xmm0, %xmm2
; CHECK-NEXT: movss %xmm2, (%ecx)
; CHECK-NEXT: retl
entry:
br i1 true, label %bb171.preheader, label %bb431
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/2008-04-16-ReMatBug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,20 @@ define i16 @SQLDriversW(i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 sign
; CHECK-NEXT: movw $0, 40(%edi)
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: leal (,%ecx,4), %eax
; CHECK-NEXT: leal (,%ebx,4), %ecx
; CHECK-NEXT: leal (,%ecx,4), %ecx
; CHECK-NEXT: leal (,%ebx,4), %edx
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: movzwl %bp, %edx
; CHECK-NEXT: cwtl
; CHECK-NEXT: movzwl %bp, %eax
; CHECK-NEXT: movswl %cx, %ecx
; CHECK-NEXT: movswl %dx, %edx
; CHECK-NEXT: pushl $87
; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: pushl $0
; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl $0
; CHECK-NEXT: pushl {{[0-9]+}}(%esp)
; CHECK-NEXT: pushl %edx
; CHECK-NEXT: pushl $0
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: calll _SQLDrivers_Internal
; CHECK-NEXT: addl $48, %esp
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll
Original file line number Diff line number Diff line change
Expand Up @@ -337,20 +337,20 @@ define i64 @t5_cse(i64 %val, i64 %shamt, i64*%dst) nounwind {
; X32-BMI2-NEXT: pushl %ebx
; X32-BMI2-NEXT: pushl %edi
; X32-BMI2-NEXT: pushl %esi
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-BMI2-NEXT: movl %eax, %ebx
; X32-BMI2-NEXT: addl $32, %ebx
; X32-BMI2-NEXT: adcl $0, %edi
; X32-BMI2-NEXT: movl %ebx, (%ecx)
; X32-BMI2-NEXT: movl %edi, 4(%ecx)
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-BMI2-NEXT: movl %ebx, %edi
; X32-BMI2-NEXT: addl $32, %edi
; X32-BMI2-NEXT: adcl $0, %esi
; X32-BMI2-NEXT: movl %edi, (%ecx)
; X32-BMI2-NEXT: movl %esi, 4(%ecx)
; X32-BMI2-NEXT: movb $32, %cl
; X32-BMI2-NEXT: subb %al, %cl
; X32-BMI2-NEXT: shldl %cl, %esi, %edx
; X32-BMI2-NEXT: shlxl %ecx, %esi, %eax
; X32-BMI2-NEXT: subb %bl, %cl
; X32-BMI2-NEXT: shldl %cl, %eax, %edx
; X32-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X32-BMI2-NEXT: testb $32, %cl
; X32-BMI2-NEXT: je .LBB5_2
; X32-BMI2-NEXT: # %bb.1:
Expand Down
88 changes: 44 additions & 44 deletions llvm/test/CodeGen/X86/abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -278,25 +278,25 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl %ebx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: cmovll %ebx, %edx
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: negl %ebx
; X86-NEXT: cmovll %edi, %ebx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: negl %edi
; X86-NEXT: cmovll %esi, %edi
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: negl %esi
; X86-NEXT: cmovll %edx, %esi
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: cmovll %ecx, %edx
; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movl %esi, 8(%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %ebx, (%eax)
; X86-NEXT: cmovll %ecx, %esi
; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: movl %ebx, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
Expand Down Expand Up @@ -339,31 +339,31 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: negl %ecx
; X86-NEXT: cmovll %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebp, %ecx
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: negl %ecx
; X86-NEXT: cmovll %ebp, %ecx
; X86-NEXT: cmovll %esi, %ecx
; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: negl %ebp
; X86-NEXT: cmovll %ebx, %ebp
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: negl %esi
; X86-NEXT: cmovll %ebx, %esi
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: negl %ebx
; X86-NEXT: cmovll %edi, %ebx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: cmovll %ebp, %ebx
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: negl %ebp
; X86-NEXT: cmovll %edi, %ebp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: negl %edi
; X86-NEXT: cmovll %esi, %edi
; X86-NEXT: movl %eax, %esi
; X86-NEXT: negl %esi
; X86-NEXT: cmovll %eax, %esi
; X86-NEXT: cmovll %eax, %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negl %eax
Expand All @@ -375,10 +375,10 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, 28(%edx)
; X86-NEXT: movl %eax, 24(%edx)
; X86-NEXT: movl %esi, 20(%edx)
; X86-NEXT: movl %edi, 16(%edx)
; X86-NEXT: movl %edi, 20(%edx)
; X86-NEXT: movl %ebp, 16(%edx)
; X86-NEXT: movl %ebx, 12(%edx)
; X86-NEXT: movl %ebp, 8(%edx)
; X86-NEXT: movl %esi, 8(%edx)
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movl %eax, 4(%edx)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
Expand Down Expand Up @@ -415,31 +415,31 @@ define <8 x i16> @test_v8i16(<8 x i16> %a) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: negw %cx
; X86-NEXT: cmovlw %dx, %cx
; X86-NEXT: movw %cx, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill
; X86-NEXT: movl %ebp, %ecx
; X86-NEXT: movl %esi, %ecx
; X86-NEXT: negw %cx
; X86-NEXT: cmovlw %bp, %cx
; X86-NEXT: cmovlw %si, %cx
; X86-NEXT: movw %cx, (%esp) # 2-byte Spill
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: negw %bp
; X86-NEXT: cmovlw %bx, %bp
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: negw %si
; X86-NEXT: cmovlw %bx, %si
; X86-NEXT: movl %ebp, %ebx
; X86-NEXT: negw %bx
; X86-NEXT: cmovlw %di, %bx
; X86-NEXT: movl %esi, %edi
; X86-NEXT: cmovlw %bp, %bx
; X86-NEXT: movl %edi, %ebp
; X86-NEXT: negw %bp
; X86-NEXT: cmovlw %di, %bp
; X86-NEXT: movl %eax, %edi
; X86-NEXT: negw %di
; X86-NEXT: cmovlw %si, %di
; X86-NEXT: movl %eax, %esi
; X86-NEXT: negw %si
; X86-NEXT: cmovlw %ax, %si
; X86-NEXT: cmovlw %ax, %di
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: negw %ax
Expand All @@ -451,10 +451,10 @@ define <8 x i16> @test_v8i16(<8 x i16> %a) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movw %cx, 14(%edx)
; X86-NEXT: movw %ax, 12(%edx)
; X86-NEXT: movw %si, 10(%edx)
; X86-NEXT: movw %di, 8(%edx)
; X86-NEXT: movw %di, 10(%edx)
; X86-NEXT: movw %bp, 8(%edx)
; X86-NEXT: movw %bx, 6(%edx)
; X86-NEXT: movw %bp, 4(%edx)
; X86-NEXT: movw %si, 4(%edx)
; X86-NEXT: movzwl (%esp), %eax # 2-byte Folded Reload
; X86-NEXT: movw %ax, 2(%edx)
; X86-NEXT: movzwl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 2-byte Folded Reload
Expand Down
72 changes: 36 additions & 36 deletions llvm/test/CodeGen/X86/avx512-calling-conv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1564,11 +1564,11 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $1, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $2, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kshiftrw $4, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $5, %k0, %k1
Expand All @@ -1578,66 +1578,66 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: movb %bl, 2(%eax)
; KNL_X32-NEXT: kmovw %k0, %ebx
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: andl $1, %esi
; KNL_X32-NEXT: leal (%ebx,%esi,2), %esi
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: andl $1, %ebp
; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebx
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $7, %k0, %k1
; KNL_X32-NEXT: andl $1, %esi
; KNL_X32-NEXT: leal (%ebx,%esi,4), %ebx
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: leal (%esi,%edi,4), %esi
; KNL_X32-NEXT: leal (%ebx,%edi,8), %ebx
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebp
; KNL_X32-NEXT: leal (%esi,%ebp,8), %esi
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $9, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: shll $4, %edx
; KNL_X32-NEXT: orl %esi, %edx
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: orl %ebx, %edx
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
; KNL_X32-NEXT: andl $1, %ecx
; KNL_X32-NEXT: shll $5, %ecx
; KNL_X32-NEXT: orl %edx, %ecx
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: shll $6, %ebx
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: shll $7, %edi
; KNL_X32-NEXT: orl %ebx, %edi
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebp
; KNL_X32-NEXT: shll $8, %ebp
; KNL_X32-NEXT: orl %edi, %ebp
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
; KNL_X32-NEXT: shll $6, %ebp
; KNL_X32-NEXT: andl $1, %esi
; KNL_X32-NEXT: shll $9, %esi
; KNL_X32-NEXT: shll $7, %esi
; KNL_X32-NEXT: orl %ebp, %esi
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: shll $8, %edi
; KNL_X32-NEXT: orl %esi, %edi
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: shll $9, %ebx
; KNL_X32-NEXT: orl %edi, %ebx
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: shll $10, %edx
; KNL_X32-NEXT: orl %esi, %edx
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: orl %ebx, %edx
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: orl %ecx, %edx
; KNL_X32-NEXT: kmovw %k0, %ecx
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: shll $11, %ebx
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: shll $12, %edi
; KNL_X32-NEXT: orl %ebx, %edi
; KNL_X32-NEXT: andl $1, %ebp
; KNL_X32-NEXT: shll $13, %ebp
; KNL_X32-NEXT: orl %edi, %ebp
; KNL_X32-NEXT: shll $11, %ebp
; KNL_X32-NEXT: andl $1, %esi
; KNL_X32-NEXT: shll $14, %esi
; KNL_X32-NEXT: shll $12, %esi
; KNL_X32-NEXT: orl %ebp, %esi
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: shll $13, %edi
; KNL_X32-NEXT: orl %esi, %edi
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: shll $14, %ebx
; KNL_X32-NEXT: orl %edi, %ebx
; KNL_X32-NEXT: andl $1, %ecx
; KNL_X32-NEXT: shll $15, %ecx
; KNL_X32-NEXT: orl %esi, %ecx
; KNL_X32-NEXT: orl %ebx, %ecx
; KNL_X32-NEXT: orl %edx, %ecx
; KNL_X32-NEXT: movw %cx, (%eax)
; KNL_X32-NEXT: addl $20, %esp
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -947,35 +947,35 @@ define dso_local x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %
; X32-NEXT: movl %eax, %ebx
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: subl %ecx, %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, %ebp
; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: imull %ebp, %ebx
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: subl %edi, %ebp
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movl %ebp, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
; X32-NEXT: imull %eax, %ebx
; X32-NEXT: movl %edx, %eax
; X32-NEXT: subl %edi, %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, %ecx
; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %ebp, %ecx
; X32-NEXT: imull %eax, %ecx
; X32-NEXT: addl %ecx, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X32-NEXT: movl %edi, %ebp
; X32-NEXT: subl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movl %edi, %esi
; X32-NEXT: subl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: subl {{[0-9]+}}(%esp), %eax
; X32-NEXT: imull %ebp, %eax
; X32-NEXT: imull %esi, %eax
; X32-NEXT: addl %eax, %ebx
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; X32-NEXT: movl (%esp), %esi # 4-byte Reload
; X32-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X32-NEXT: addl {{[0-9]+}}(%esp), %edi
; X32-NEXT: addl {{[0-9]+}}(%esp), %esi
; X32-NEXT: imull %eax, %esi
; X32-NEXT: addl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: imull %eax, %ebp
; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
; X32-NEXT: imull %ebp, %edx
; X32-NEXT: addl %esi, %edx
; X32-NEXT: imull %esi, %edx
; X32-NEXT: addl %ebp, %edx
; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: imull %edi, %ecx
; X32-NEXT: addl %edx, %ecx
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/X86/avx512-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -556,22 +556,22 @@ define void @vselect_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y) nounwind {
; X86-AVX512F-LABEL: vselect_v1i1:
; X86-AVX512F: # %bb.0:
; X86-AVX512F-NEXT: pushl %esi
; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX512F-NEXT: movzbl (%edx), %esi
; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512F-NEXT: movzbl (%eax), %esi
; X86-AVX512F-NEXT: kmovw %esi, %k0
; X86-AVX512F-NEXT: movzbl (%edx), %edx
; X86-AVX512F-NEXT: kmovw %edx, %k1
; X86-AVX512F-NEXT: movzbl (%ecx), %ecx
; X86-AVX512F-NEXT: kmovw %ecx, %k1
; X86-AVX512F-NEXT: movzbl (%eax), %eax
; X86-AVX512F-NEXT: kmovw %eax, %k2
; X86-AVX512F-NEXT: kmovw %ecx, %k2
; X86-AVX512F-NEXT: kandnw %k1, %k2, %k1
; X86-AVX512F-NEXT: kandw %k2, %k0, %k0
; X86-AVX512F-NEXT: korw %k1, %k0, %k0
; X86-AVX512F-NEXT: kshiftlw $15, %k0, %k0
; X86-AVX512F-NEXT: kshiftrw $15, %k0, %k0
; X86-AVX512F-NEXT: kmovw %k0, %eax
; X86-AVX512F-NEXT: movb %al, (%edx)
; X86-AVX512F-NEXT: kmovw %k0, %ecx
; X86-AVX512F-NEXT: movb %cl, (%eax)
; X86-AVX512F-NEXT: popl %esi
; X86-AVX512F-NEXT: retl
;
Expand All @@ -595,22 +595,22 @@ define void @vselect_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y) nounwind {
; X86-AVX512BW-LABEL: vselect_v1i1:
; X86-AVX512BW: # %bb.0:
; X86-AVX512BW-NEXT: pushl %esi
; X86-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX512BW-NEXT: movzbl (%edx), %esi
; X86-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512BW-NEXT: movzbl (%eax), %esi
; X86-AVX512BW-NEXT: kmovd %esi, %k0
; X86-AVX512BW-NEXT: movzbl (%edx), %edx
; X86-AVX512BW-NEXT: kmovd %edx, %k1
; X86-AVX512BW-NEXT: movzbl (%ecx), %ecx
; X86-AVX512BW-NEXT: kmovd %ecx, %k1
; X86-AVX512BW-NEXT: movzbl (%eax), %eax
; X86-AVX512BW-NEXT: kmovd %eax, %k2
; X86-AVX512BW-NEXT: kmovd %ecx, %k2
; X86-AVX512BW-NEXT: kandnw %k1, %k2, %k1
; X86-AVX512BW-NEXT: kandw %k2, %k0, %k0
; X86-AVX512BW-NEXT: korw %k1, %k0, %k0
; X86-AVX512BW-NEXT: kshiftlw $15, %k0, %k0
; X86-AVX512BW-NEXT: kshiftrw $15, %k0, %k0
; X86-AVX512BW-NEXT: kmovd %k0, %eax
; X86-AVX512BW-NEXT: movb %al, (%edx)
; X86-AVX512BW-NEXT: kmovd %k0, %ecx
; X86-AVX512BW-NEXT: movb %cl, (%eax)
; X86-AVX512BW-NEXT: popl %esi
; X86-AVX512BW-NEXT: retl
;
Expand Down
104 changes: 52 additions & 52 deletions llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1856,32 +1856,32 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
Expand Down Expand Up @@ -1942,32 +1942,32 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwin
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
Expand Down Expand Up @@ -2031,32 +2031,32 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
Expand Down Expand Up @@ -2117,32 +2117,32 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4810,9 +4810,9 @@ define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
Expand All @@ -4825,11 +4825,11 @@ define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
; X86-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
Expand Down Expand Up @@ -5004,9 +5004,9 @@ define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; X86-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
; X86-NEXT: vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
Expand All @@ -5019,11 +5019,11 @@ define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; X86-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
; X86-NEXT: # xmm0 = xmm0[0,1,2],xmm1[3]
; X86-NEXT: vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; X86-NEXT: vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; X86-NEXT: vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; X86-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; X86-NEXT: # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; X86-NEXT: vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
; X86-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; X86-NEXT: # xmm1 = xmm1[0],xmm2[0]
; X86-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/bitreverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -653,8 +653,7 @@ define i528 @large_promotion(i528 %A) nounwind {
; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555
; X86-NEXT: shrl %edi
; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555
; X86-NEXT: leal (%edi,%ebx,2), %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: leal (%edi,%ebx,2), %ebx
; X86-NEXT: bswapl %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
Expand All @@ -671,7 +670,8 @@ define i528 @large_promotion(i528 %A) nounwind {
; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555
; X86-NEXT: shrl %esi
; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555
; X86-NEXT: leal (%esi,%edi,2), %ebx
; X86-NEXT: leal (%esi,%edi,2), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: bswapl %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
Expand Down Expand Up @@ -934,13 +934,13 @@ define i528 @large_promotion(i528 %A) nounwind {
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %edx
; X86-NEXT: movl (%esp), %esi # 4-byte Reload
; X86-NEXT: shrdl $16, %ebx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %esi
; X86-NEXT: shrdl $16, %ebx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %ebx
; X86-NEXT: shrdl $16, %eax, %ebx
; X86-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: shrdl $16, %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl $16, %eax, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
Expand Down Expand Up @@ -998,9 +998,9 @@ define i528 @large_promotion(i528 %A) nounwind {
; X86-NEXT: movl %ecx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl (%esp), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: shrl $16, %edx
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/bool-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ define i32 @PR15215_bad(<4 x i32> %input) {
; X86-LABEL: PR15215_bad:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: addb %ah, %ah
; X86-NEXT: andb $1, %dl
; X86-NEXT: orb %ah, %dl
; X86-NEXT: shlb $2, %dl
; X86-NEXT: addb %cl, %cl
; X86-NEXT: andb $1, %cl
; X86-NEXT: orb %ah, %cl
; X86-NEXT: shlb $2, %cl
; X86-NEXT: addb %dl, %dl
; X86-NEXT: andb $1, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: andb $3, %al
; X86-NEXT: orb %dl, %al
; X86-NEXT: andb $3, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: andl $15, %eax
; X86-NEXT: retl
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/bswap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -277,14 +277,10 @@ define i528 @large_promotion(i528 %A) nounwind {
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %ebx
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %ebp
; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: shrdl $16, %eax, %ebp
; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
Expand All @@ -293,10 +289,14 @@ define i528 @large_promotion(i528 %A) nounwind {
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: shrdl $16, %eax, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: shrdl $16, %ecx, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: bswapl %ebp
; CHECK-NEXT: shrdl $16, %ebp, %eax
; CHECK-NEXT: movl %eax, (%esp) # 4-byte Spill
; CHECK-NEXT: shrdl $16, %ebp, %ecx
; CHECK-NEXT: movl %ecx, (%esp) # 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: bswapl %ebx
; CHECK-NEXT: shrdl $16, %ebx, %ebp
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/X86/build-vector-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -252,29 +252,29 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-32-NEXT: retl
;
; SSE2-64-LABEL: test_buildvector_v16i8:
Expand Down
29 changes: 13 additions & 16 deletions llvm/test/CodeGen/X86/clear-highbits.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1128,35 +1128,32 @@ define i64 @oneusei64_d(i64 %val, i64 %numhighbits, i64* %escape) nounwind {
;
; X86-BMI1-LABEL: oneusei64_d:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %ebp
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl %eax, %edi
; X86-BMI1-NEXT: shll %cl, %edi
; X86-BMI1-NEXT: shldl %cl, %eax, %esi
; X86-BMI1-NEXT: movl %edx, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: shldl %cl, %edx, %esi
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %edi, %esi
; X86-BMI1-NEXT: movl %esi, %ebx
; X86-BMI1-NEXT: shrl %cl, %ebx
; X86-BMI1-NEXT: cmovnel %eax, %esi
; X86-BMI1-NEXT: movl %esi, %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %edx, %edi
; X86-BMI1-NEXT: cmovel %ebx, %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-BMI1-NEXT: movl %edi, (%ebp)
; X86-BMI1-NEXT: movl %edi, %eax
; X86-BMI1-NEXT: cmovnel %edx, %eax
; X86-BMI1-NEXT: cmovel %edi, %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-BMI1-NEXT: movl %eax, (%ebx)
; X86-BMI1-NEXT: shrdl %cl, %esi, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: movl %esi, 4(%ebp)
; X86-BMI1-NEXT: cmovnel %ebx, %eax
; X86-BMI1-NEXT: movl %esi, 4(%ebx)
; X86-BMI1-NEXT: cmovnel %edi, %eax
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: popl %ebx
; X86-BMI1-NEXT: popl %ebp
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: oneusei64_d:
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/X86/combine-sbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,23 +63,23 @@ define void @PR25858_i64(%WideUInt64* sret(%WideUInt64), %WideUInt64*, %WideUInt
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %esi
; X86-NEXT: movl 4(%edx), %edi
; X86-NEXT: subl (%ecx), %esi
; X86-NEXT: sbbl 4(%ecx), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl (%edi), %ecx
; X86-NEXT: movl 4(%edi), %edx
; X86-NEXT: subl (%esi), %ecx
; X86-NEXT: sbbl 4(%esi), %edx
; X86-NEXT: setb %bl
; X86-NEXT: movl 12(%edx), %ebp
; X86-NEXT: movl 8(%edx), %edx
; X86-NEXT: subl 8(%ecx), %edx
; X86-NEXT: sbbl 12(%ecx), %ebp
; X86-NEXT: movzbl %bl, %ecx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: movl 12(%edi), %ebp
; X86-NEXT: movl 8(%edi), %edi
; X86-NEXT: subl 8(%esi), %edi
; X86-NEXT: sbbl 12(%esi), %ebp
; X86-NEXT: movzbl %bl, %esi
; X86-NEXT: subl %esi, %edi
; X86-NEXT: sbbl $0, %ebp
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %edi, 8(%eax)
; X86-NEXT: movl %ebp, 12(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
Expand Down
192 changes: 94 additions & 98 deletions llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, i64* %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: pushl %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: calll __divdi3
Expand All @@ -136,10 +136,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, i64* %divdst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, 4(%edx)
; X86-NEXT: movl %eax, (%edx)
; X86-NEXT: imull %eax, %ebx
; X86-NEXT: mull %ebp
; X86-NEXT: addl %ebx, %edx
; X86-NEXT: imull %ebp, %ecx
; X86-NEXT: imull %eax, %ebp
; X86-NEXT: mull %ebx
; X86-NEXT: addl %ebp, %edx
; X86-NEXT: imull %ebx, %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: subl %eax, %esi
; X86-NEXT: sbbl %ecx, %edi
Expand Down Expand Up @@ -178,83 +178,79 @@ define i128 @scalar_i128(i128 %x, i128 %y, i128* %divdst) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $48, %esp
; X86-NEXT: subl $40, %esp
; X86-NEXT: movl 44(%ebp), %edi
; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl 40(%ebp)
; X86-NEXT: pushl 36(%ebp)
; X86-NEXT: pushl 32(%ebp)
; X86-NEXT: pushl %ecx
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: pushl 28(%ebp)
; X86-NEXT: pushl 24(%ebp)
; X86-NEXT: pushl 20(%ebp)
; X86-NEXT: pushl 16(%ebp)
; X86-NEXT: pushl 12(%ebp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __divti3
; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %edi, %edx
; X86-NEXT: movl %ecx, 12(%edi)
; X86-NEXT: movl %esi, 8(%edi)
; X86-NEXT: movl %eax, 4(%edi)
; X86-NEXT: movl %edx, (%edi)
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: imull %ebx, %ecx
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %ebx, (%edx)
; X86-NEXT: movl 28(%ebp), %eax
; X86-NEXT: imull %eax, %ecx
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: imull 32(%ebp), %esi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl 36(%ebp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: imull %ebx, %ecx
; X86-NEXT: mull %edi
; X86-NEXT: imull %edi, %ecx
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: imull %eax, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: movl 40(%ebp), %eax
; X86-NEXT: imull %ebx, %eax
; X86-NEXT: addl %edx, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull 32(%ebp)
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: adcl %esi, %edi
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: adcl %ecx, %ebx
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull 32(%ebp)
; X86-NEXT: addl %edi, %eax
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl 16(%ebp), %esi
; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: sbbl (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: movl 20(%ebp), %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl 24(%ebp), %ebx
Expand Down Expand Up @@ -386,35 +382,35 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst)
; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-NEXT: movd %edx, %xmm4
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-NEXT: movd %esi, %xmm2
; X86-NEXT: movd %esi, %xmm7
; X86-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
; X86-NEXT: movd %edi, %xmm5
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
; X86-NEXT: movd %edi, %xmm2
; X86-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3],xmm7[4],xmm4[4],xmm7[5],xmm4[5],xmm7[6],xmm4[6],xmm7[7],xmm4[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
; X86-NEXT: movd %ebx, %xmm4
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: movd %ecx, %xmm6
; X86-NEXT: movd %ecx, %xmm5
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
; X86-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movd %eax, %xmm5
; X86-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3],xmm5[4],xmm6[4],xmm5[5],xmm6[5],xmm5[6],xmm6[6],xmm5[7],xmm6[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1]
; X86-NEXT: movdqa %xmm5, %xmm2
; X86-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X86-NEXT: movdqa %xmm2, (%ecx)
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1]
; X86-NEXT: movdqa %xmm2, %xmm4
; X86-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
; X86-NEXT: movdqa %xmm4, (%ecx)
; X86-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmullw %xmm3, %xmm2
; X86-NEXT: movdqa %xmm1, %xmm4
; X86-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmullw %xmm3, %xmm4
; X86-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X86-NEXT: pand %xmm3, %xmm2
; X86-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: pand %xmm3, %xmm4
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: pmullw %xmm5, %xmm1
; X86-NEXT: pmullw %xmm2, %xmm1
; X86-NEXT: pand %xmm3, %xmm1
; X86-NEXT: packuswb %xmm2, %xmm1
; X86-NEXT: packuswb %xmm4, %xmm1
; X86-NEXT: psubb %xmm1, %xmm0
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
Expand Down Expand Up @@ -585,31 +581,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, <8 x i16>* %divdst
; X86-NEXT: cwtd
; X86-NEXT: idivw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: pextrw $2, %xmm0, %eax
; X86-NEXT: pextrw $2, %xmm1, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: cwtd
; X86-NEXT: idivw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-NEXT: pextrw $1, %xmm0, %eax
; X86-NEXT: pextrw $1, %xmm1, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: cwtd
; X86-NEXT: idivw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: cwtd
; X86-NEXT: idivw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm5
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
; X86-NEXT: movdqa %xmm5, (%ecx)
; X86-NEXT: pmullw %xmm1, %xmm5
Expand Down Expand Up @@ -704,38 +700,38 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst
; X86-NEXT: movd %xmm2, %esi
; X86-NEXT: cltd
; X86-NEXT: idivl %esi
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm3, %eax
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X86-NEXT: movd %xmm3, %esi
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm2, %eax
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-NEXT: movd %xmm2, %esi
; X86-NEXT: cltd
; X86-NEXT: idivl %esi
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: cltd
; X86-NEXT: idivl %esi
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1]
; X86-NEXT: movd %xmm4, %eax
; X86-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1]
; X86-NEXT: movd %xmm4, %esi
; X86-NEXT: cltd
; X86-NEXT: idivl %esi
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X86-NEXT: movdqa %xmm2, (%ecx)
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X86-NEXT: movdqa %xmm3, (%ecx)
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT: pmuludq %xmm3, %xmm1
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: psubd %xmm2, %xmm0
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-NEXT: psubd %xmm3, %xmm0
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
Expand Down Expand Up @@ -817,25 +813,25 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqu %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-NEXT: calll __divdi3
; X86-NEXT: movd %edx, %xmm0
; X86-NEXT: movd %eax, %xmm1
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movd %edx, %xmm1
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X86-NEXT: movdqa %xmm1, (%esi)
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload
; X86-NEXT: movdqa %xmm3, %xmm0
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; X86-NEXT: movdqa %xmm3, (%esi)
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pmuludq %xmm3, %xmm1
; X86-NEXT: movdqa %xmm3, %xmm2
; X86-NEXT: psrlq $32, %xmm2
; X86-NEXT: pmuludq %xmm3, %xmm2
; X86-NEXT: paddq %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm0, %xmm2
; X86-NEXT: paddq %xmm1, %xmm2
; X86-NEXT: psllq $32, %xmm2
; X86-NEXT: pmuludq %xmm3, %xmm1
; X86-NEXT: paddq %xmm2, %xmm1
; X86-NEXT: pmuludq %xmm0, %xmm3
; X86-NEXT: paddq %xmm2, %xmm3
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: psubq %xmm1, %xmm0
; X86-NEXT: psubq %xmm3, %xmm0
; X86-NEXT: addl $64, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
Expand Down
192 changes: 94 additions & 98 deletions llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, i64* %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: pushl %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: calll __udivdi3
Expand All @@ -136,10 +136,10 @@ define i64 @scalar_i64(i64 %x, i64 %y, i64* %divdst) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ecx, 4(%edx)
; X86-NEXT: movl %eax, (%edx)
; X86-NEXT: imull %eax, %ebx
; X86-NEXT: mull %ebp
; X86-NEXT: addl %ebx, %edx
; X86-NEXT: imull %ebp, %ecx
; X86-NEXT: imull %eax, %ebp
; X86-NEXT: mull %ebx
; X86-NEXT: addl %ebp, %edx
; X86-NEXT: imull %ebx, %ecx
; X86-NEXT: addl %edx, %ecx
; X86-NEXT: subl %eax, %esi
; X86-NEXT: sbbl %ecx, %edi
Expand Down Expand Up @@ -178,83 +178,79 @@ define i128 @scalar_i128(i128 %x, i128 %y, i128* %divdst) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $48, %esp
; X86-NEXT: subl $40, %esp
; X86-NEXT: movl 44(%ebp), %edi
; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: pushl 40(%ebp)
; X86-NEXT: pushl 36(%ebp)
; X86-NEXT: pushl 32(%ebp)
; X86-NEXT: pushl %ecx
; X86-NEXT: movl %ecx, %ebx
; X86-NEXT: pushl 28(%ebp)
; X86-NEXT: pushl 24(%ebp)
; X86-NEXT: pushl 20(%ebp)
; X86-NEXT: pushl 16(%ebp)
; X86-NEXT: pushl 12(%ebp)
; X86-NEXT: pushl %eax
; X86-NEXT: calll __udivti3
; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %edi, %edx
; X86-NEXT: movl %ecx, 12(%edi)
; X86-NEXT: movl %esi, 8(%edi)
; X86-NEXT: movl %eax, 4(%edi)
; X86-NEXT: movl %edx, (%edi)
; X86-NEXT: movl %edx, %edi
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: imull %ebx, %ecx
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl %ebx, (%edx)
; X86-NEXT: movl 28(%ebp), %eax
; X86-NEXT: imull %eax, %ecx
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: imull 32(%ebp), %esi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: movl 36(%ebp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: imull %ebx, %ecx
; X86-NEXT: mull %edi
; X86-NEXT: imull %edi, %ecx
; X86-NEXT: mull %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: imull %eax, %edi
; X86-NEXT: addl %edx, %edi
; X86-NEXT: movl 40(%ebp), %eax
; X86-NEXT: imull %ebx, %eax
; X86-NEXT: addl %edx, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: adcl %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %esi, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: movl 28(%ebp), %ecx
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl $0, %esi
; X86-NEXT: movl %edx, (%esp) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ecx
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: addl (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull 32(%ebp)
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: adcl %esi, %edi
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: addl %esi, %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: adcl %ecx, %ebx
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull 32(%ebp)
; X86-NEXT: addl %edi, %eax
; X86-NEXT: addl %ebx, %eax
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl 16(%ebp), %esi
; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: sbbl (%esp), %esi # 4-byte Folded Reload
; X86-NEXT: movl 20(%ebp), %edi
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl 24(%ebp), %ebx
Expand Down Expand Up @@ -386,35 +382,35 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst)
; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-NEXT: movd %edx, %xmm4
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-NEXT: movd %esi, %xmm2
; X86-NEXT: movd %esi, %xmm7
; X86-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
; X86-NEXT: movd %edi, %xmm5
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
; X86-NEXT: movd %edi, %xmm2
; X86-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3],xmm7[4],xmm4[4],xmm7[5],xmm4[5],xmm7[6],xmm4[6],xmm7[7],xmm4[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
; X86-NEXT: movd %ebx, %xmm4
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: movd %ecx, %xmm6
; X86-NEXT: movd %ecx, %xmm5
; X86-NEXT: movl 8(%ebp), %ecx
; X86-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
; X86-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movd %eax, %xmm5
; X86-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3],xmm5[4],xmm6[4],xmm5[5],xmm6[5],xmm5[6],xmm6[6],xmm5[7],xmm6[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1]
; X86-NEXT: movdqa %xmm5, %xmm2
; X86-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X86-NEXT: movdqa %xmm2, (%ecx)
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1]
; X86-NEXT: movdqa %xmm2, %xmm4
; X86-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
; X86-NEXT: movdqa %xmm4, (%ecx)
; X86-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmullw %xmm3, %xmm2
; X86-NEXT: movdqa %xmm1, %xmm4
; X86-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; X86-NEXT: pmullw %xmm3, %xmm4
; X86-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; X86-NEXT: pand %xmm3, %xmm2
; X86-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: pand %xmm3, %xmm4
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT: pmullw %xmm5, %xmm1
; X86-NEXT: pmullw %xmm2, %xmm1
; X86-NEXT: pand %xmm3, %xmm1
; X86-NEXT: packuswb %xmm2, %xmm1
; X86-NEXT: packuswb %xmm4, %xmm1
; X86-NEXT: psubb %xmm1, %xmm0
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
Expand Down Expand Up @@ -585,31 +581,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, <8 x i16>* %divdst
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: pextrw $2, %xmm0, %eax
; X86-NEXT: pextrw $2, %xmm1, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; X86-NEXT: pextrw $1, %xmm0, %eax
; X86-NEXT: pextrw $1, %xmm1, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm5
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
; X86-NEXT: movdqa %xmm5, (%ecx)
; X86-NEXT: pmullw %xmm1, %xmm5
Expand Down Expand Up @@ -704,38 +700,38 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst
; X86-NEXT: movd %xmm2, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm3, %eax
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; X86-NEXT: movd %xmm3, %esi
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-NEXT: movd %xmm2, %eax
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-NEXT: movd %xmm2, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,1,1]
; X86-NEXT: movd %xmm4, %eax
; X86-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,1,1]
; X86-NEXT: movd %xmm4, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X86-NEXT: movdqa %xmm2, (%ecx)
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X86-NEXT: movdqa %xmm3, (%ecx)
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT: pmuludq %xmm3, %xmm1
; X86-NEXT: pmuludq %xmm2, %xmm1
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: psubd %xmm2, %xmm0
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-NEXT: psubd %xmm3, %xmm0
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
Expand Down Expand Up @@ -817,25 +813,25 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, <2 x i64>* %divdst
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqu %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-NEXT: calll __udivdi3
; X86-NEXT: movd %edx, %xmm0
; X86-NEXT: movd %eax, %xmm1
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movd %edx, %xmm1
; X86-NEXT: movd %eax, %xmm3
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X86-NEXT: movdqa %xmm1, (%esi)
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload
; X86-NEXT: movdqa %xmm3, %xmm0
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: pmuludq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; X86-NEXT: movdqa %xmm3, (%esi)
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pmuludq %xmm3, %xmm1
; X86-NEXT: movdqa %xmm3, %xmm2
; X86-NEXT: psrlq $32, %xmm2
; X86-NEXT: pmuludq %xmm3, %xmm2
; X86-NEXT: paddq %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm0, %xmm2
; X86-NEXT: paddq %xmm1, %xmm2
; X86-NEXT: psllq $32, %xmm2
; X86-NEXT: pmuludq %xmm3, %xmm1
; X86-NEXT: paddq %xmm2, %xmm1
; X86-NEXT: pmuludq %xmm0, %xmm3
; X86-NEXT: paddq %xmm2, %xmm3
; X86-NEXT: movdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: psubq %xmm1, %xmm0
; X86-NEXT: psubq %xmm3, %xmm0
; X86-NEXT: addl $64, %esp
; X86-NEXT: popl %esi
; X86-NEXT: retl
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/fp128-cast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1139,19 +1139,19 @@ define dso_local i32 @TestBits128(fp128 %ld) nounwind {
; X32-NEXT: subl $20, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: subl $12, %esp
; X32-NEXT: leal {{[0-9]+}}(%esp), %edi
; X32-NEXT: leal {{[0-9]+}}(%esp), %edx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: pushl %edx
; X32-NEXT: pushl %ecx
; X32-NEXT: pushl %eax
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: pushl %edx
; X32-NEXT: pushl %ecx
; X32-NEXT: pushl %eax
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %edx
; X32-NEXT: calll __multf3
; X32-NEXT: addl $44, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
Expand Down
Loading