diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 7e3b284d0b595b..832ee5d3923682 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -5,6 +5,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
 declare i16 @llvm.fshl.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i128 @llvm.fshl.i128(i128, i128, i128)
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 declare i8 @llvm.fshr.i8(i8, i8, i8)
@@ -42,6 +43,37 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
   ret i64 %f
 }
 
+define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
+; CHECK-LABEL: fshl_i128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mvn w9, w4
+; CHECK-NEXT: and x12, x9, #0x7f
+; CHECK-NEXT: extr x8, x3, x2, #1
+; CHECK-NEXT: lsr x10, x3, #1
+; CHECK-NEXT: tst x12, #0x40
+; CHECK-NEXT: lsr x12, x0, #1
+; CHECK-NEXT: lsr x8, x8, x9
+; CHECK-NEXT: lsr x12, x12, x9
+; CHECK-NEXT: lsr x9, x10, x9
+; CHECK-NEXT: lsl x10, x10, #1
+; CHECK-NEXT: lsl x10, x10, x4
+; CHECK-NEXT: lsl x11, x1, x4
+; CHECK-NEXT: and x14, x4, #0x7f
+; CHECK-NEXT: orr x8, x10, x8
+; CHECK-NEXT: lsl x13, x0, x4
+; CHECK-NEXT: orr x11, x11, x12
+; CHECK-NEXT: csel x10, xzr, x9, ne
+; CHECK-NEXT: csel x8, x9, x8, ne
+; CHECK-NEXT: tst x14, #0x40
+; CHECK-NEXT: csel x9, x13, x11, ne
+; CHECK-NEXT: csel x11, xzr, x13, ne
+; CHECK-NEXT: orr x1, x9, x10
+; CHECK-NEXT: orr x0, x11, x8
+; CHECK-NEXT: ret
+  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
+  ret i128 %f
+}
+
 ; Verify that weird types are minimally supported.
 declare i37 @llvm.fshl.i37(i37, i37, i37)
 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
index 9a4398b90ab42c..c33904082f2384 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll
@@ -7,6 +7,7 @@ declare i8 @llvm.fshl.i8(i8, i8, i8)
 declare i16 @llvm.fshl.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i128 @llvm.fshl.i128(i128, i128, i128)
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 declare i8 @llvm.fshr.i8(i8, i8, i8)
@@ -108,6 +109,395 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
   ret i64 %f
 }
 
+define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
+; CHECK32_32-LABEL: fshl_i128:
+; CHECK32_32: # %bb.0:
+; CHECK32_32-NEXT: stwu 1, -64(1)
+; CHECK32_32-NEXT: lwz 0, 84(1)
+; CHECK32_32-NEXT: rotlwi 12, 8, 31
+; CHECK32_32-NEXT: srwi 11, 7, 1
+; CHECK32_32-NEXT: rlwimi 12, 7, 31, 0, 0
+; CHECK32_32-NEXT: andi. 7, 0, 127
+; CHECK32_32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: rotlwi 10, 10, 31
+; CHECK32_32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: rotlwi 30, 9, 31
+; CHECK32_32-NEXT: subfic 27, 7, 32
+; CHECK32_32-NEXT: stw 22, 24(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: rlwimi 10, 9, 31, 0, 0
+; CHECK32_32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: rlwimi 30, 8, 31, 0, 0
+; CHECK32_32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: not 8, 0
+; CHECK32_32-NEXT: subfic 9, 7, 96
+; CHECK32_32-NEXT: addi 0, 7, -64
+; CHECK32_32-NEXT: slw 28, 3, 7
+; CHECK32_32-NEXT: subfic 25, 7, 64
+; CHECK32_32-NEXT: srw 22, 4, 27
+; CHECK32_32-NEXT: stw 20, 16(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: srw 9, 6, 9
+; CHECK32_32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: slw 23, 5, 0
+; CHECK32_32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: addi 29, 7, -96
+; CHECK32_32-NEXT: srw 20, 5, 25
+; CHECK32_32-NEXT: or 28, 28, 22
+; CHECK32_32-NEXT: srw 22, 6, 25
+; CHECK32_32-NEXT: subfic 25, 25, 32
+; CHECK32_32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: mcrf 1, 0
+; CHECK32_32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: addi 26, 7, -32
+; CHECK32_32-NEXT: andi. 8, 8, 127
+; CHECK32_32-NEXT: slw 24, 5, 7
+; CHECK32_32-NEXT: slw 29, 6, 29
+; CHECK32_32-NEXT: or 9, 23, 9
+; CHECK32_32-NEXT: slw 25, 5, 25
+; CHECK32_32-NEXT: srw 5, 5, 27
+; CHECK32_32-NEXT: srw 27, 6, 27
+; CHECK32_32-NEXT: stw 21, 20(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: slw 21, 4, 26
+; CHECK32_32-NEXT: subfic 23, 8, 32
+; CHECK32_32-NEXT: or 27, 24, 27
+; CHECK32_32-NEXT: subfic 24, 8, 96
+; CHECK32_32-NEXT: or 9, 9, 29
+; CHECK32_32-NEXT: addi 29, 8, -64
+; CHECK32_32-NEXT: or 25, 22, 25
+; CHECK32_32-NEXT: stw 19, 12(1) # 4-byte Folded Spill
+; CHECK32_32-NEXT: srw 19, 12, 8
+; CHECK32_32-NEXT: or 28, 28, 21
+; CHECK32_32-NEXT: slw 21, 11, 23
+; CHECK32_32-NEXT: slw 24, 11, 24
+; CHECK32_32-NEXT: srw 22, 12, 29
+; CHECK32_32-NEXT: slw 26, 6, 26
+; CHECK32_32-NEXT: or 5, 25, 5
+; CHECK32_32-NEXT: addi 25, 8, -96
+; CHECK32_32-NEXT: or 21, 19, 21
+; CHECK32_32-NEXT: srw 19, 10, 8
+; CHECK32_32-NEXT: or 24, 22, 24
+; CHECK32_32-NEXT: slw 22, 30, 23
+; CHECK32_32-NEXT: or 27, 27, 26
+; CHECK32_32-NEXT: addi 26, 8, -32
+; CHECK32_32-NEXT: srw 25, 11, 25
+; CHECK32_32-NEXT: or 22, 19, 22
+; CHECK32_32-NEXT: or 28, 28, 20
+; CHECK32_32-NEXT: srw 20, 11, 26
+; CHECK32_32-NEXT: or 25, 24, 25
+; CHECK32_32-NEXT: subfic 24, 8, 64
+; CHECK32_32-NEXT: srw 26, 30, 26
+; CHECK32_32-NEXT: or 26, 22, 26
+; CHECK32_32-NEXT: subfic 22, 24, 32
+; CHECK32_32-NEXT: slw 23, 12, 23
+; CHECK32_32-NEXT: srw 22, 12, 22
+; CHECK32_32-NEXT: slw 12, 12, 24
+; CHECK32_32-NEXT: slw 24, 11, 24
+; CHECK32_32-NEXT: cmplwi 5, 7, 64
+; CHECK32_32-NEXT: or 24, 24, 22
+; CHECK32_32-NEXT: slw 22, 6, 0
+; CHECK32_32-NEXT: slw 6, 6, 7
+; CHECK32_32-NEXT: slw 7, 4, 7
+; CHECK32_32-NEXT: srw 29, 11, 29
+; CHECK32_32-NEXT: srw 11, 11, 8
+; CHECK32_32-NEXT: cmplwi 6, 8, 64
+; CHECK32_32-NEXT: srw 8, 30, 8
+; CHECK32_32-NEXT: or 5, 7, 5
+; CHECK32_32-NEXT: or 7, 26, 12
+; CHECK32_32-NEXT: or 12, 24, 23
+; CHECK32_32-NEXT: bc 12, 20, .LBB2_1
+; CHECK32_32-NEXT: b .LBB2_2
+; CHECK32_32-NEXT: .LBB2_1:
+; CHECK32_32-NEXT: addi 9, 28, 0
+; CHECK32_32-NEXT: .LBB2_2:
+; CHECK32_32-NEXT: li 28, 0
+; CHECK32_32-NEXT: bc 12, 20, .LBB2_4
+; CHECK32_32-NEXT: # %bb.3:
+; CHECK32_32-NEXT: ori 5, 22, 0
+; CHECK32_32-NEXT: b .LBB2_4
+; CHECK32_32-NEXT: .LBB2_4:
+; CHECK32_32-NEXT: bc 12, 24, .LBB2_6
+; CHECK32_32-NEXT: # %bb.5:
+; CHECK32_32-NEXT: ori 7, 25, 0
+; CHECK32_32-NEXT: b .LBB2_6
+; CHECK32_32-NEXT: .LBB2_6:
+; CHECK32_32-NEXT: or 8, 8, 12
+; CHECK32_32-NEXT: or 21, 21, 20
+; CHECK32_32-NEXT: bc 12, 20, .LBB2_8
+; CHECK32_32-NEXT: # %bb.7:
+; CHECK32_32-NEXT: ori 6, 28, 0
+; CHECK32_32-NEXT: b .LBB2_8
+; CHECK32_32-NEXT: .LBB2_8:
+; CHECK32_32-NEXT: bc 12, 6, .LBB2_10
+; CHECK32_32-NEXT: # %bb.9:
+; CHECK32_32-NEXT: ori 4, 5, 0
+; CHECK32_32-NEXT: b .LBB2_10
+; CHECK32_32-NEXT: .LBB2_10:
+; CHECK32_32-NEXT: bc 12, 2, .LBB2_12
+; CHECK32_32-NEXT: # %bb.11:
+; CHECK32_32-NEXT: ori 5, 7, 0
+; CHECK32_32-NEXT: b .LBB2_13
+; CHECK32_32-NEXT: .LBB2_12:
+; CHECK32_32-NEXT: addi 5, 10, 0
+; CHECK32_32-NEXT: .LBB2_13:
+; CHECK32_32-NEXT: bc 12, 24, .LBB2_15
+; CHECK32_32-NEXT: # %bb.14:
+; CHECK32_32-NEXT: ori 7, 29, 0
+; CHECK32_32-NEXT: ori 11, 28, 0
+; CHECK32_32-NEXT: ori 0, 28, 0
+; CHECK32_32-NEXT: b .LBB2_16
+; CHECK32_32-NEXT: .LBB2_15:
+; CHECK32_32-NEXT: addi 7, 8, 0
+; CHECK32_32-NEXT: addi 0, 21, 0
+; CHECK32_32-NEXT: .LBB2_16:
+; CHECK32_32-NEXT: bc 12, 6, .LBB2_18
+; CHECK32_32-NEXT: # %bb.17:
+; CHECK32_32-NEXT: ori 3, 9, 0
+; CHECK32_32-NEXT: b .LBB2_18
+; CHECK32_32-NEXT: .LBB2_18:
+; CHECK32_32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: or 6, 6, 5
+; CHECK32_32-NEXT: bc 12, 20, .LBB2_20
+; CHECK32_32-NEXT: # %bb.19:
+; CHECK32_32-NEXT: ori 5, 28, 0
+; CHECK32_32-NEXT: b .LBB2_21
+; CHECK32_32-NEXT: .LBB2_20:
+; CHECK32_32-NEXT: addi 5, 27, 0
+; CHECK32_32-NEXT: .LBB2_21:
+; CHECK32_32-NEXT: bc 12, 2, .LBB2_22
+; CHECK32_32-NEXT: b .LBB2_23
+; CHECK32_32-NEXT: .LBB2_22:
+; CHECK32_32-NEXT: addi 7, 30, 0
+; CHECK32_32-NEXT: .LBB2_23:
+; CHECK32_32-NEXT: or 3, 3, 11
+; CHECK32_32-NEXT: or 4, 4, 0
+; CHECK32_32-NEXT: or 5, 5, 7
+; CHECK32_32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 22, 24(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 21, 20(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 20, 16(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: lwz 19, 12(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: addi 1, 1, 64
+; CHECK32_32-NEXT: blr
+;
+; CHECK32_64-LABEL: fshl_i128:
+; CHECK32_64: # %bb.0:
+; CHECK32_64-NEXT: stwu 1, -64(1)
+; CHECK32_64-NEXT: lwz 12, 84(1)
+; CHECK32_64-NEXT: rotlwi 11, 8, 31
+; CHECK32_64-NEXT: rotlwi 10, 10, 31
+; CHECK32_64-NEXT: rlwimi 10, 9, 31, 0, 0
+; CHECK32_64-NEXT: rlwimi 11, 7, 31, 0, 0
+; CHECK32_64-NEXT: stw 30, 56(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: rotlwi 30, 9, 31
+; CHECK32_64-NEXT: stw 27, 44(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: not 9, 12
+; CHECK32_64-NEXT: rlwimi 30, 8, 31, 0, 0
+; CHECK32_64-NEXT: andi. 8, 12, 127
+; CHECK32_64-NEXT: stw 22, 24(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: mcrf 1, 0
+; CHECK32_64-NEXT: subfic 12, 8, 96
+; CHECK32_64-NEXT: addi 0, 8, -64
+; CHECK32_64-NEXT: subfic 27, 8, 32
+; CHECK32_64-NEXT: stw 23, 28(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: andi. 9, 9, 127
+; CHECK32_64-NEXT: srw 12, 6, 12
+; CHECK32_64-NEXT: stw 25, 36(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: subfic 25, 8, 64
+; CHECK32_64-NEXT: slw 23, 5, 0
+; CHECK32_64-NEXT: stw 26, 40(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: addi 26, 8, -32
+; CHECK32_64-NEXT: srw 22, 4, 27
+; CHECK32_64-NEXT: srwi 7, 7, 1
+; CHECK32_64-NEXT: or 12, 23, 12
+; CHECK32_64-NEXT: stw 28, 48(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: slw 28, 3, 8
+; CHECK32_64-NEXT: srw 23, 6, 25
+; CHECK32_64-NEXT: stw 18, 8(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: subfic 18, 9, 32
+; CHECK32_64-NEXT: or 28, 28, 22
+; CHECK32_64-NEXT: srw 22, 5, 27
+; CHECK32_64-NEXT: srw 27, 6, 27
+; CHECK32_64-NEXT: stw 20, 16(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: srw 20, 5, 25
+; CHECK32_64-NEXT: subfic 25, 25, 32
+; CHECK32_64-NEXT: stw 21, 20(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: slw 21, 4, 26
+; CHECK32_64-NEXT: slw 26, 6, 26
+; CHECK32_64-NEXT: or 28, 28, 21
+; CHECK32_64-NEXT: slw 21, 7, 18
+; CHECK32_64-NEXT: stw 24, 32(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: slw 24, 5, 8
+; CHECK32_64-NEXT: slw 5, 5, 25
+; CHECK32_64-NEXT: stw 29, 52(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: addi 29, 8, -96
+; CHECK32_64-NEXT: subfic 25, 9, 96
+; CHECK32_64-NEXT: slw 29, 6, 29
+; CHECK32_64-NEXT: or 27, 24, 27
+; CHECK32_64-NEXT: stw 19, 12(1) # 4-byte Folded Spill
+; CHECK32_64-NEXT: srw 19, 11, 9
+; CHECK32_64-NEXT: addi 24, 9, -64
+; CHECK32_64-NEXT: or 12, 12, 29
+; CHECK32_64-NEXT: srw 29, 10, 9
+; CHECK32_64-NEXT: slw 25, 7, 25
+; CHECK32_64-NEXT: or 21, 19, 21
+; CHECK32_64-NEXT: srw 19, 11, 24
+; CHECK32_64-NEXT: or 5, 23, 5
+; CHECK32_64-NEXT: slw 23, 30, 18
+; CHECK32_64-NEXT: or 27, 27, 26
+; CHECK32_64-NEXT: addi 26, 9, -96
+; CHECK32_64-NEXT: or 25, 19, 25
+; CHECK32_64-NEXT: lwz 19, 12(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: or 29, 29, 23
+; CHECK32_64-NEXT: addi 23, 9, -32
+; CHECK32_64-NEXT: srw 26, 7, 26
+; CHECK32_64-NEXT: or 28, 28, 20
+; CHECK32_64-NEXT: srw 20, 7, 23
+; CHECK32_64-NEXT: or 26, 25, 26
+; CHECK32_64-NEXT: subfic 25, 9, 64
+; CHECK32_64-NEXT: srw 23, 30, 23
+; CHECK32_64-NEXT: or 29, 29, 23
+; CHECK32_64-NEXT: subfic 23, 25, 32
+; CHECK32_64-NEXT: or 5, 5, 22
+; CHECK32_64-NEXT: slw 22, 11, 18
+; CHECK32_64-NEXT: lwz 18, 8(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: srw 23, 11, 23
+; CHECK32_64-NEXT: slw 11, 11, 25
+; CHECK32_64-NEXT: slw 25, 7, 25
+; CHECK32_64-NEXT: cmplwi 5, 8, 64
+; CHECK32_64-NEXT: bc 12, 20, .LBB2_1
+; CHECK32_64-NEXT: b .LBB2_2
+; CHECK32_64-NEXT: .LBB2_1:
+; CHECK32_64-NEXT: addi 12, 28, 0
+; CHECK32_64-NEXT: .LBB2_2:
+; CHECK32_64-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: or 25, 25, 23
+; CHECK32_64-NEXT: bc 12, 6, .LBB2_4
+; CHECK32_64-NEXT: # %bb.3:
+; CHECK32_64-NEXT: ori 3, 12, 0
+; CHECK32_64-NEXT: b .LBB2_4
+; CHECK32_64-NEXT: .LBB2_4:
+; CHECK32_64-NEXT: slw 23, 6, 0
+; CHECK32_64-NEXT: slw 6, 6, 8
+; CHECK32_64-NEXT: slw 8, 4, 8
+; CHECK32_64-NEXT: cmplwi 6, 9, 64
+; CHECK32_64-NEXT: or 5, 8, 5
+; CHECK32_64-NEXT: bc 12, 20, .LBB2_6
+; CHECK32_64-NEXT: # %bb.5:
+; CHECK32_64-NEXT: ori 5, 23, 0
+; CHECK32_64-NEXT: b .LBB2_6
+; CHECK32_64-NEXT: .LBB2_6:
+; CHECK32_64-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: li 8, 0
+; CHECK32_64-NEXT: srw 24, 7, 24
+; CHECK32_64-NEXT: bc 12, 6, .LBB2_8
+; CHECK32_64-NEXT: # %bb.7:
+; CHECK32_64-NEXT: ori 4, 5, 0
+; CHECK32_64-NEXT: b .LBB2_8
+; CHECK32_64-NEXT: .LBB2_8:
+; CHECK32_64-NEXT: bc 12, 20, .LBB2_10
+; CHECK32_64-NEXT: # %bb.9:
+; CHECK32_64-NEXT: ori 6, 8, 0
+; CHECK32_64-NEXT: b .LBB2_10
+; CHECK32_64-NEXT: .LBB2_10:
+; CHECK32_64-NEXT: srw 7, 7, 9
+; CHECK32_64-NEXT: srw 9, 30, 9
+; CHECK32_64-NEXT: bc 12, 24, .LBB2_12
+; CHECK32_64-NEXT: # %bb.11:
+; CHECK32_64-NEXT: ori 7, 8, 0
+; CHECK32_64-NEXT: b .LBB2_12
+; CHECK32_64-NEXT: .LBB2_12:
+; CHECK32_64-NEXT: or 0, 25, 22
+; CHECK32_64-NEXT: or 11, 29, 11
+; CHECK32_64-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: bc 12, 24, .LBB2_14
+; CHECK32_64-NEXT: # %bb.13:
+; CHECK32_64-NEXT: ori 5, 26, 0
+; CHECK32_64-NEXT: b .LBB2_15
+; CHECK32_64-NEXT: .LBB2_14:
+; CHECK32_64-NEXT: addi 5, 11, 0
+; CHECK32_64-NEXT: .LBB2_15:
+; CHECK32_64-NEXT: or 9, 9, 0
+; CHECK32_64-NEXT: or 21, 21, 20
+; CHECK32_64-NEXT: bc 12, 2, .LBB2_16
+; CHECK32_64-NEXT: b .LBB2_17
+; CHECK32_64-NEXT: .LBB2_16:
+; CHECK32_64-NEXT: addi 5, 10, 0
+; CHECK32_64-NEXT: .LBB2_17:
+; CHECK32_64-NEXT: bc 12, 24, .LBB2_19
+; CHECK32_64-NEXT: # %bb.18:
+; CHECK32_64-NEXT: ori 0, 8, 0
+; CHECK32_64-NEXT: b .LBB2_20
+; CHECK32_64-NEXT: .LBB2_19:
+; CHECK32_64-NEXT: addi 0, 21, 0
+; CHECK32_64-NEXT: .LBB2_20:
+; CHECK32_64-NEXT: bc 12, 20, .LBB2_21
+; CHECK32_64-NEXT: b .LBB2_22
+; CHECK32_64-NEXT: .LBB2_21:
+; CHECK32_64-NEXT: addi 8, 27, 0
+; CHECK32_64-NEXT: .LBB2_22:
+; CHECK32_64-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: or 3, 3, 7
+; CHECK32_64-NEXT: bc 12, 24, .LBB2_24
+; CHECK32_64-NEXT: # %bb.23:
+; CHECK32_64-NEXT: ori 7, 24, 0
+; CHECK32_64-NEXT: b .LBB2_25
+; CHECK32_64-NEXT: .LBB2_24:
+; CHECK32_64-NEXT: addi 7, 9, 0
+; CHECK32_64-NEXT: .LBB2_25:
+; CHECK32_64-NEXT: or 4, 4, 0
+; CHECK32_64-NEXT: bc 12, 2, .LBB2_26
+; CHECK32_64-NEXT: b .LBB2_27
+; CHECK32_64-NEXT: .LBB2_26:
+; CHECK32_64-NEXT: addi 7, 30, 0
+; CHECK32_64-NEXT: .LBB2_27:
+; CHECK32_64-NEXT: or 6, 6, 5
+; CHECK32_64-NEXT: or 5, 8, 7
+; CHECK32_64-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: lwz 22, 24(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: lwz 21, 20(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: lwz 20, 16(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: addi 1, 1, 64
+; CHECK32_64-NEXT: blr
+;
+; CHECK64-LABEL: fshl_i128:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: clrlwi 8, 7, 25
+; CHECK64-NEXT: rotldi 5, 5, 63
+; CHECK64-NEXT: not 7, 7
+; CHECK64-NEXT: rldicl 9, 6, 63, 1
+; CHECK64-NEXT: subfic 10, 8, 64
+; CHECK64-NEXT: addi 11, 8, -64
+; CHECK64-NEXT: rldimi 5, 6, 63, 0
+; CHECK64-NEXT: clrlwi 6, 7, 25
+; CHECK64-NEXT: srd 7, 3, 10
+; CHECK64-NEXT: sld 10, 3, 11
+; CHECK64-NEXT: subfic 11, 6, 64
+; CHECK64-NEXT: addi 12, 6, -64
+; CHECK64-NEXT: sld 4, 4, 8
+; CHECK64-NEXT: srd 5, 5, 6
+; CHECK64-NEXT: sld 11, 9, 11
+; CHECK64-NEXT: or 4, 4, 7
+; CHECK64-NEXT: or 5, 5, 11
+; CHECK64-NEXT: srd 7, 9, 12
+; CHECK64-NEXT: or 4, 4, 10
+; CHECK64-NEXT: srd 6, 9, 6
+; CHECK64-NEXT: or 5, 5, 7
+; CHECK64-NEXT: sld 3, 3, 8
+; CHECK64-NEXT: or 4, 4, 6
+; CHECK64-NEXT: or 3, 3, 5
+; CHECK64-NEXT: blr
+  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
+  ret i128 %f
+}
+
 ; Verify that weird types are minimally supported.
 declare i37 @llvm.fshl.i37(i37, i37, i37)
 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index 2120cb2581b9b6..8775957f7b7d95 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -7,6 +7,7 @@ declare i16 @llvm.fshl.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i64 @llvm.fshl.i64(i64, i64, i64)
 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare i128 @llvm.fshl.i128(i128, i128, i128)
 
 declare i8 @llvm.fshr.i8(i8, i8, i8)
 declare i16 @llvm.fshr.i16(i16, i16, i16)
@@ -36,6 +37,259 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
   ret i32 %f
 }
 
+define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
+; X32-SSE2-LABEL: fshl_i64:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: pushl %ebx
+; X32-SSE2-NEXT: pushl %edi
+; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X32-SSE2-NEXT: movb %ch, %cl
+; X32-SSE2-NEXT: notb %cl
+; X32-SSE2-NEXT: shrdl $1, %eax, %esi
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shrl %ebx
+; X32-SSE2-NEXT: shrdl %cl, %ebx, %esi
+; X32-SSE2-NEXT: shrl %cl, %ebx
+; X32-SSE2-NEXT: xorl %ebp, %ebp
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: cmovnel %ebx, %esi
+; X32-SSE2-NEXT: cmovnel %ebp, %ebx
+; X32-SSE2-NEXT: movl %edi, %eax
+; X32-SSE2-NEXT: movb %ch, %cl
+; X32-SSE2-NEXT: shll %cl, %eax
+; X32-SSE2-NEXT: shldl %cl, %edi, %edx
+; X32-SSE2-NEXT: testb $32, %ch
+; X32-SSE2-NEXT: cmovnel %eax, %edx
+; X32-SSE2-NEXT: cmovnel %ebp, %eax
+; X32-SSE2-NEXT: orl %esi, %eax
+; X32-SSE2-NEXT: orl %ebx, %edx
+; X32-SSE2-NEXT: popl %esi
+; X32-SSE2-NEXT: popl %edi
+; X32-SSE2-NEXT: popl %ebx
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
+; X64-AVX2-LABEL: fshl_i64:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: movq %rdx, %rcx
+; X64-AVX2-NEXT: movq %rdi, %rax
+; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT: shldq %cl, %rsi, %rax
+; X64-AVX2-NEXT: retq
+  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
+  ret i64 %f
+}
+
+define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
+; X32-SSE2-LABEL: fshl_i128:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: pushl %ebx
+; X32-SSE2-NEXT: pushl %edi
+; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: subl $64, %esp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: movl %edi, %esi
+; X32-SSE2-NEXT: shldl $31, %ecx, %esi
+; X32-SSE2-NEXT: notl %edx
+; X32-SSE2-NEXT: andl $127, %edx
+; X32-SSE2-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-SSE2-NEXT: movb $64, %cl
+; X32-SSE2-NEXT: subb %dl, %cl
+; X32-SSE2-NEXT: shrl %edi
+; X32-SSE2-NEXT: movl %edi, %ebx
+; X32-SSE2-NEXT: shldl %cl, %esi, %ebx
+; X32-SSE2-NEXT: movl %esi, %ebp
+; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: shll %cl, %ebp
+; X32-SSE2-NEXT: xorl %eax, %eax
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: cmovnel %ebp, %ebx
+; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: cmovnel %eax, %ebp
+; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: andl $127, %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shldl %cl, %ebp, %edx
+; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT: addb $-64, %cl
+; X32-SSE2-NEXT: movl %esi, %edx
+; X32-SSE2-NEXT: shrdl %cl, %edi, %edx
+; X32-SSE2-NEXT: movl %edi, %esi
+; X32-SSE2-NEXT: shrl %cl, %esi
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: cmovnel %esi, %edx
+; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl $0, %ecx
+; X32-SSE2-NEXT: cmovnel %ecx, %esi
+; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shldl %cl, %esi, %ebx
+; X32-SSE2-NEXT: movl %esi, %edx
+; X32-SSE2-NEXT: shll %cl, %edx
+; X32-SSE2-NEXT: shll %cl, %ebp
+; X32-SSE2-NEXT: testb $32, %al
+; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-SSE2-NEXT: cmovnel %ebp, %ecx
+; X32-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: cmovnel %edx, %ebx
+; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl $0, %ecx
+; X32-SSE2-NEXT: cmovnel %ecx, %ebp
+; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: cmovnel %ecx, %edx
+; X32-SSE2-NEXT: xorl %ecx, %ecx
+; X32-SSE2-NEXT: cmpl $64, %eax
+; X32-SSE2-NEXT: cmovael %ecx, %edx
+; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT: shldl $31, %eax, %ebx
+; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: shrdl $1, %eax, %edx
+; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-SSE2-NEXT: shrdl %cl, %edi, %eax
+; X32-SSE2-NEXT: shrl %cl, %edi
+; X32-SSE2-NEXT: movl %edx, %ebp
+; X32-SSE2-NEXT: shrdl %cl, %ebx, %ebp
+; X32-SSE2-NEXT: movl %ebx, %edx
+; X32-SSE2-NEXT: shrl %cl, %edx
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: cmovnel %edx, %ebp
+; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: cmovnel %edi, %eax
+; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl $0, %ebp
+; X32-SSE2-NEXT: cmovnel %ebp, %edx
+; X32-SSE2-NEXT: cmovnel %ebp, %edi
+; X32-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X32-SSE2-NEXT: cmpl $64, %eax
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-SSE2-NEXT: cmovael %ebp, %ecx
+; X32-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: xorl %ebp, %ebp
+; X32-SSE2-NEXT: movb $64, %ch
+; X32-SSE2-NEXT: subb %al, %ch
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: movb %ch, %cl
+; X32-SSE2-NEXT: shrl %cl, %edi
+; X32-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: testb $32, %ch
+; X32-SSE2-NEXT: cmovnel %ebp, %edi
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X32-SSE2-NEXT: movb %al, %cl
+; X32-SSE2-NEXT: addb $-64, %cl
+; X32-SSE2-NEXT: movl %esi, %ebp
+; X32-SSE2-NEXT: shll %cl, %ebp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: shldl %cl, %esi, %eax
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: cmovnel %ebp, %eax
+; X32-SSE2-NEXT: cmpl $64, (%esp) # 4-byte Folded Reload
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-SSE2-NEXT: movl $0, %esi
+; X32-SSE2-NEXT: cmovael %esi, %ebx
+; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X32-SSE2-NEXT: cmpl $64, %ebx
+; X32-SSE2-NEXT: cmovbl %edi, %eax
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: movl $0, %esi
+; X32-SSE2-NEXT: cmovnel %esi, %ebp
+; X32-SSE2-NEXT: cmpl $64, (%esp) # 4-byte Folded Reload
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X32-SSE2-NEXT: cmovael %esi, %edi
+; X32-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X32-SSE2-NEXT: movb %ch, %cl
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: shrdl %cl, %edi, %esi
+; X32-SSE2-NEXT: testb $32, %ch
+; X32-SSE2-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-SSE2-NEXT: cmpl $64, %ebx
+; X32-SSE2-NEXT: cmovael %ebp, %esi
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT: movl (%esp), %edi # 4-byte Reload
+; X32-SSE2-NEXT: cmpl $64, %edi
+; X32-SSE2-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT: cmpl $64, %edi
+; X32-SSE2-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT: testl %edi, %edi
+; X32-SSE2-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X32-SSE2-NEXT: movl %ecx, %edi
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X32-SSE2-NEXT: testl %ebx, %ebx
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT: movl %eax, 12(%ecx)
+; X32-SSE2-NEXT: movl %esi, 8(%ecx)
+; X32-SSE2-NEXT: movl %edx, 4(%ecx)
+; X32-SSE2-NEXT: movl %edi, (%ecx)
+; X32-SSE2-NEXT: movl %ecx, %eax
+; X32-SSE2-NEXT: addl $64, %esp
+; X32-SSE2-NEXT: popl %esi
+; X32-SSE2-NEXT: popl %edi
+; X32-SSE2-NEXT: popl %ebx
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl $4
+;
+; X64-AVX2-LABEL: fshl_i128:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: movq %r8, %r9
+; X64-AVX2-NEXT: movq %rcx, %r10
+; X64-AVX2-NEXT: movq %rdx, %r8
+; X64-AVX2-NEXT: movq %rsi, %rdx
+; X64-AVX2-NEXT: movl %r9d, %ecx
+; X64-AVX2-NEXT: shldq %cl, %rdi, %rdx
+; X64-AVX2-NEXT: shrdq $1, %r10, %r8
+; X64-AVX2-NEXT: shrq %r10
+; X64-AVX2-NEXT: notb %cl
+; X64-AVX2-NEXT: shrdq %cl, %r10, %r8
+; X64-AVX2-NEXT: shrq %cl, %r10
+; X64-AVX2-NEXT: xorl %eax, %eax
+; X64-AVX2-NEXT: testb $64, %cl
+; X64-AVX2-NEXT: cmovneq %r10, %r8
+; X64-AVX2-NEXT: cmovneq %rax, %r10
+; X64-AVX2-NEXT: movl %r9d, %ecx
+; X64-AVX2-NEXT: shlq %cl, %rdi
+; X64-AVX2-NEXT: testb $64, %r9b
+; X64-AVX2-NEXT: cmovneq %rdi, %rdx
+; X64-AVX2-NEXT: cmoveq %rdi, %rax
+; X64-AVX2-NEXT: orq %r8, %rax
+; X64-AVX2-NEXT: orq %r10, %rdx
+; X64-AVX2-NEXT: retq
+  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
+  ret i128 %f
+}
+
 ; Verify that weird types are minimally supported.
 declare i37 @llvm.fshl.i37(i37, i37, i37)
 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
@@ -916,13 +1170,13 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
 ; X32-SSE2-NEXT: shldl $24, %ebx, %edi
 ; X32-SSE2-NEXT: xorl %eax, %edi
 ; X32-SSE2-NEXT: orl %edi, %ecx
-; X32-SSE2-NEXT: jne .LBB44_1
+; X32-SSE2-NEXT: jne .LBB46_1
 ; X32-SSE2-NEXT: # %bb.2:
 ; X32-SSE2-NEXT: popl %esi
 ; X32-SSE2-NEXT: popl %edi
 ; X32-SSE2-NEXT: popl %ebx
 ; X32-SSE2-NEXT: jmp _Z3foov # TAILCALL
-; X32-SSE2-NEXT: .LBB44_1:
+; X32-SSE2-NEXT: .LBB46_1:
 ; X32-SSE2-NEXT: popl %esi
 ; X32-SSE2-NEXT: popl %edi
 ; X32-SSE2-NEXT: popl %ebx
@@ -939,10 +1193,10 @@ define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
 ; X64-AVX2-NEXT: movq (%rsi,%rcx,4), %rcx
 ; X64-AVX2-NEXT: shrdq $40, %rdi, %rcx
 ; X64-AVX2-NEXT: cmpq %rax, %rcx
-; X64-AVX2-NEXT: jne .LBB44_1
+; X64-AVX2-NEXT: jne .LBB46_1
 ; X64-AVX2-NEXT: # %bb.2:
 ; X64-AVX2-NEXT: jmp _Z3foov # TAILCALL
-; X64-AVX2-NEXT: .LBB44_1:
+; X64-AVX2-NEXT: .LBB46_1:
 ; X64-AVX2-NEXT: retq
   %3 = sext i32 %0 to i64
   %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3