diff --git a/llvm/test/CodeGen/PowerPC/perfect-shuffle.ll b/llvm/test/CodeGen/PowerPC/perfect-shuffle.ll
new file mode 100644
index 0000000000000..ee7d823d5e9aa
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/perfect-shuffle.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple powerpc64 -mcpu=pwr10 < %s | FileCheck %s --check-prefix=BE
+; RUN: llc -mtriple powerpc64le -mcpu=pwr10 < %s | FileCheck %s --check-prefix=LE
+
+; Bytes 0-3, 8-11, 16-19, 24-27 of %v1:%v2: one vpkudum on LE, a three-instruction word-merge sequence on BE.
+define <4 x float> @shuffle1(<16 x i8> %v1, <16 x i8> %v2) {
+; BE-LABEL: shuffle1:
+; BE:       # %bb.0:
+; BE-NEXT:    vmrglw 4, 2, 3
+; BE-NEXT:    vmrghw 2, 2, 3
+; BE-NEXT:    vmrghw 2, 2, 4
+; BE-NEXT:    blr
+;
+; LE-LABEL: shuffle1:
+; LE:       # %bb.0:
+; LE-NEXT:    vpkudum 2, 3, 2
+; LE-NEXT:    blr
+  %shuf = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+  %cast = bitcast <16 x i8> %shuf to <4 x float>
+  ret <4 x float> %cast
+}
+
+; Bytes 4-7, 12-15, 20-23, 28-31 of %v1:%v2: one vpkudum on BE, a constant-pool vperm on LE.
+define <4 x float> @shuffle2(<16 x i8> %v1, <16 x i8> %v2) {
+; BE-LABEL: shuffle2:
+; BE:       # %bb.0:
+; BE-NEXT:    vpkudum 2, 2, 3
+; BE-NEXT:    blr
+;
+; LE-LABEL: shuffle2:
+; LE:       # %bb.0:
+; LE-NEXT:    plxv 36, .LCPI1_0@PCREL(0), 1
+; LE-NEXT:    vperm 2, 3, 2, 4
+; LE-NEXT:    blr
+  %shuf = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
+  %cast = bitcast <16 x i8> %shuf to <4 x float>
+  ret <4 x float> %cast
+}
+
+; Two independent shuffles with the shuffle1 mask feeding a single fadd.
+define <4 x float> @shuffle3(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x i8> %v4) {
+; BE-LABEL: shuffle3:
+; BE:       # %bb.0:
+; BE-NEXT:    vmrglw 0, 2, 3
+; BE-NEXT:    vmrghw 2, 2, 3
+; BE-NEXT:    vmrglw 3, 4, 5
+; BE-NEXT:    vmrghw 4, 4, 5
+; BE-NEXT:    vmrghw 2, 2, 0
+; BE-NEXT:    vmrghw 3, 4, 3
+; BE-NEXT:    xvaddsp 34, 34, 35
+; BE-NEXT:    blr
+;
+; LE-LABEL: shuffle3:
+; LE:       # %bb.0:
+; LE-NEXT:    vpkudum 2, 3, 2
+; LE-NEXT:    vpkudum 3, 5, 4
+; LE-NEXT:    xvaddsp 34, 34, 35
+; LE-NEXT:    blr
+  %shuf1 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+  %shuf2 = shufflevector <16 x i8> %v3, <16 x i8> %v4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+  %cast1 = bitcast <16 x i8> %shuf1 to <4 x float>
+  %cast2 = bitcast <16 x i8> %shuf2 to <4 x float>
+  %add = fadd <4 x float> %cast1, %cast2
+  ret <4 x float> %add
+}
+
+; Two independent shuffles with the shuffle2 mask feeding a single fadd; LE loads the vperm control vector once.
+define <4 x float> @shuffle4(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x i8> %v4) {
+; BE-LABEL: shuffle4:
+; BE:       # %bb.0:
+; BE-NEXT:    vpkudum 2, 2, 3
+; BE-NEXT:    vpkudum 3, 4, 5
+; BE-NEXT:    xvaddsp 34, 34, 35
+; BE-NEXT:    blr
+;
+; LE-LABEL: shuffle4:
+; LE:       # %bb.0:
+; LE-NEXT:    plxv 32, .LCPI3_0@PCREL(0), 1
+; LE-NEXT:    vperm 2, 3, 2, 0
+; LE-NEXT:    vperm 3, 5, 4, 0
+; LE-NEXT:    xvaddsp 34, 34, 35
+; LE-NEXT:    blr
+  %shuf1 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
+  %shuf2 = shufflevector <16 x i8> %v3, <16 x i8> %v4, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
+  %cast1 = bitcast <16 x i8> %shuf1 to <4 x float>
+  %cast2 = bitcast <16 x i8> %shuf2 to <4 x float>
+  %add = fadd <4 x float> %cast1, %cast2
+  ret <4 x float> %add
+}
+
+; shuffle1 mask used in two basic blocks; element 4 of the first shuffle result decides the branch.
+define <4 x float> @shuffle5(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3, <16 x i8> %v4) {
+; BE-LABEL: shuffle5:
+; BE:       # %bb.0: # %entry
+; BE-NEXT:    vmrglw 0, 2, 3
+; BE-NEXT:    vmrghw 3, 2, 3
+; BE-NEXT:    li 3, 8
+; BE-NEXT:    vextublx 3, 3, 2
+; BE-NEXT:    vmrghw 3, 3, 0
+; BE-NEXT:    andi. 3, 3, 255
+; BE-NEXT:    vmr 2, 3
+; BE-NEXT:    beq 0, .LBB4_2
+; BE-NEXT:  # %bb.1: # %exit
+; BE-NEXT:    xvaddsp 34, 35, 34
+; BE-NEXT:    blr
+; BE-NEXT:  .LBB4_2: # %second
+; BE-NEXT:    vmrglw 2, 4, 5
+; BE-NEXT:    vmrghw 4, 4, 5
+; BE-NEXT:    vmrghw 2, 4, 2
+; BE-NEXT:    xvaddsp 34, 35, 34
+; BE-NEXT:    blr
+;
+; LE-LABEL: shuffle5:
+; LE:       # %bb.0: # %entry
+; LE-NEXT:    vpkudum 3, 3, 2
+; LE-NEXT:    li 3, 8
+; LE-NEXT:    vextubrx 3, 3, 2
+; LE-NEXT:    vmr 2, 3
+; LE-NEXT:    andi. 3, 3, 255
+; LE-NEXT:    beq 0, .LBB4_2
+; LE-NEXT:  # %bb.1: # %exit
+; LE-NEXT:    xvaddsp 34, 35, 34
+; LE-NEXT:    blr
+; LE-NEXT:  .LBB4_2: # %second
+; LE-NEXT:    vpkudum 2, 5, 4
+; LE-NEXT:    xvaddsp 34, 35, 34
+; LE-NEXT:    blr
+entry:
+  %shuf1 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+  %fetch = extractelement <16 x i8> %shuf1, i32 4
+  %icmp = icmp eq i8 %fetch, 0
+  br i1 %icmp, label %second, label %exit
+
+second:
+  %shufs = shufflevector <16 x i8> %v3, <16 x i8> %v4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
+  br label %exit
+
+exit:
+  %shuf2 = phi <16 x i8> [%shuf1, %entry], [%shufs, %second]
+  %cast1 = bitcast <16 x i8> %shuf1 to <4 x float>
+  %cast2 = bitcast <16 x i8> %shuf2 to <4 x float>
+  %add = fadd <4 x float> %cast1, %cast2
+  ret <4 x float> %add
+}