diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td index b38dd4ae948c6..fc3cde3f464bb 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td @@ -202,7 +202,7 @@ multiclass ACC_UM_M244_XO46 opcode, bits<8> xo, dag IOL, string asmbase, RegConstraint<"@earlyclobber $AT">; def PM#NAME#WPP : MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), + opcode, !or(xo, 0x20), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), @@ -765,7 +765,7 @@ let Predicates = [MMA, IsISAFuture] in { def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), (XVF64GERWPN $ATi, $XA, RCCp.BToVSRC)>; def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; + (XVF64GERWNP $ATi, $XA, RCCp.BToVSRC)>; def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), (XVF64GERWNN $ATi, $XA, RCCp.BToVSRC)>; diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll index 7e2f744ac1d71..94121f09e36be 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c) local_unnamed_addr #0 { ; CHECK-LABEL: testMultiply: @@ -91,6 +97,91 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound ; CHECK-BE-NEXT: ld r30, -16(r1) ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: testMultiply: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: mflr r0 +; CHECK-LE-WACC-NEXT: std r30, -16(r1) +; CHECK-LE-WACC-NEXT: std r0, 16(r1) +; CHECK-LE-WACC-NEXT: clrldi r0, r1, 59 +; CHECK-LE-WACC-NEXT: subfic r0, r0, -128 +; CHECK-LE-WACC-NEXT: mr r30, r1 +; CHECK-LE-WACC-NEXT: stdux r1, r1, r0 +; CHECK-LE-WACC-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: lxv v31, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v30, 0(r4) +; CHECK-LE-WACC-NEXT: addi r3, r1, 32 +; CHECK-LE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill +; CHECK-LE-WACC-NEXT: vmr v2, v31 +; CHECK-LE-WACC-NEXT: vmr v3, v30 +; CHECK-LE-WACC-NEXT: mr r29, r5 +; CHECK-LE-WACC-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc +; CHECK-LE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, v31, v30 +; CHECK-LE-WACC-NEXT: lxv vs0, 48(r1) +; CHECK-LE-WACC-NEXT: lxv vs1, 32(r1) +; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, vs1, vs0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v5, 0(r29) +; CHECK-LE-WACC-NEXT: pstxv v4, 8(r29), 0 +; CHECK-LE-WACC-NEXT: stxv v3, 16(r29) +; CHECK-LE-WACC-NEXT: pstxv v2, 24(r29), 0 +; CHECK-LE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload +; CHECK-LE-WACC-NEXT: mr r1, r30 +; CHECK-LE-WACC-NEXT: ld r0, 16(r1) +; CHECK-LE-WACC-NEXT: ld r30, -16(r1) +; CHECK-LE-WACC-NEXT: mtlr r0 +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testMultiply: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: mflr r0 +; CHECK-BE-WACC-NEXT: std r30, -16(r1) +; CHECK-BE-WACC-NEXT: std r0, 16(r1) +; CHECK-BE-WACC-NEXT: clrldi r0, r1, 59 +; CHECK-BE-WACC-NEXT: subfic r0, r0, -224 +; CHECK-BE-WACC-NEXT: mr r30, r1 +; CHECK-BE-WACC-NEXT: stdux r1, r1, r0 +; CHECK-BE-WACC-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: lxv v31, 0(r3) +; CHECK-BE-WACC-NEXT: lxv v30, 0(r4) +; CHECK-BE-WACC-NEXT: addi r3, r1, 128 +; CHECK-BE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill +; CHECK-BE-WACC-NEXT: vmr v2, v31 +; CHECK-BE-WACC-NEXT: vmr v3, v30 +; CHECK-BE-WACC-NEXT: mr r29, r5 +; CHECK-BE-WACC-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_ +; CHECK-BE-WACC-NEXT: nop +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v31, v30 +; CHECK-BE-WACC-NEXT: lxv vs0, 128(r1) +; CHECK-BE-WACC-NEXT: lxv vs1, 144(r1) +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, vs0, vs1 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: vmr v1, v2 +; CHECK-BE-WACC-NEXT: vmr v7, v4 +; CHECK-BE-WACC-NEXT: vmr v0, v3 +; CHECK-BE-WACC-NEXT: vmr v6, v5 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r29) +; CHECK-BE-WACC-NEXT: pstxv v3, 8(r29), 0 +; CHECK-BE-WACC-NEXT: stxv v4, 16(r29) +; CHECK-BE-WACC-NEXT: pstxv v5, 24(r29), 0 +; CHECK-BE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload +; CHECK-BE-WACC-NEXT: mr r1, r30 +; CHECK-BE-WACC-NEXT: ld r0, 16(r1) +; CHECK-BE-WACC-NEXT: ld r30, -16(r1) +; CHECK-BE-WACC-NEXT: mtlr r0 +; CHECK-BE-WACC-NEXT: blr entry: %vP = alloca <256 x i1>, align 32 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %vP) diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll index 059d60a9608f8..bc5d5bed36e9b 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -3,10 +3,18 @@ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \ ; RUN: --check-prefix=LE-PAIRED +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \ +; RUN: --check-prefix=LE-PAIRED-WACC ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \ ; RUN: FileCheck %s --check-prefix=BE-PAIRED +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \ +; RUN: FileCheck %s --check-prefix=BE-PAIRED-WACC ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: | FileCheck %s --check-prefix=LE-PWR9 @@ -36,6 +44,20 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+128(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-WACC-LABEL: testLdSt: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv v3, f@PCREL+64(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v5, f@PCREL+96(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v2, f@PCREL+80(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v4, f@PCREL+112(0), 1 +; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-WACC-NEXT: pstxv v4, f@PCREL+176(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v5, f@PCREL+160(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v2, f@PCREL+144(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v3, f@PCREL+128(0), 1 +; LE-PAIRED-WACC-NEXT: blr +; ; BE-PAIRED-LABEL: testLdSt: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha @@ -50,6 +72,22 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxv vs2, 160(r3) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-WACC-LABEL: testLdSt: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, f@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, f@toc@l +; BE-PAIRED-WACC-NEXT: lxv v3, 112(r3) +; BE-PAIRED-WACC-NEXT: lxv v5, 80(r3) +; BE-PAIRED-WACC-NEXT: lxv v2, 96(r3) +; BE-PAIRED-WACC-NEXT: lxv v4, 64(r3) +; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-WACC-NEXT: stxv v5, 176(r3) +; BE-PAIRED-WACC-NEXT: stxv v4, 160(r3) +; BE-PAIRED-WACC-NEXT: stxv v3, 144(r3) +; BE-PAIRED-WACC-NEXT: stxv v2, 128(r3) +; BE-PAIRED-WACC-NEXT: blr +; ; LE-PWR9-LABEL: testLdSt: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, f@toc@ha @@ -147,6 +185,25 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: stxv vs2, 16(r4) ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-WACC-LABEL: testXLdSt: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: paddi r5, 0, f@PCREL, 1 +; LE-PAIRED-WACC-NEXT: sldi r3, r3, 6 +; LE-PAIRED-WACC-NEXT: add r6, r5, r3 +; LE-PAIRED-WACC-NEXT: lxvx v3, r5, r3 +; LE-PAIRED-WACC-NEXT: lxv v2, 16(r6) +; LE-PAIRED-WACC-NEXT: lxv v5, 32(r6) +; LE-PAIRED-WACC-NEXT: lxv v4, 48(r6) +; LE-PAIRED-WACC-NEXT: sldi r3, r4, 6 +; LE-PAIRED-WACC-NEXT: add r4, r5, r3 +; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-WACC-NEXT: stxvx v3, r5, r3 +; LE-PAIRED-WACC-NEXT: stxv v4, 48(r4) +; LE-PAIRED-WACC-NEXT: stxv v5, 32(r4) +; LE-PAIRED-WACC-NEXT: stxv v2, 16(r4) +; LE-PAIRED-WACC-NEXT: blr +; ; BE-PAIRED-LABEL: testXLdSt: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r5, r2, f@toc@ha @@ -165,6 +222,26 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxv vs2, 32(r4) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-WACC-LABEL: testXLdSt: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r5, r2, f@toc@ha +; BE-PAIRED-WACC-NEXT: addi r5, r5, f@toc@l +; BE-PAIRED-WACC-NEXT: sldi r3, r3, 6 +; BE-PAIRED-WACC-NEXT: add r6, r5, r3 +; BE-PAIRED-WACC-NEXT: lxvx v2, r5, r3 +; BE-PAIRED-WACC-NEXT: lxv v5, 48(r6) +; BE-PAIRED-WACC-NEXT: lxv v3, 16(r6) +; BE-PAIRED-WACC-NEXT: lxv v4, 32(r6) +; BE-PAIRED-WACC-NEXT: sldi r3, r4, 6 +; BE-PAIRED-WACC-NEXT: add r4, r5, r3 +; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-WACC-NEXT: stxvx v2, r5, r3 +; BE-PAIRED-WACC-NEXT: stxv v5, 48(r4) +; BE-PAIRED-WACC-NEXT: stxv v4, 32(r4) +; BE-PAIRED-WACC-NEXT: stxv v3, 16(r4) +; BE-PAIRED-WACC-NEXT: blr +; ; LE-PWR9-LABEL: testXLdSt: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r5, r2, f@toc@ha @@ -263,6 +340,20 @@ define dso_local void @testUnalignedLdSt() { ; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+19(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-WACC-LABEL: testUnalignedLdSt: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv v3, f@PCREL+11(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v5, f@PCREL+43(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v2, f@PCREL+27(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v4, f@PCREL+59(0), 1 +; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-WACC-NEXT: pstxv v4, f@PCREL+67(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v5, f@PCREL+51(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v2, f@PCREL+35(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v3, f@PCREL+19(0), 1 +; LE-PAIRED-WACC-NEXT: blr +; ; BE-PAIRED-LABEL: testUnalignedLdSt: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha @@ -277,6 +368,22 @@ define dso_local void @testUnalignedLdSt() { ; BE-PAIRED-NEXT: pstxv vs2, 51(r3), 0 ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-WACC-LABEL: testUnalignedLdSt: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, f@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, f@toc@l +; BE-PAIRED-WACC-NEXT: plxv v3, 59(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv v5, 27(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv v2, 43(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv v4, 11(r3), 0 +; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-WACC-NEXT: pstxv v5, 67(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv v4, 51(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv v3, 35(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv v2, 19(r3), 0 +; BE-PAIRED-WACC-NEXT: blr +; ; LE-PWR9-LABEL: testUnalignedLdSt: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, f@toc@ha @@ -381,6 +488,14 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+64(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-WACC-LABEL: testLdStPair: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+48(0), 1 +; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+32(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+80(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+64(0), 1 +; LE-PAIRED-WACC-NEXT: blr +; ; BE-PAIRED-LABEL: testLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, g@toc@ha @@ -391,6 +506,16 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxv vs0, 64(r3) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-WACC-LABEL: testLdStPair: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l +; BE-PAIRED-WACC-NEXT: lxv vs0, 48(r3) +; BE-PAIRED-WACC-NEXT: lxv vs1, 32(r3) +; BE-PAIRED-WACC-NEXT: stxv vs0, 80(r3) +; BE-PAIRED-WACC-NEXT: stxv vs1, 64(r3) +; BE-PAIRED-WACC-NEXT: blr +; ; LE-PWR9-LABEL: testLdStPair: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, g@toc@ha @@ -460,6 +585,19 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: stxv vs1, 16(r4) ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-WACC-LABEL: testXLdStPair: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: sldi r3, r3, 5 +; LE-PAIRED-WACC-NEXT: paddi r5, 0, g@PCREL, 1 +; LE-PAIRED-WACC-NEXT: add r6, r5, r3 +; LE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3 +; LE-PAIRED-WACC-NEXT: lxv vs1, 16(r6) +; LE-PAIRED-WACC-NEXT: sldi r3, r4, 5 +; LE-PAIRED-WACC-NEXT: add r4, r5, r3 +; LE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3 +; LE-PAIRED-WACC-NEXT: stxv vs1, 16(r4) +; LE-PAIRED-WACC-NEXT: blr +; ; BE-PAIRED-LABEL: testXLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r5, r2, g@toc@ha @@ -474,6 +612,20 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: stxv vs1, 16(r4) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-WACC-LABEL: testXLdStPair: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r5, r2, g@toc@ha +; BE-PAIRED-WACC-NEXT: sldi r3, r3, 5 +; BE-PAIRED-WACC-NEXT: addi r5, r5, g@toc@l +; BE-PAIRED-WACC-NEXT: add r6, r5, r3 +; BE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3 +; BE-PAIRED-WACC-NEXT: lxv vs1, 16(r6) +; BE-PAIRED-WACC-NEXT: sldi r3, r4, 5 +; BE-PAIRED-WACC-NEXT: add r4, r5, r3 +; BE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3 +; BE-PAIRED-WACC-NEXT: stxv vs1, 16(r4) +; BE-PAIRED-WACC-NEXT: blr +; ; LE-PWR9-LABEL: testXLdStPair: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r5, r2, g@toc@ha @@ -548,6 +700,14 @@ define dso_local void @testUnalignedLdStPair() { ; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+19(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-WACC-LABEL: testUnalignedLdStPair: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+27(0), 1 +; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+11(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+35(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+19(0), 1 +; LE-PAIRED-WACC-NEXT: blr +; ; BE-PAIRED-LABEL: testUnalignedLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, g@toc@ha @@ -558,6 +718,16 @@ define dso_local void @testUnalignedLdStPair() { ; BE-PAIRED-NEXT: pstxv vs0, 19(r3), 0 ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-WACC-LABEL: testUnalignedLdStPair: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l +; BE-PAIRED-WACC-NEXT: plxv vs0, 27(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv vs1, 11(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv vs0, 35(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv vs1, 19(r3), 0 +; BE-PAIRED-WACC-NEXT: blr +; ; LE-PWR9-LABEL: testUnalignedLdStPair: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, g@toc@ha diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll index abc65bed5bf6c..9db8ba1c9eb09 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll @@ -13,6 +13,13 @@ ; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC + declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) declare void @foo() @@ -119,6 +126,101 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: intrinsics1: +; CHECK-LE-WACC: # %bb.0: +; CHECK-LE-WACC-NEXT: mflr r0 +; CHECK-LE-WACC-NEXT: std r0, 16(r1) +; CHECK-LE-WACC-NEXT: stdu r1, -176(r1) +; CHECK-LE-WACC-NEXT: .cfi_def_cfa_offset 176 +; CHECK-LE-WACC-NEXT: .cfi_offset lr, 16 +; CHECK-LE-WACC-NEXT: .cfi_offset r30, -16 +; CHECK-LE-WACC-NEXT: .cfi_offset v28, -80 +; CHECK-LE-WACC-NEXT: .cfi_offset v29, -64 +; CHECK-LE-WACC-NEXT: .cfi_offset v30, -48 +; CHECK-LE-WACC-NEXT: .cfi_offset v31, -32 +; CHECK-LE-WACC-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: vmr v31, v5 +; CHECK-LE-WACC-NEXT: vmr v29, v3 +; CHECK-LE-WACC-NEXT: vmr v30, v4 +; CHECK-LE-WACC-NEXT: vmr v28, v2 +; CHECK-LE-WACC-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; CHECK-LE-WACC-NEXT: ld r30, 272(r1) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp60, vsp62, 0 +; CHECK-LE-WACC-NEXT: xvf16ger2pp wacc0, v2, v4 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxvp vsp36, 64(r1) +; CHECK-LE-WACC-NEXT: stxvp vsp34, 32(r1) +; CHECK-LE-WACC-NEXT: bl foo@notoc +; CHECK-LE-WACC-NEXT: lxvp vsp34, 64(r1) +; CHECK-LE-WACC-NEXT: lxvp vsp36, 32(r1) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-LE-WACC-NEXT: xvf16ger2pp wacc0, v28, v30 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r30) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r30) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r30) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r30) +; CHECK-LE-WACC-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; CHECK-LE-WACC-NEXT: addi r1, r1, 176 +; CHECK-LE-WACC-NEXT: ld r0, 16(r1) +; CHECK-LE-WACC-NEXT: mtlr r0 +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: intrinsics1: +; CHECK-BE-WACC: # %bb.0: +; CHECK-BE-WACC-NEXT: mflr r0 +; CHECK-BE-WACC-NEXT: std r0, 16(r1) +; CHECK-BE-WACC-NEXT: stdu r1, -256(r1) +; CHECK-BE-WACC-NEXT: .cfi_def_cfa_offset 256 +; CHECK-BE-WACC-NEXT: .cfi_offset lr, 16 +; CHECK-BE-WACC-NEXT: .cfi_offset r30, -16 +; CHECK-BE-WACC-NEXT: .cfi_offset v28, -80 +; CHECK-BE-WACC-NEXT: .cfi_offset v29, -64 +; CHECK-BE-WACC-NEXT: .cfi_offset v30, -48 +; CHECK-BE-WACC-NEXT: .cfi_offset v31, -32 +; CHECK-BE-WACC-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: vmr v31, v5 +; CHECK-BE-WACC-NEXT: vmr v29, v3 +; CHECK-BE-WACC-NEXT: vmr v30, v4 +; CHECK-BE-WACC-NEXT: vmr v28, v2 +; CHECK-BE-WACC-NEXT: std r30, 240(r1) # 8-byte Folded Spill +; CHECK-BE-WACC-NEXT: ld r30, 368(r1) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp60, vsp62, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v2, v4 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxvp vsp36, 112(r1) +; CHECK-BE-WACC-NEXT: stxvp vsp34, 144(r1) +; CHECK-BE-WACC-NEXT: bl foo +; CHECK-BE-WACC-NEXT: nop +; CHECK-BE-WACC-NEXT: lxvp vsp34, 112(r1) +; CHECK-BE-WACC-NEXT: lxvp vsp36, 144(r1) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v28, v30 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r30) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r30) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r30) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r30) +; CHECK-BE-WACC-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: ld r30, 240(r1) # 8-byte Folded Reload +; CHECK-BE-WACC-NEXT: addi r1, r1, 256 +; CHECK-BE-WACC-NEXT: ld r0, 16(r1) +; CHECK-BE-WACC-NEXT: mtlr r0 +; CHECK-BE-WACC-NEXT: blr %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4) %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3) tail call void @foo() diff --git a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll index e932aec2c7134..7b36fa4f64f71 100644 --- a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC ; Function Attrs: nofree nounwind writeonly define dso_local void @test1(ptr nocapture readnone %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { @@ -27,6 +33,26 @@ define dso_local void @test1(ptr nocapture readnone %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test1: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: xvi16ger2 wacc0, v2, v2 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi16ger2 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -57,6 +83,26 @@ define dso_local void @test2(ptr nocapture readnone %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test2: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: pmxvi16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -97,6 +143,36 @@ define dso_local void @test3(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test3: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: xvi8ger4spp wacc0, v2, v2 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi8ger4spp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -138,6 +214,36 @@ define dso_local void @test4(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test4: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: xvi16ger2pp wacc0, v2, v2 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test4: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi16ger2pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -179,6 +285,36 @@ define dso_local void @test5(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test5: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: pmxvi8ger4spp wacc0, v2, v2, 0, 0, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test5: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi8ger4spp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -220,6 +356,36 @@ define dso_local void @test6(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test6: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: pmxvi16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test6: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll index 8fbc9d785796d..3505cbb197bf9 100644 --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC ; assemble_acc declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) @@ -32,6 +38,28 @@ define void @ass_acc(ptr %ptr, <16 x i8> %vc) { ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: ass_acc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: ass_acc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %ptr, align 64 @@ -66,6 +94,28 @@ define void @int_xxmtacc(ptr %ptr, <16 x i8> %vc) { ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: int_xxmtacc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: int_xxmtacc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: ; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is ; generated from the call to xxmtacc then one xxmfacc is generated for the store @@ -101,6 +151,28 @@ define void @int_xxmfacc(ptr %ptr, <16 x i8> %vc) { ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: int_xxmfacc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: int_xxmfacc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: ; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is ; generated from the call to xxmfacc then one xxmfacc is generated for the store @@ -132,6 +204,26 @@ define void @int_xxsetaccz(ptr %ptr) { ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: int_xxsetaccz: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: int_xxsetaccz: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() store <512 x i1> %0, ptr %ptr, align 64 @@ -160,6 +252,26 @@ define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) { ; CHECK-BE-NEXT: stxv vs2, 0(r5) ; CHECK-BE-NEXT: stxv vs3, 0(r6) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: disass_acc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 0(r4) +; CHECK-WACC-NEXT: stxv v3, 0(r5) +; CHECK-WACC-NEXT: stxv v2, 0(r6) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: disass_acc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 0(r4) +; CHECK-BE-WACC-NEXT: stxv v4, 0(r5) +; CHECK-BE-WACC-NEXT: stxv v5, 0(r6) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0) @@ -219,6 +331,50 @@ define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) { ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testBranch: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmplwi r7, 0 +; CHECK-WACC-NEXT: beq cr0, .LBB5_2 +; CHECK-WACC-NEXT: # %bb.1: # %if.then +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: b .LBB5_3 +; CHECK-WACC-NEXT: .LBB5_2: # %if.else +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-WACC-NEXT: .LBB5_3: # %if.end +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testBranch: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmplwi r7, 0 +; CHECK-BE-WACC-NEXT: beq cr0, .LBB5_2 +; CHECK-BE-WACC-NEXT: # %bb.1: # %if.then +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: b .LBB5_3 +; CHECK-BE-WACC-NEXT: .LBB5_2: # %if.else +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: .LBB5_3: # %if.end +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: %tobool = icmp eq i32 %val, 0 br i1 %tobool, label %if.else, label %if.then @@ -273,6 +429,36 @@ define void @testcse(ptr %res, <16 x i8> %vc) { ; CHECK-BE-NEXT: stxv vs3, 112(r3) ; CHECK-BE-NEXT: stxv vs2, 96(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testcse: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 112(r3) +; CHECK-WACC-NEXT: stxv v5, 96(r3) +; CHECK-WACC-NEXT: stxv v2, 80(r3) +; CHECK-WACC-NEXT: stxv v3, 64(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testcse: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 112(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -320,6 +506,42 @@ define void @testcse2(ptr %res, <16 x i8> %vc) { ; CHECK-BE-NEXT: stxv vs3, 112(r3) ; CHECK-BE-NEXT: stxv vs2, 96(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testcse2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r3) +; CHECK-WACC-NEXT: stxv v5, 96(r3) +; CHECK-WACC-NEXT: stxv v2, 80(r3) +; CHECK-WACC-NEXT: stxv v3, 64(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testcse2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -367,6 +589,42 @@ define void @testcse3(ptr %res, <16 x i8> %vc) { ; CHECK-BE-NEXT: stxv vs3, 112(r3) ; CHECK-BE-NEXT: stxv vs2, 96(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testcse3: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r3) +; CHECK-WACC-NEXT: stxv v5, 96(r3) +; CHECK-WACC-NEXT: stxv v2, 80(r3) +; CHECK-WACC-NEXT: stxv v3, 64(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testcse3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -475,6 +733,104 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-BE-NEXT: bdnz .LBB9_2 ; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testcse4: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmpwi r4, 1 +; CHECK-WACC-NEXT: bltlr cr0 +; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-WACC-NEXT: clrldi r4, r4, 32 +; CHECK-WACC-NEXT: mtctr r4 +; CHECK-WACC-NEXT: li r4, 0 +; CHECK-WACC-NEXT: li r6, 0 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB9_2: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: rldic r7, r6, 4, 28 +; CHECK-WACC-NEXT: add r8, r5, r7 +; CHECK-WACC-NEXT: lxvx vs0, r5, r7 +; CHECK-WACC-NEXT: lxv vs1, 16(r8) +; CHECK-WACC-NEXT: dmxxsetaccz wacc2 +; CHECK-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1 +; CHECK-WACC-NEXT: lxv vs0, 32(r8) +; CHECK-WACC-NEXT: lxv vs1, 48(r8) +; CHECK-WACC-NEXT: rldic r7, r4, 6, 26 +; CHECK-WACC-NEXT: addi r4, r4, 3 +; CHECK-WACC-NEXT: addi r6, r6, 6 +; CHECK-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1 +; CHECK-WACC-NEXT: lxv vs0, 64(r8) +; CHECK-WACC-NEXT: lxv vs1, 80(r8) +; CHECK-WACC-NEXT: add r8, r3, r7 +; CHECK-WACC-NEXT: xvf32gernp wacc0, vs0, vs1 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 +; CHECK-WACC-NEXT: stxvx v3, r3, r7 +; CHECK-WACC-NEXT: stxv v4, 48(r8) +; CHECK-WACC-NEXT: stxv v5, 32(r8) +; CHECK-WACC-NEXT: stxv v2, 16(r8) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r8) +; CHECK-WACC-NEXT: stxv v5, 96(r8) +; CHECK-WACC-NEXT: stxv v2, 80(r8) +; CHECK-WACC-NEXT: stxv v3, 64(r8) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 176(r8) +; CHECK-WACC-NEXT: stxv v5, 160(r8) +; CHECK-WACC-NEXT: stxv v2, 144(r8) +; CHECK-WACC-NEXT: stxv v3, 128(r8) +; CHECK-WACC-NEXT: bdnz .LBB9_2 +; CHECK-WACC-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testcse4: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmpwi r4, 1 +; CHECK-BE-WACC-NEXT: bltlr cr0 +; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-BE-WACC-NEXT: clrldi r4, r4, 32 +; CHECK-BE-WACC-NEXT: mtctr r4 +; CHECK-BE-WACC-NEXT: li r4, 0 +; CHECK-BE-WACC-NEXT: li r6, 0 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB9_2: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: rldic r7, r6, 4, 28 +; CHECK-BE-WACC-NEXT: add r8, r5, r7 +; CHECK-BE-WACC-NEXT: lxvx vs0, r5, r7 +; CHECK-BE-WACC-NEXT: lxv vs1, 16(r8) +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc2 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1 +; CHECK-BE-WACC-NEXT: lxv vs0, 32(r8) +; CHECK-BE-WACC-NEXT: lxv vs1, 48(r8) +; CHECK-BE-WACC-NEXT: rldic r7, r4, 6, 26 +; CHECK-BE-WACC-NEXT: addi r4, r4, 3 +; CHECK-BE-WACC-NEXT: addi r6, r6, 6 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1 +; CHECK-BE-WACC-NEXT: lxv vs0, 64(r8) +; CHECK-BE-WACC-NEXT: lxv vs1, 80(r8) +; CHECK-BE-WACC-NEXT: add r8, r3, r7 +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, vs0, vs1 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 +; CHECK-BE-WACC-NEXT: stxvx v2, r3, r7 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r8) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r8) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r8) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r8) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r8) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r8) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r8) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 176(r8) +; CHECK-BE-WACC-NEXT: stxv v4, 160(r8) +; CHECK-BE-WACC-NEXT: stxv v3, 144(r8) +; CHECK-BE-WACC-NEXT: stxv v2, 128(r8) +; CHECK-BE-WACC-NEXT: bdnz .LBB9_2 +; CHECK-BE-WACC-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: blr entry: %cmp55 = icmp sgt i32 %lim, 0 br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup @@ -600,6 +956,71 @@ define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> %vc) nounwind { ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testRedundantPrimeUnprime: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: mflr r0 +; CHECK-WACC-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-WACC-NEXT: std r0, 16(r1) +; CHECK-WACC-NEXT: stdu r1, -112(r1) +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0 +; CHECK-WACC-NEXT: stxv v0, 48(r3) +; CHECK-WACC-NEXT: stxv v1, 32(r3) +; CHECK-WACC-NEXT: stxv v4, 16(r3) +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-WACC-NEXT: mr r30, r3 +; CHECK-WACC-NEXT: stxvp vsp36, 64(r1) +; CHECK-WACC-NEXT: stxvp vsp34, 32(r1) +; CHECK-WACC-NEXT: bl testRedundantPrimeUnprimeF@notoc +; CHECK-WACC-NEXT: lxvp vsp34, 64(r1) +; CHECK-WACC-NEXT: lxvp vsp36, 32(r1) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r30) +; CHECK-WACC-NEXT: stxv v5, 96(r30) +; CHECK-WACC-NEXT: stxv v2, 80(r30) +; CHECK-WACC-NEXT: stxv v3, 64(r30) +; CHECK-WACC-NEXT: addi r1, r1, 112 +; CHECK-WACC-NEXT: ld r0, 16(r1) +; CHECK-WACC-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-WACC-NEXT: mtlr r0 +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testRedundantPrimeUnprime: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: mflr r0 +; CHECK-BE-WACC-NEXT: std r0, 16(r1) +; CHECK-BE-WACC-NEXT: stdu r1, -192(r1) +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: std r30, 176(r1) # 8-byte Folded Spill +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v1, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v0, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 0(r3) +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-BE-WACC-NEXT: mr r30, r3 +; CHECK-BE-WACC-NEXT: stxvp vsp36, 112(r1) +; CHECK-BE-WACC-NEXT: stxvp vsp34, 144(r1) +; CHECK-BE-WACC-NEXT: bl testRedundantPrimeUnprimeF +; CHECK-BE-WACC-NEXT: nop +; CHECK-BE-WACC-NEXT: lxvp vsp34, 112(r1) +; CHECK-BE-WACC-NEXT: lxvp vsp36, 144(r1) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r30) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r30) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r30) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r30) +; CHECK-BE-WACC-NEXT: ld r30, 176(r1) # 8-byte Folded Reload +; CHECK-BE-WACC-NEXT: addi r1, r1, 192 +; CHECK-BE-WACC-NEXT: ld r0, 16(r1) +; CHECK-BE-WACC-NEXT: mtlr r0 +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() store <512 x i1> %0, ptr %dst, align 64 @@ -646,6 +1067,38 @@ define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test_ldst_1: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: plxvp vsp36, 8(r4), 0 +; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test_ldst_1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: plxvp vsp36, 8(r4), 0 +; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = getelementptr i8, ptr %vpp, i64 8 @@ -688,6 +1141,38 @@ define void @test_ldst_2(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, p ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test_ldst_2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxvp vsp36, 0(r4) +; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test_ldst_2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxvp vsp36, 0(r4) +; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) @@ -729,6 +1214,38 @@ define void @test_ldst_3(ptr nocapture readonly %vqp, i64 %offs, ptr %vpp, <16 x ; CHECK-BE-NEXT: stxv vs3, 48(r9) ; CHECK-BE-NEXT: stxv vs2, 32(r9) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test_ldst_3: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxvp vsp36, 0(r5) +; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r9) +; CHECK-WACC-NEXT: stxv v5, 32(r9) +; CHECK-WACC-NEXT: stxv v2, 16(r9) +; CHECK-WACC-NEXT: stxv v3, 0(r9) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test_ldst_3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxvp vsp36, 0(r5) +; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r9) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r9) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r9) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r9) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll index ac6ad41633492..ff860b8d6ff22 100644 --- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>) @@ -56,6 +62,46 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: intrinsics1: +; CHECK-WACC: # %bb.0: +; CHECK-WACC-NEXT: vmr v1, v4 +; CHECK-WACC-NEXT: vmr v4, v3 +; CHECK-WACC-NEXT: vmr v0, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v4 +; CHECK-WACC-NEXT: ld r3, 96(r1) +; CHECK-WACC-NEXT: xvf16ger2pp wacc0, v0, v1 +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: vmr v2, v5 +; CHECK-WACC-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 +; CHECK-WACC-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: intrinsics1: +; CHECK-BE-WACC: # %bb.0: +; CHECK-BE-WACC-NEXT: vmr v1, v4 +; CHECK-BE-WACC-NEXT: vmr v4, v3 +; CHECK-BE-WACC-NEXT: vmr v0, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v4 +; CHECK-BE-WACC-NEXT: ld r3, 112(r1) +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v0, v1 +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: vmr v2, v5 +; CHECK-BE-WACC-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 +; CHECK-BE-WACC-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc3, <16 x i8> %vc2, <16 x i8> %vc4) %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2) %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3) @@ -115,6 +161,46 @@ define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) { ; CHECK-BE-NEXT: stxv vs2, 0(r5) ; CHECK-BE-NEXT: stxv vs3, 0(r6) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: intrinsics2: +; CHECK-WACC: # %bb.0: +; CHECK-WACC-NEXT: lxv v2, 0(r3) +; CHECK-WACC-NEXT: lxv v4, 0(r5) +; CHECK-WACC-NEXT: lxv v3, 0(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r6) +; CHECK-WACC-NEXT: vmr v1, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-WACC-NEXT: xvi8ger4pp wacc0, v2, v3 +; CHECK-WACC-NEXT: xvf16ger2pn wacc0, v2, v4 +; CHECK-WACC-NEXT: vmr v0, v5 +; CHECK-WACC-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0 +; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 0(r4) +; CHECK-WACC-NEXT: stxv v3, 0(r5) +; CHECK-WACC-NEXT: stxv v2, 0(r6) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: intrinsics2: +; CHECK-BE-WACC: # %bb.0: +; CHECK-BE-WACC-NEXT: lxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 0(r5) +; CHECK-BE-WACC-NEXT: lxv v3, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 0(r6) +; CHECK-BE-WACC-NEXT: vmr v1, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi8ger4pp wacc0, v2, v3 +; CHECK-BE-WACC-NEXT: xvf16ger2pn wacc0, v2, v4 +; CHECK-BE-WACC-NEXT: vmr v0, v5 +; CHECK-BE-WACC-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0 +; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 0(r4) +; CHECK-BE-WACC-NEXT: stxv v4, 0(r5) +; CHECK-BE-WACC-NEXT: stxv v5, 0(r6) +; CHECK-BE-WACC-NEXT: blr %vc1 = load <16 x i8>, ptr %ptr1, align 16 %vc2 = load <16 x i8>, ptr %ptr2, align 16 %vc3 = load <16 x i8>, ptr %ptr3, align 16 @@ -157,6 +243,26 @@ define void @test1(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test1: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvi4ger8 wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi4ger8 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -196,6 +302,36 @@ define void @test2(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -226,6 +362,26 @@ define void @test3(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test3: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -265,6 +421,36 @@ define void @test4(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test4: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test4: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -295,6 +481,26 @@ define void @test5(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test5: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvi8ger4 wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test5: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi8ger4 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -334,6 +540,36 @@ define void @test6(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test6: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi8ger4pp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test6: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi8ger4pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -364,6 +600,26 @@ define void @test7(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test7: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test7: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -403,6 +659,36 @@ define void @test8(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test8: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test8: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -433,6 +719,26 @@ define void @test9(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test9: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvi16ger2s wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test9: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi16ger2s wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -472,6 +778,36 @@ define void @test10(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test10: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi16ger2spp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test10: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi16ger2spp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -502,6 +838,26 @@ define void @test11(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test11: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test11: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -541,6 +897,36 @@ define void @test12(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test12: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test12: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -571,6 +957,26 @@ define void @test13(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test13: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvf16ger2 wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test13: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvf16ger2 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -610,6 +1016,36 @@ define void @test14(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test14: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2pp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test14: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -650,6 +1086,36 @@ define void @test15(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test15: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2pn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test15: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -690,6 +1156,36 @@ define void @test16(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test16: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2np wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test16: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2np wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -730,6 +1226,36 @@ define void @test17(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test17: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2nn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test17: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2nn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -760,6 +1286,26 @@ define void @test18(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test18: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test18: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -799,6 +1345,36 @@ define void @test19(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test19: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test19: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -839,6 +1415,36 @@ define void @test20(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test20: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test20: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -879,6 +1485,36 @@ define void @test21(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test21: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test21: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -919,6 +1555,36 @@ define void @test22(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test22: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test22: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -949,6 +1615,26 @@ define void @test23(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test23: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvf32ger wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test23: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvf32ger wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -988,6 +1674,36 @@ define void @test24(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test24: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test24: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1028,6 +1744,36 @@ define void @test25(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test25: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test25: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1068,6 +1814,36 @@ define void @test26(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test26: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test26: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1108,6 +1884,36 @@ define void @test27(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test27: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gernn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test27: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gernn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1138,6 +1944,26 @@ define void @test28(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test28: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test28: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -1177,6 +2003,36 @@ define void @test29(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test29: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test29: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) @@ -1217,6 +2073,36 @@ define void @test30(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test30: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test30: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) @@ -1257,6 +2143,36 @@ define void @test31(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test31: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test31: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) @@ -1297,6 +2213,36 @@ define void @test32(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test32: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test32: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) @@ -1331,6 +2277,30 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test33: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64ger wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test33: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64ger wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <256 x i1>, ptr %vpp, align 32 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %0, <16 x i8> %vc) @@ -1375,6 +2345,40 @@ define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test34: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test34: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1420,6 +2424,40 @@ define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test35: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gerpn wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test35: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gerpn wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1465,6 +2503,40 @@ define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test36: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test36: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1510,6 +2582,40 @@ define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test37: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gernn wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test37: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gernn wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1545,6 +2651,30 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test38: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test38: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <256 x i1>, ptr %vpp, align 32 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %0, <16 x i8> %vc, i32 0, i32 0) @@ -1589,6 +2719,40 @@ define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test39: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test39: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1634,6 +2798,40 @@ define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test40: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test40: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1679,6 +2877,40 @@ define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test41: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test41: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1724,6 +2956,40 @@ define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: test42: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test42: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll index 89e5147aecc5f..37d0e69b3beaa 100644 --- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -5,6 +5,12 @@ ; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC +; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>) declare <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -64,6 +70,60 @@ define void @testPHI1(ptr %Dst, ptr %Src, i32 signext %Len) { ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testPHI1: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmpwi r5, 3 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: blt cr0, .LBB0_3 +; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-WACC-NEXT: clrldi r5, r5, 32 +; CHECK-WACC-NEXT: addi r5, r5, -2 +; CHECK-WACC-NEXT: lxv v2, 0(r4) +; CHECK-WACC-NEXT: lxv v3, 16(r4) +; CHECK-WACC-NEXT: mtctr r5 +; CHECK-WACC-NEXT: addi r4, r4, 32 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB0_2: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-WACC-NEXT: addi r4, r4, 16 +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: bdnz .LBB0_2 +; CHECK-WACC-NEXT: .LBB0_3: # %for.cond.cleanup +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 48(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testPHI1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmpwi r5, 3 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: blt cr0, .LBB0_3 +; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-BE-WACC-NEXT: clrldi r5, r5, 32 +; CHECK-BE-WACC-NEXT: addi r5, r5, -2 +; CHECK-BE-WACC-NEXT: lxv v2, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v3, 16(r4) +; CHECK-BE-WACC-NEXT: mtctr r5 +; CHECK-BE-WACC-NEXT: addi r4, r4, 32 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB0_2: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-BE-WACC-NEXT: addi r4, r4, 16 +; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: bdnz .LBB0_2 +; CHECK-BE-WACC-NEXT: .LBB0_3: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <16 x i8>, ptr %Src, align 16 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1 @@ -161,6 +221,62 @@ define dso_local void @testPHI2(ptr %Dst, ptr %Src, i32 signext %Len) { ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testPHI2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v2, 0(r4) +; CHECK-WACC-NEXT: lxv v3, 16(r4) +; CHECK-WACC-NEXT: lxv vs0, 32(r4) +; CHECK-WACC-NEXT: cmpwi r5, 4 +; CHECK-WACC-NEXT: xvf64ger wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: blt cr0, .LBB1_3 +; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-WACC-NEXT: clrldi r5, r5, 32 +; CHECK-WACC-NEXT: addi r5, r5, -3 +; CHECK-WACC-NEXT: mtctr r5 +; CHECK-WACC-NEXT: addi r4, r4, 48 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB1_2: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-WACC-NEXT: addi r4, r4, 16 +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: bdnz .LBB1_2 +; CHECK-WACC-NEXT: .LBB1_3: # %for.cond.cleanup +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 48(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testPHI2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v2, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v3, 16(r4) +; CHECK-BE-WACC-NEXT: lxv vs0, 32(r4) +; CHECK-BE-WACC-NEXT: cmpwi r5, 4 +; CHECK-BE-WACC-NEXT: xvf64ger wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: blt cr0, .LBB1_3 +; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-BE-WACC-NEXT: clrldi r5, r5, 32 +; CHECK-BE-WACC-NEXT: addi r5, r5, -3 +; CHECK-BE-WACC-NEXT: mtctr r5 +; CHECK-BE-WACC-NEXT: addi r4, r4, 48 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB1_2: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-BE-WACC-NEXT: addi r4, r4, 16 +; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: bdnz .LBB1_2 +; CHECK-BE-WACC-NEXT: .LBB1_3: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <16 x i8>, ptr %Src, align 16 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1 @@ -229,6 +345,28 @@ define void @testImplicitDef(ptr %ptr) { ; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs3, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testImplicitDef: +; CHECK-WACC: # %bb.0: # %label1 +; CHECK-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-WACC-NEXT: bc 12, 4*cr5+lt, .LBB2_2 +; CHECK-WACC-NEXT: # %bb.1: # %label2 +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: .LBB2_2: # %label3 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v2, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testImplicitDef: +; CHECK-BE-WACC: # %bb.0: # %label1 +; CHECK-BE-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-BE-WACC-NEXT: bc 12, 4*cr5+lt, .LBB2_2 +; CHECK-BE-WACC-NEXT: # %bb.1: # %label2 +; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: .LBB2_2: # %label3 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 0(r3) +; CHECK-BE-WACC-NEXT: blr label1: br i1 undef, label %label3, label %label2 @@ -312,6 +450,70 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun ; CHECK-BE-NEXT: stxv vs3, 48(r5) ; CHECK-BE-NEXT: stxv vs2, 32(r5) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testNestedPHI: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmplwi r3, 0 +; CHECK-WACC-NEXT: beq cr0, .LBB3_2 +; CHECK-WACC-NEXT: # %bb.1: # %if.then +; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-WACC-NEXT: cmpwi r4, 1 +; CHECK-WACC-NEXT: bge cr0, .LBB3_3 +; CHECK-WACC-NEXT: b .LBB3_5 +; CHECK-WACC-NEXT: .LBB3_2: +; CHECK-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-WACC-NEXT: cmpwi r4, 1 +; CHECK-WACC-NEXT: blt cr0, .LBB3_5 +; CHECK-WACC-NEXT: .LBB3_3: # %for.body.preheader +; CHECK-WACC-NEXT: addi r3, r4, -1 +; CHECK-WACC-NEXT: clrldi r3, r3, 32 +; CHECK-WACC-NEXT: addi r3, r3, 1 +; CHECK-WACC-NEXT: mtctr r3 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB3_4: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-WACC-NEXT: bdnz .LBB3_4 +; CHECK-WACC-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: li r3, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r5) +; CHECK-WACC-NEXT: stxv v5, 32(r5) +; CHECK-WACC-NEXT: stxv v2, 16(r5) +; CHECK-WACC-NEXT: stxv v3, 0(r5) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testNestedPHI: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmplwi r3, 0 +; CHECK-BE-WACC-NEXT: beq cr0, .LBB3_2 +; CHECK-BE-WACC-NEXT: # %bb.1: # %if.then +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: cmpwi r4, 1 +; CHECK-BE-WACC-NEXT: bge cr0, .LBB3_3 +; CHECK-BE-WACC-NEXT: b .LBB3_5 +; CHECK-BE-WACC-NEXT: .LBB3_2: +; CHECK-BE-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-BE-WACC-NEXT: cmpwi r4, 1 +; CHECK-BE-WACC-NEXT: blt cr0, .LBB3_5 +; CHECK-BE-WACC-NEXT: .LBB3_3: # %for.body.preheader +; CHECK-BE-WACC-NEXT: addi r3, r4, -1 +; CHECK-BE-WACC-NEXT: clrldi r3, r3, 32 +; CHECK-BE-WACC-NEXT: addi r3, r3, 1 +; CHECK-BE-WACC-NEXT: mtctr r3 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB3_4: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: bdnz .LBB3_4 +; CHECK-BE-WACC-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: li r3, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r5) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r5) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r5) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r5) +; CHECK-BE-WACC-NEXT: blr entry: %tobool.not = icmp eq i32 %cond, 0 br i1 %tobool.not, label %if.end, label %if.then diff --git a/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll b/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll index 291cf97fd009e..929bf5f61dd90 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll +++ b/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=future \ +; RUN: -mtriple=powerpc64-ibm-aix < %s | FileCheck %s --check-prefix=CHECK-WACC target datalayout = "E-m:a-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" @@ -38,6 +40,43 @@ define void @baz(i64 %arg) local_unnamed_addr #0 { ; CHECK-NEXT: xxswapd 0, 0 ; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: blr +; +; CHECK-WACC-LABEL: baz: +; CHECK-WACC: # %bb.0: # %bb +; CHECK-WACC-NEXT: dmxxextfdmr512 34, 36, 0, 0 +; CHECK-WACC-NEXT: xxmrgld 1, 34, 36 +; CHECK-WACC-NEXT: xxswapd 2, 1 +; CHECK-WACC-NEXT: xxlxor 0, 0, 0 +; CHECK-WACC-NEXT: xvnegdp 1, 1 +; CHECK-WACC-NEXT: xvnegdp 2, 2 +; CHECK-WACC-NEXT: xvsubdp 1, 1, 0 +; CHECK-WACC-NEXT: xvsubdp 2, 2, 37 +; CHECK-WACC-NEXT: xvmuldp 1, 1, 0 +; CHECK-WACC-NEXT: xvmuldp 2, 2, 0 +; CHECK-WACC-NEXT: xvmaddadp 1, 0, 0 +; CHECK-WACC-NEXT: xvmaddadp 2, 0, 0 +; CHECK-WACC-NEXT: stxv 1, 0(3) +; CHECK-WACC-NEXT: stxv 2, 0(3) +; CHECK-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-WACC-NEXT: bc 12, 20, L..BB0_2 +; CHECK-WACC-NEXT: # %bb.1: # %bb10 +; CHECK-WACC-NEXT: xvf64gerpp 0, 34, 0 +; CHECK-WACC-NEXT: L..BB0_2: # %bb12 +; CHECK-WACC-NEXT: cmpdi 3, 0 +; CHECK-WACC-NEXT: .align 4 +; CHECK-WACC-NEXT: L..BB0_3: # %bb13 +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: bc 4, 2, L..BB0_3 +; CHECK-WACC-NEXT: # %bb.4: # %bb14 +; CHECK-WACC-NEXT: dmxxextfdmr512 34, 36, 0, 0 +; CHECK-WACC-NEXT: xxlxor 0, 0, 0 +; CHECK-WACC-NEXT: xvsubdp 1, 0, 35 +; CHECK-WACC-NEXT: xxlxor 2, 2, 2 +; CHECK-WACC-NEXT: xvmaddadp 2, 1, 2 +; CHECK-WACC-NEXT: xvadddp 0, 2, 0 +; CHECK-WACC-NEXT: xxswapd 0, 0 +; CHECK-WACC-NEXT: stxv 0, 0(3) +; CHECK-WACC-NEXT: blr bb: %call = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> poison) %extractvalue = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %call, 0