From 8f00dbd69e3dd012e8a21fc35600f77431610319 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 30 Oct 2025 14:01:40 -0500 Subject: [PATCH 1/5] update test to add checks for cpu=future --- .../CodeGen/PowerPC/mma-acc-copy-hints.ll | 91 ++++++ llvm/test/CodeGen/PowerPC/mma-acc-memops.ll | 290 ++++++++++++++---- llvm/test/CodeGen/PowerPC/mma-acc-spill.ll | 102 ++++++ llvm/test/CodeGen/PowerPC/mma-phi-accs.ll | 202 ++++++++++++ 4 files changed, 625 insertions(+), 60 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll index 7e2f744ac1d71..c383485cbfba2 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-FUTURE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-FUTURE define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c) local_unnamed_addr #0 { ; CHECK-LABEL: testMultiply: @@ -91,6 +97,91 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound ; CHECK-BE-NEXT: ld r30, -16(r1) ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr +; +; CHECK-LE-FUTURE-LABEL: testMultiply: +; CHECK-LE-FUTURE: # %bb.0: # %entry +; CHECK-LE-FUTURE-NEXT: mflr r0 +; CHECK-LE-FUTURE-NEXT: std r30, -16(r1) +; CHECK-LE-FUTURE-NEXT: std r0, 16(r1) +; CHECK-LE-FUTURE-NEXT: clrldi r0, r1, 59 +; CHECK-LE-FUTURE-NEXT: subfic r0, r0, -128 +; 
CHECK-LE-FUTURE-NEXT: mr r30, r1 +; CHECK-LE-FUTURE-NEXT: stdux r1, r1, r0 +; CHECK-LE-FUTURE-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill +; CHECK-LE-FUTURE-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill +; CHECK-LE-FUTURE-NEXT: lxv v31, 0(r3) +; CHECK-LE-FUTURE-NEXT: lxv v30, 0(r4) +; CHECK-LE-FUTURE-NEXT: addi r3, r1, 32 +; CHECK-LE-FUTURE-NEXT: std r29, -24(r30) # 8-byte Folded Spill +; CHECK-LE-FUTURE-NEXT: vmr v2, v31 +; CHECK-LE-FUTURE-NEXT: vmr v3, v30 +; CHECK-LE-FUTURE-NEXT: mr r29, r5 +; CHECK-LE-FUTURE-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc +; CHECK-LE-FUTURE-NEXT: dmxxsetaccz wacc0 +; CHECK-LE-FUTURE-NEXT: xvf32gerpp wacc0, v31, v30 +; CHECK-LE-FUTURE-NEXT: lxv vs0, 48(r1) +; CHECK-LE-FUTURE-NEXT: lxv vs1, 32(r1) +; CHECK-LE-FUTURE-NEXT: xvf32gerpp wacc0, vs1, vs0 +; CHECK-LE-FUTURE-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-LE-FUTURE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-LE-FUTURE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-FUTURE-NEXT: stxv v5, 0(r29) +; CHECK-LE-FUTURE-NEXT: pstxv v4, 8(r29), 0 +; CHECK-LE-FUTURE-NEXT: stxv v3, 16(r29) +; CHECK-LE-FUTURE-NEXT: pstxv v2, 24(r29), 0 +; CHECK-LE-FUTURE-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload +; CHECK-LE-FUTURE-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload +; CHECK-LE-FUTURE-NEXT: ld r29, -24(r30) # 8-byte Folded Reload +; CHECK-LE-FUTURE-NEXT: mr r1, r30 +; CHECK-LE-FUTURE-NEXT: ld r0, 16(r1) +; CHECK-LE-FUTURE-NEXT: ld r30, -16(r1) +; CHECK-LE-FUTURE-NEXT: mtlr r0 +; CHECK-LE-FUTURE-NEXT: blr +; +; CHECK-BE-FUTURE-LABEL: testMultiply: +; CHECK-BE-FUTURE: # %bb.0: # %entry +; CHECK-BE-FUTURE-NEXT: mflr r0 +; CHECK-BE-FUTURE-NEXT: std r30, -16(r1) +; CHECK-BE-FUTURE-NEXT: std r0, 16(r1) +; CHECK-BE-FUTURE-NEXT: clrldi r0, r1, 59 +; CHECK-BE-FUTURE-NEXT: subfic r0, r0, -224 +; CHECK-BE-FUTURE-NEXT: mr r30, r1 +; CHECK-BE-FUTURE-NEXT: stdux r1, r1, r0 +; CHECK-BE-FUTURE-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill +; 
CHECK-BE-FUTURE-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill +; CHECK-BE-FUTURE-NEXT: lxv v31, 0(r3) +; CHECK-BE-FUTURE-NEXT: lxv v30, 0(r4) +; CHECK-BE-FUTURE-NEXT: addi r3, r1, 128 +; CHECK-BE-FUTURE-NEXT: std r29, -24(r30) # 8-byte Folded Spill +; CHECK-BE-FUTURE-NEXT: vmr v2, v31 +; CHECK-BE-FUTURE-NEXT: vmr v3, v30 +; CHECK-BE-FUTURE-NEXT: mr r29, r5 +; CHECK-BE-FUTURE-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_ +; CHECK-BE-FUTURE-NEXT: nop +; CHECK-BE-FUTURE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-FUTURE-NEXT: xvf32gerpp wacc0, v31, v30 +; CHECK-BE-FUTURE-NEXT: lxv vs0, 128(r1) +; CHECK-BE-FUTURE-NEXT: lxv vs1, 144(r1) +; CHECK-BE-FUTURE-NEXT: xvf32gerpp wacc0, vs0, vs1 +; CHECK-BE-FUTURE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-FUTURE-NEXT: vmr v1, v2 +; CHECK-BE-FUTURE-NEXT: vmr v7, v4 +; CHECK-BE-FUTURE-NEXT: vmr v0, v3 +; CHECK-BE-FUTURE-NEXT: vmr v6, v5 +; CHECK-BE-FUTURE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 +; CHECK-BE-FUTURE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-FUTURE-NEXT: stxv v2, 0(r29) +; CHECK-BE-FUTURE-NEXT: pstxv v3, 8(r29), 0 +; CHECK-BE-FUTURE-NEXT: stxv v4, 16(r29) +; CHECK-BE-FUTURE-NEXT: pstxv v5, 24(r29), 0 +; CHECK-BE-FUTURE-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload +; CHECK-BE-FUTURE-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload +; CHECK-BE-FUTURE-NEXT: ld r29, -24(r30) # 8-byte Folded Reload +; CHECK-BE-FUTURE-NEXT: mr r1, r30 +; CHECK-BE-FUTURE-NEXT: ld r0, 16(r1) +; CHECK-BE-FUTURE-NEXT: ld r30, -16(r1) +; CHECK-BE-FUTURE-NEXT: mtlr r0 +; CHECK-BE-FUTURE-NEXT: blr entry: %vP = alloca <256 x i1>, align 32 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %vP) diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll index 059d60a9608f8..1076230a0a7d7 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -1,12 +1,20 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \ ; RUN: --check-prefix=LE-PAIRED +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \ +; RUN: --check-prefix=LE-PAIRED-PWR10 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \ ; RUN: FileCheck %s --check-prefix=BE-PAIRED +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \ +; RUN: FileCheck %s --check-prefix=BE-PAIRED-PWR10 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: | FileCheck %s --check-prefix=LE-PWR9 @@ -26,30 +34,60 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-LABEL: testLdSt: ; LE-PAIRED: # %bb.0: # %entry -; LE-PAIRED-NEXT: plxv vs3, f@PCREL+64(0), 1 -; LE-PAIRED-NEXT: plxv vs2, f@PCREL+80(0), 1 -; LE-PAIRED-NEXT: plxv vs1, f@PCREL+96(0), 1 -; LE-PAIRED-NEXT: plxv vs0, f@PCREL+112(0), 1 -; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+176(0), 1 -; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+160(0), 1 -; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+144(0), 1 -; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+128(0), 1 +; LE-PAIRED-NEXT: plxv v3, f@PCREL+64(0), 1 +; LE-PAIRED-NEXT: plxv v5, f@PCREL+96(0), 1 +; LE-PAIRED-NEXT: plxv v2, f@PCREL+80(0), 1 +; LE-PAIRED-NEXT: plxv v4, f@PCREL+112(0), 1 +; 
LE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-NEXT: pstxv v4, f@PCREL+176(0), 1 +; LE-PAIRED-NEXT: pstxv v5, f@PCREL+160(0), 1 +; LE-PAIRED-NEXT: pstxv v2, f@PCREL+144(0), 1 +; LE-PAIRED-NEXT: pstxv v3, f@PCREL+128(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-PWR10-LABEL: testLdSt: +; LE-PAIRED-PWR10: # %bb.0: # %entry +; LE-PAIRED-PWR10-NEXT: plxv vs3, f@PCREL+64(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs2, f@PCREL+80(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs1, f@PCREL+96(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs0, f@PCREL+112(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs0, f@PCREL+176(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs1, f@PCREL+160(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs2, f@PCREL+144(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs3, f@PCREL+128(0), 1 +; LE-PAIRED-PWR10-NEXT: blr +; ; BE-PAIRED-LABEL: testLdSt: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha ; BE-PAIRED-NEXT: addi r3, r3, f@toc@l -; BE-PAIRED-NEXT: lxv vs3, 112(r3) -; BE-PAIRED-NEXT: lxv vs2, 96(r3) -; BE-PAIRED-NEXT: lxv vs1, 80(r3) -; BE-PAIRED-NEXT: lxv vs0, 64(r3) -; BE-PAIRED-NEXT: stxv vs1, 144(r3) -; BE-PAIRED-NEXT: stxv vs0, 128(r3) -; BE-PAIRED-NEXT: stxv vs3, 176(r3) -; BE-PAIRED-NEXT: stxv vs2, 160(r3) +; BE-PAIRED-NEXT: lxv v3, 112(r3) +; BE-PAIRED-NEXT: lxv v5, 80(r3) +; BE-PAIRED-NEXT: lxv v2, 96(r3) +; BE-PAIRED-NEXT: lxv v4, 64(r3) +; BE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; BE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-NEXT: stxv v5, 176(r3) +; BE-PAIRED-NEXT: stxv v4, 160(r3) +; BE-PAIRED-NEXT: stxv v3, 144(r3) +; BE-PAIRED-NEXT: stxv v2, 128(r3) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-PWR10-LABEL: testLdSt: +; BE-PAIRED-PWR10: # %bb.0: # %entry +; BE-PAIRED-PWR10-NEXT: addis r3, r2, f@toc@ha +; BE-PAIRED-PWR10-NEXT: addi r3, r3, f@toc@l +; BE-PAIRED-PWR10-NEXT: lxv vs3, 112(r3) +; BE-PAIRED-PWR10-NEXT: lxv vs2, 96(r3) +; BE-PAIRED-PWR10-NEXT: lxv vs1, 80(r3) +; 
BE-PAIRED-PWR10-NEXT: lxv vs0, 64(r3) +; BE-PAIRED-PWR10-NEXT: stxv vs1, 144(r3) +; BE-PAIRED-PWR10-NEXT: stxv vs0, 128(r3) +; BE-PAIRED-PWR10-NEXT: stxv vs3, 176(r3) +; BE-PAIRED-PWR10-NEXT: stxv vs2, 160(r3) +; BE-PAIRED-PWR10-NEXT: blr +; ; LE-PWR9-LABEL: testLdSt: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, f@toc@ha @@ -135,36 +173,75 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: paddi r5, 0, f@PCREL, 1 ; LE-PAIRED-NEXT: sldi r3, r3, 6 ; LE-PAIRED-NEXT: add r6, r5, r3 -; LE-PAIRED-NEXT: lxvx vs3, r5, r3 +; LE-PAIRED-NEXT: lxvx v3, r5, r3 +; LE-PAIRED-NEXT: lxv v2, 16(r6) +; LE-PAIRED-NEXT: lxv v5, 32(r6) +; LE-PAIRED-NEXT: lxv v4, 48(r6) ; LE-PAIRED-NEXT: sldi r3, r4, 6 ; LE-PAIRED-NEXT: add r4, r5, r3 -; LE-PAIRED-NEXT: lxv vs2, 16(r6) -; LE-PAIRED-NEXT: lxv vs1, 32(r6) -; LE-PAIRED-NEXT: lxv vs0, 48(r6) -; LE-PAIRED-NEXT: stxvx vs3, r5, r3 -; LE-PAIRED-NEXT: stxv vs0, 48(r4) -; LE-PAIRED-NEXT: stxv vs1, 32(r4) -; LE-PAIRED-NEXT: stxv vs2, 16(r4) +; LE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-NEXT: stxvx v3, r5, r3 +; LE-PAIRED-NEXT: stxv v4, 48(r4) +; LE-PAIRED-NEXT: stxv v5, 32(r4) +; LE-PAIRED-NEXT: stxv v2, 16(r4) ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-PWR10-LABEL: testXLdSt: +; LE-PAIRED-PWR10: # %bb.0: # %entry +; LE-PAIRED-PWR10-NEXT: paddi r5, 0, f@PCREL, 1 +; LE-PAIRED-PWR10-NEXT: sldi r3, r3, 6 +; LE-PAIRED-PWR10-NEXT: add r6, r5, r3 +; LE-PAIRED-PWR10-NEXT: lxvx vs3, r5, r3 +; LE-PAIRED-PWR10-NEXT: sldi r3, r4, 6 +; LE-PAIRED-PWR10-NEXT: add r4, r5, r3 +; LE-PAIRED-PWR10-NEXT: lxv vs2, 16(r6) +; LE-PAIRED-PWR10-NEXT: lxv vs1, 32(r6) +; LE-PAIRED-PWR10-NEXT: lxv vs0, 48(r6) +; LE-PAIRED-PWR10-NEXT: stxvx vs3, r5, r3 +; LE-PAIRED-PWR10-NEXT: stxv vs0, 48(r4) +; LE-PAIRED-PWR10-NEXT: stxv vs1, 32(r4) +; LE-PAIRED-PWR10-NEXT: stxv vs2, 16(r4) +; LE-PAIRED-PWR10-NEXT: blr +; ; BE-PAIRED-LABEL: testXLdSt: ; BE-PAIRED: # %bb.0: # 
%entry ; BE-PAIRED-NEXT: addis r5, r2, f@toc@ha ; BE-PAIRED-NEXT: addi r5, r5, f@toc@l ; BE-PAIRED-NEXT: sldi r3, r3, 6 ; BE-PAIRED-NEXT: add r6, r5, r3 -; BE-PAIRED-NEXT: lxv vs3, 48(r6) -; BE-PAIRED-NEXT: lxv vs2, 32(r6) -; BE-PAIRED-NEXT: lxvx vs0, r5, r3 -; BE-PAIRED-NEXT: lxv vs1, 16(r6) +; BE-PAIRED-NEXT: lxvx v2, r5, r3 +; BE-PAIRED-NEXT: lxv v5, 48(r6) +; BE-PAIRED-NEXT: lxv v3, 16(r6) +; BE-PAIRED-NEXT: lxv v4, 32(r6) ; BE-PAIRED-NEXT: sldi r3, r4, 6 ; BE-PAIRED-NEXT: add r4, r5, r3 -; BE-PAIRED-NEXT: stxvx vs0, r5, r3 -; BE-PAIRED-NEXT: stxv vs1, 16(r4) -; BE-PAIRED-NEXT: stxv vs3, 48(r4) -; BE-PAIRED-NEXT: stxv vs2, 32(r4) +; BE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; BE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-NEXT: stxvx v2, r5, r3 +; BE-PAIRED-NEXT: stxv v5, 48(r4) +; BE-PAIRED-NEXT: stxv v4, 32(r4) +; BE-PAIRED-NEXT: stxv v3, 16(r4) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-PWR10-LABEL: testXLdSt: +; BE-PAIRED-PWR10: # %bb.0: # %entry +; BE-PAIRED-PWR10-NEXT: addis r5, r2, f@toc@ha +; BE-PAIRED-PWR10-NEXT: addi r5, r5, f@toc@l +; BE-PAIRED-PWR10-NEXT: sldi r3, r3, 6 +; BE-PAIRED-PWR10-NEXT: add r6, r5, r3 +; BE-PAIRED-PWR10-NEXT: lxv vs3, 48(r6) +; BE-PAIRED-PWR10-NEXT: lxv vs2, 32(r6) +; BE-PAIRED-PWR10-NEXT: lxvx vs0, r5, r3 +; BE-PAIRED-PWR10-NEXT: lxv vs1, 16(r6) +; BE-PAIRED-PWR10-NEXT: sldi r3, r4, 6 +; BE-PAIRED-PWR10-NEXT: add r4, r5, r3 +; BE-PAIRED-PWR10-NEXT: stxvx vs0, r5, r3 +; BE-PAIRED-PWR10-NEXT: stxv vs1, 16(r4) +; BE-PAIRED-PWR10-NEXT: stxv vs3, 48(r4) +; BE-PAIRED-PWR10-NEXT: stxv vs2, 32(r4) +; BE-PAIRED-PWR10-NEXT: blr +; ; LE-PWR9-LABEL: testXLdSt: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r5, r2, f@toc@ha @@ -253,30 +330,60 @@ entry: define dso_local void @testUnalignedLdSt() { ; LE-PAIRED-LABEL: testUnalignedLdSt: ; LE-PAIRED: # %bb.0: # %entry -; LE-PAIRED-NEXT: plxv vs3, f@PCREL+11(0), 1 -; LE-PAIRED-NEXT: plxv vs2, f@PCREL+27(0), 1 -; LE-PAIRED-NEXT: plxv vs1, f@PCREL+43(0), 1 -; 
LE-PAIRED-NEXT: plxv vs0, f@PCREL+59(0), 1 -; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+67(0), 1 -; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+51(0), 1 -; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+35(0), 1 -; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+19(0), 1 +; LE-PAIRED-NEXT: plxv v3, f@PCREL+11(0), 1 +; LE-PAIRED-NEXT: plxv v5, f@PCREL+43(0), 1 +; LE-PAIRED-NEXT: plxv v2, f@PCREL+27(0), 1 +; LE-PAIRED-NEXT: plxv v4, f@PCREL+59(0), 1 +; LE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-NEXT: pstxv v4, f@PCREL+67(0), 1 +; LE-PAIRED-NEXT: pstxv v5, f@PCREL+51(0), 1 +; LE-PAIRED-NEXT: pstxv v2, f@PCREL+35(0), 1 +; LE-PAIRED-NEXT: pstxv v3, f@PCREL+19(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-PWR10-LABEL: testUnalignedLdSt: +; LE-PAIRED-PWR10: # %bb.0: # %entry +; LE-PAIRED-PWR10-NEXT: plxv vs3, f@PCREL+11(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs2, f@PCREL+27(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs1, f@PCREL+43(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs0, f@PCREL+59(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs0, f@PCREL+67(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs1, f@PCREL+51(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs2, f@PCREL+35(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs3, f@PCREL+19(0), 1 +; LE-PAIRED-PWR10-NEXT: blr +; ; BE-PAIRED-LABEL: testUnalignedLdSt: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha ; BE-PAIRED-NEXT: addi r3, r3, f@toc@l -; BE-PAIRED-NEXT: plxv vs3, 59(r3), 0 -; BE-PAIRED-NEXT: plxv vs2, 43(r3), 0 -; BE-PAIRED-NEXT: plxv vs1, 27(r3), 0 -; BE-PAIRED-NEXT: plxv vs0, 11(r3), 0 -; BE-PAIRED-NEXT: pstxv vs1, 35(r3), 0 -; BE-PAIRED-NEXT: pstxv vs0, 19(r3), 0 -; BE-PAIRED-NEXT: pstxv vs3, 67(r3), 0 -; BE-PAIRED-NEXT: pstxv vs2, 51(r3), 0 +; BE-PAIRED-NEXT: plxv v3, 59(r3), 0 +; BE-PAIRED-NEXT: plxv v5, 27(r3), 0 +; BE-PAIRED-NEXT: plxv v2, 43(r3), 0 +; BE-PAIRED-NEXT: plxv v4, 11(r3), 0 +; BE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; BE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 
+; BE-PAIRED-NEXT: pstxv v5, 67(r3), 0 +; BE-PAIRED-NEXT: pstxv v4, 51(r3), 0 +; BE-PAIRED-NEXT: pstxv v3, 35(r3), 0 +; BE-PAIRED-NEXT: pstxv v2, 19(r3), 0 ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-PWR10-LABEL: testUnalignedLdSt: +; BE-PAIRED-PWR10: # %bb.0: # %entry +; BE-PAIRED-PWR10-NEXT: addis r3, r2, f@toc@ha +; BE-PAIRED-PWR10-NEXT: addi r3, r3, f@toc@l +; BE-PAIRED-PWR10-NEXT: plxv vs3, 59(r3), 0 +; BE-PAIRED-PWR10-NEXT: plxv vs2, 43(r3), 0 +; BE-PAIRED-PWR10-NEXT: plxv vs1, 27(r3), 0 +; BE-PAIRED-PWR10-NEXT: plxv vs0, 11(r3), 0 +; BE-PAIRED-PWR10-NEXT: pstxv vs1, 35(r3), 0 +; BE-PAIRED-PWR10-NEXT: pstxv vs0, 19(r3), 0 +; BE-PAIRED-PWR10-NEXT: pstxv vs3, 67(r3), 0 +; BE-PAIRED-PWR10-NEXT: pstxv vs2, 51(r3), 0 +; BE-PAIRED-PWR10-NEXT: blr +; ; LE-PWR9-LABEL: testUnalignedLdSt: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, f@toc@ha @@ -381,16 +488,34 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+64(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-PWR10-LABEL: testLdStPair: +; LE-PAIRED-PWR10: # %bb.0: # %entry +; LE-PAIRED-PWR10-NEXT: plxv vs0, g@PCREL+48(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs1, g@PCREL+32(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs0, g@PCREL+80(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs1, g@PCREL+64(0), 1 +; LE-PAIRED-PWR10-NEXT: blr +; ; BE-PAIRED-LABEL: testLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, g@toc@ha ; BE-PAIRED-NEXT: addi r3, r3, g@toc@l -; BE-PAIRED-NEXT: lxv vs0, 32(r3) -; BE-PAIRED-NEXT: lxv vs1, 48(r3) -; BE-PAIRED-NEXT: stxv vs1, 80(r3) -; BE-PAIRED-NEXT: stxv vs0, 64(r3) +; BE-PAIRED-NEXT: lxv vs0, 48(r3) +; BE-PAIRED-NEXT: lxv vs1, 32(r3) +; BE-PAIRED-NEXT: stxv vs0, 80(r3) +; BE-PAIRED-NEXT: stxv vs1, 64(r3) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-PWR10-LABEL: testLdStPair: +; BE-PAIRED-PWR10: # %bb.0: # %entry +; BE-PAIRED-PWR10-NEXT: addis r3, r2, g@toc@ha +; BE-PAIRED-PWR10-NEXT: addi r3, r3, g@toc@l +; BE-PAIRED-PWR10-NEXT: lxv vs0, 32(r3) 
+; BE-PAIRED-PWR10-NEXT: lxv vs1, 48(r3) +; BE-PAIRED-PWR10-NEXT: stxv vs1, 80(r3) +; BE-PAIRED-PWR10-NEXT: stxv vs0, 64(r3) +; BE-PAIRED-PWR10-NEXT: blr +; ; LE-PWR9-LABEL: testLdStPair: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, g@toc@ha @@ -453,13 +578,26 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: paddi r5, 0, g@PCREL, 1 ; LE-PAIRED-NEXT: add r6, r5, r3 ; LE-PAIRED-NEXT: lxvx vs0, r5, r3 +; LE-PAIRED-NEXT: lxv vs1, 16(r6) ; LE-PAIRED-NEXT: sldi r3, r4, 5 ; LE-PAIRED-NEXT: add r4, r5, r3 -; LE-PAIRED-NEXT: lxv vs1, 16(r6) ; LE-PAIRED-NEXT: stxvx vs0, r5, r3 ; LE-PAIRED-NEXT: stxv vs1, 16(r4) ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-PWR10-LABEL: testXLdStPair: +; LE-PAIRED-PWR10: # %bb.0: # %entry +; LE-PAIRED-PWR10-NEXT: sldi r3, r3, 5 +; LE-PAIRED-PWR10-NEXT: paddi r5, 0, g@PCREL, 1 +; LE-PAIRED-PWR10-NEXT: add r6, r5, r3 +; LE-PAIRED-PWR10-NEXT: lxvx vs0, r5, r3 +; LE-PAIRED-PWR10-NEXT: sldi r3, r4, 5 +; LE-PAIRED-PWR10-NEXT: add r4, r5, r3 +; LE-PAIRED-PWR10-NEXT: lxv vs1, 16(r6) +; LE-PAIRED-PWR10-NEXT: stxvx vs0, r5, r3 +; LE-PAIRED-PWR10-NEXT: stxv vs1, 16(r4) +; LE-PAIRED-PWR10-NEXT: blr +; ; BE-PAIRED-LABEL: testXLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r5, r2, g@toc@ha @@ -467,13 +605,27 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: addi r5, r5, g@toc@l ; BE-PAIRED-NEXT: add r6, r5, r3 ; BE-PAIRED-NEXT: lxvx vs0, r5, r3 +; BE-PAIRED-NEXT: lxv vs1, 16(r6) ; BE-PAIRED-NEXT: sldi r3, r4, 5 ; BE-PAIRED-NEXT: add r4, r5, r3 -; BE-PAIRED-NEXT: lxv vs1, 16(r6) ; BE-PAIRED-NEXT: stxvx vs0, r5, r3 ; BE-PAIRED-NEXT: stxv vs1, 16(r4) ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-PWR10-LABEL: testXLdStPair: +; BE-PAIRED-PWR10: # %bb.0: # %entry +; BE-PAIRED-PWR10-NEXT: addis r5, r2, g@toc@ha +; BE-PAIRED-PWR10-NEXT: sldi r3, r3, 5 +; BE-PAIRED-PWR10-NEXT: addi r5, r5, g@toc@l +; BE-PAIRED-PWR10-NEXT: add r6, r5, r3 +; BE-PAIRED-PWR10-NEXT: lxvx vs0, r5, r3 
+; BE-PAIRED-PWR10-NEXT: sldi r3, r4, 5 +; BE-PAIRED-PWR10-NEXT: add r4, r5, r3 +; BE-PAIRED-PWR10-NEXT: lxv vs1, 16(r6) +; BE-PAIRED-PWR10-NEXT: stxvx vs0, r5, r3 +; BE-PAIRED-PWR10-NEXT: stxv vs1, 16(r4) +; BE-PAIRED-PWR10-NEXT: blr +; ; LE-PWR9-LABEL: testXLdStPair: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r5, r2, g@toc@ha @@ -548,16 +700,34 @@ define dso_local void @testUnalignedLdStPair() { ; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+19(0), 1 ; LE-PAIRED-NEXT: blr ; +; LE-PAIRED-PWR10-LABEL: testUnalignedLdStPair: +; LE-PAIRED-PWR10: # %bb.0: # %entry +; LE-PAIRED-PWR10-NEXT: plxv vs0, g@PCREL+27(0), 1 +; LE-PAIRED-PWR10-NEXT: plxv vs1, g@PCREL+11(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs0, g@PCREL+35(0), 1 +; LE-PAIRED-PWR10-NEXT: pstxv vs1, g@PCREL+19(0), 1 +; LE-PAIRED-PWR10-NEXT: blr +; ; BE-PAIRED-LABEL: testUnalignedLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, g@toc@ha ; BE-PAIRED-NEXT: addi r3, r3, g@toc@l -; BE-PAIRED-NEXT: plxv vs0, 11(r3), 0 -; BE-PAIRED-NEXT: plxv vs1, 27(r3), 0 -; BE-PAIRED-NEXT: pstxv vs1, 35(r3), 0 -; BE-PAIRED-NEXT: pstxv vs0, 19(r3), 0 +; BE-PAIRED-NEXT: plxv vs0, 27(r3), 0 +; BE-PAIRED-NEXT: plxv vs1, 11(r3), 0 +; BE-PAIRED-NEXT: pstxv vs0, 35(r3), 0 +; BE-PAIRED-NEXT: pstxv vs1, 19(r3), 0 ; BE-PAIRED-NEXT: blr ; +; BE-PAIRED-PWR10-LABEL: testUnalignedLdStPair: +; BE-PAIRED-PWR10: # %bb.0: # %entry +; BE-PAIRED-PWR10-NEXT: addis r3, r2, g@toc@ha +; BE-PAIRED-PWR10-NEXT: addi r3, r3, g@toc@l +; BE-PAIRED-PWR10-NEXT: plxv vs0, 11(r3), 0 +; BE-PAIRED-PWR10-NEXT: plxv vs1, 27(r3), 0 +; BE-PAIRED-PWR10-NEXT: pstxv vs1, 35(r3), 0 +; BE-PAIRED-PWR10-NEXT: pstxv vs0, 19(r3), 0 +; BE-PAIRED-PWR10-NEXT: blr +; ; LE-PWR9-LABEL: testUnalignedLdStPair: ; LE-PWR9: # %bb.0: # %entry ; LE-PWR9-NEXT: addis r3, r2, g@toc@ha diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll index abc65bed5bf6c..9db8ba1c9eb09 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll +++ 
b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll @@ -13,6 +13,13 @@ ; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC + declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) declare void @foo() @@ -119,6 +126,101 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: intrinsics1: +; CHECK-LE-WACC: # %bb.0: +; CHECK-LE-WACC-NEXT: mflr r0 +; CHECK-LE-WACC-NEXT: std r0, 16(r1) +; CHECK-LE-WACC-NEXT: stdu r1, -176(r1) +; CHECK-LE-WACC-NEXT: .cfi_def_cfa_offset 176 +; CHECK-LE-WACC-NEXT: .cfi_offset lr, 16 +; CHECK-LE-WACC-NEXT: .cfi_offset r30, -16 +; CHECK-LE-WACC-NEXT: .cfi_offset v28, -80 +; CHECK-LE-WACC-NEXT: .cfi_offset v29, -64 +; CHECK-LE-WACC-NEXT: .cfi_offset v30, -48 +; CHECK-LE-WACC-NEXT: .cfi_offset v31, -32 +; CHECK-LE-WACC-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: vmr v31, v5 +; CHECK-LE-WACC-NEXT: vmr v29, v3 +; CHECK-LE-WACC-NEXT: vmr v30, v4 +; CHECK-LE-WACC-NEXT: vmr v28, v2 +; CHECK-LE-WACC-NEXT: std r30, 160(r1) # 8-byte Folded Spill +; 
CHECK-LE-WACC-NEXT: ld r30, 272(r1) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp60, vsp62, 0 +; CHECK-LE-WACC-NEXT: xvf16ger2pp wacc0, v2, v4 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxvp vsp36, 64(r1) +; CHECK-LE-WACC-NEXT: stxvp vsp34, 32(r1) +; CHECK-LE-WACC-NEXT: bl foo@notoc +; CHECK-LE-WACC-NEXT: lxvp vsp34, 64(r1) +; CHECK-LE-WACC-NEXT: lxvp vsp36, 32(r1) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-LE-WACC-NEXT: xvf16ger2pp wacc0, v28, v30 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r30) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r30) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r30) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r30) +; CHECK-LE-WACC-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; CHECK-LE-WACC-NEXT: addi r1, r1, 176 +; CHECK-LE-WACC-NEXT: ld r0, 16(r1) +; CHECK-LE-WACC-NEXT: mtlr r0 +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: intrinsics1: +; CHECK-BE-WACC: # %bb.0: +; CHECK-BE-WACC-NEXT: mflr r0 +; CHECK-BE-WACC-NEXT: std r0, 16(r1) +; CHECK-BE-WACC-NEXT: stdu r1, -256(r1) +; CHECK-BE-WACC-NEXT: .cfi_def_cfa_offset 256 +; CHECK-BE-WACC-NEXT: .cfi_offset lr, 16 +; CHECK-BE-WACC-NEXT: .cfi_offset r30, -16 +; CHECK-BE-WACC-NEXT: .cfi_offset v28, -80 +; CHECK-BE-WACC-NEXT: .cfi_offset v29, -64 +; CHECK-BE-WACC-NEXT: .cfi_offset v30, -48 +; CHECK-BE-WACC-NEXT: .cfi_offset v31, -32 +; CHECK-BE-WACC-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: vmr v31, v5 +; CHECK-BE-WACC-NEXT: 
vmr v29, v3 +; CHECK-BE-WACC-NEXT: vmr v30, v4 +; CHECK-BE-WACC-NEXT: vmr v28, v2 +; CHECK-BE-WACC-NEXT: std r30, 240(r1) # 8-byte Folded Spill +; CHECK-BE-WACC-NEXT: ld r30, 368(r1) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp60, vsp62, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v2, v4 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxvp vsp36, 112(r1) +; CHECK-BE-WACC-NEXT: stxvp vsp34, 144(r1) +; CHECK-BE-WACC-NEXT: bl foo +; CHECK-BE-WACC-NEXT: nop +; CHECK-BE-WACC-NEXT: lxvp vsp34, 112(r1) +; CHECK-BE-WACC-NEXT: lxvp vsp36, 144(r1) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v28, v30 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r30) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r30) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r30) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r30) +; CHECK-BE-WACC-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: ld r30, 240(r1) # 8-byte Folded Reload +; CHECK-BE-WACC-NEXT: addi r1, r1, 256 +; CHECK-BE-WACC-NEXT: ld r0, 16(r1) +; CHECK-BE-WACC-NEXT: mtlr r0 +; CHECK-BE-WACC-NEXT: blr %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4) %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3) tail call void @foo() diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll index 89e5147aecc5f..37d0e69b3beaa 100644 --- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -5,6 +5,12 @@ ; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: 
-ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC +; RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>) declare <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -64,6 +70,60 @@ define void @testPHI1(ptr %Dst, ptr %Src, i32 signext %Len) { ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testPHI1: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmpwi r5, 3 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: blt cr0, .LBB0_3 +; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-WACC-NEXT: clrldi r5, r5, 32 +; CHECK-WACC-NEXT: addi r5, r5, -2 +; CHECK-WACC-NEXT: lxv v2, 0(r4) +; CHECK-WACC-NEXT: lxv v3, 16(r4) +; CHECK-WACC-NEXT: mtctr r5 +; CHECK-WACC-NEXT: addi r4, r4, 32 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB0_2: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-WACC-NEXT: addi r4, r4, 16 +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: bdnz .LBB0_2 +; CHECK-WACC-NEXT: .LBB0_3: # %for.cond.cleanup +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 48(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testPHI1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmpwi r5, 3 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: blt cr0, .LBB0_3 +; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-BE-WACC-NEXT: clrldi r5, r5, 32 +; 
CHECK-BE-WACC-NEXT: addi r5, r5, -2 +; CHECK-BE-WACC-NEXT: lxv v2, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v3, 16(r4) +; CHECK-BE-WACC-NEXT: mtctr r5 +; CHECK-BE-WACC-NEXT: addi r4, r4, 32 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB0_2: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-BE-WACC-NEXT: addi r4, r4, 16 +; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: bdnz .LBB0_2 +; CHECK-BE-WACC-NEXT: .LBB0_3: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <16 x i8>, ptr %Src, align 16 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1 @@ -161,6 +221,62 @@ define dso_local void @testPHI2(ptr %Dst, ptr %Src, i32 signext %Len) { ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testPHI2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v2, 0(r4) +; CHECK-WACC-NEXT: lxv v3, 16(r4) +; CHECK-WACC-NEXT: lxv vs0, 32(r4) +; CHECK-WACC-NEXT: cmpwi r5, 4 +; CHECK-WACC-NEXT: xvf64ger wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: blt cr0, .LBB1_3 +; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-WACC-NEXT: clrldi r5, r5, 32 +; CHECK-WACC-NEXT: addi r5, r5, -3 +; CHECK-WACC-NEXT: mtctr r5 +; CHECK-WACC-NEXT: addi r4, r4, 48 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB1_2: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-WACC-NEXT: addi r4, r4, 16 +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: bdnz .LBB1_2 +; CHECK-WACC-NEXT: .LBB1_3: # %for.cond.cleanup +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 32(r3) +; CHECK-WACC-NEXT: 
stxv v2, 48(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testPHI2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v2, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v3, 16(r4) +; CHECK-BE-WACC-NEXT: lxv vs0, 32(r4) +; CHECK-BE-WACC-NEXT: cmpwi r5, 4 +; CHECK-BE-WACC-NEXT: xvf64ger wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: blt cr0, .LBB1_3 +; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-BE-WACC-NEXT: clrldi r5, r5, 32 +; CHECK-BE-WACC-NEXT: addi r5, r5, -3 +; CHECK-BE-WACC-NEXT: mtctr r5 +; CHECK-BE-WACC-NEXT: addi r4, r4, 48 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB1_2: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: lxv vs0, 0(r4) +; CHECK-BE-WACC-NEXT: addi r4, r4, 16 +; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: bdnz .LBB1_2 +; CHECK-BE-WACC-NEXT: .LBB1_3: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <16 x i8>, ptr %Src, align 16 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %Src, i64 1 @@ -229,6 +345,28 @@ define void @testImplicitDef(ptr %ptr) { ; CHECK-BE-NEXT: xxmfacc acc0 ; CHECK-BE-NEXT: stxv vs3, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testImplicitDef: +; CHECK-WACC: # %bb.0: # %label1 +; CHECK-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-WACC-NEXT: bc 12, 4*cr5+lt, .LBB2_2 +; CHECK-WACC-NEXT: # %bb.1: # %label2 +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-WACC-NEXT: .LBB2_2: # %label3 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v2, 0(r3) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testImplicitDef: +; CHECK-BE-WACC: # %bb.0: # %label1 +; CHECK-BE-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-BE-WACC-NEXT: bc 12, 4*cr5+lt, .LBB2_2 +; CHECK-BE-WACC-NEXT: # %bb.1: # %label2 
+; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp34, vs0 +; CHECK-BE-WACC-NEXT: .LBB2_2: # %label3 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 0(r3) +; CHECK-BE-WACC-NEXT: blr label1: br i1 undef, label %label3, label %label2 @@ -312,6 +450,70 @@ define dso_local signext i32 @testNestedPHI(i32 signext %cond, i32 signext %coun ; CHECK-BE-NEXT: stxv vs3, 48(r5) ; CHECK-BE-NEXT: stxv vs2, 32(r5) ; CHECK-BE-NEXT: blr +; +; CHECK-WACC-LABEL: testNestedPHI: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmplwi r3, 0 +; CHECK-WACC-NEXT: beq cr0, .LBB3_2 +; CHECK-WACC-NEXT: # %bb.1: # %if.then +; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-WACC-NEXT: cmpwi r4, 1 +; CHECK-WACC-NEXT: bge cr0, .LBB3_3 +; CHECK-WACC-NEXT: b .LBB3_5 +; CHECK-WACC-NEXT: .LBB3_2: +; CHECK-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-WACC-NEXT: cmpwi r4, 1 +; CHECK-WACC-NEXT: blt cr0, .LBB3_5 +; CHECK-WACC-NEXT: .LBB3_3: # %for.body.preheader +; CHECK-WACC-NEXT: addi r3, r4, -1 +; CHECK-WACC-NEXT: clrldi r3, r3, 32 +; CHECK-WACC-NEXT: addi r3, r3, 1 +; CHECK-WACC-NEXT: mtctr r3 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB3_4: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-WACC-NEXT: bdnz .LBB3_4 +; CHECK-WACC-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: li r3, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r5) +; CHECK-WACC-NEXT: stxv v5, 32(r5) +; CHECK-WACC-NEXT: stxv v2, 16(r5) +; CHECK-WACC-NEXT: stxv v3, 0(r5) +; CHECK-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: testNestedPHI: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmplwi r3, 0 +; CHECK-BE-WACC-NEXT: beq cr0, .LBB3_2 +; CHECK-BE-WACC-NEXT: # %bb.1: # %if.then +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: cmpwi r4, 1 +; CHECK-BE-WACC-NEXT: bge cr0, .LBB3_3 +; CHECK-BE-WACC-NEXT: b .LBB3_5 +; CHECK-BE-WACC-NEXT: .LBB3_2: +; CHECK-BE-WACC-NEXT: 
# implicit-def: $wacc0 +; CHECK-BE-WACC-NEXT: cmpwi r4, 1 +; CHECK-BE-WACC-NEXT: blt cr0, .LBB3_5 +; CHECK-BE-WACC-NEXT: .LBB3_3: # %for.body.preheader +; CHECK-BE-WACC-NEXT: addi r3, r4, -1 +; CHECK-BE-WACC-NEXT: clrldi r3, r3, 32 +; CHECK-BE-WACC-NEXT: addi r3, r3, 1 +; CHECK-BE-WACC-NEXT: mtctr r3 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB3_4: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: bdnz .LBB3_4 +; CHECK-BE-WACC-NEXT: .LBB3_5: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: li r3, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r5) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r5) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r5) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r5) +; CHECK-BE-WACC-NEXT: blr entry: %tobool.not = icmp eq i32 %cond, 0 br i1 %tobool.not, label %if.end, label %if.then From 25df73d2bd2fcaef9685ef318f4d8e6aea87a38b Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 30 Oct 2025 16:25:41 -0500 Subject: [PATCH 2/5] update pattern matching for xvf64gernp for cpu=future and add test run line --- llvm/lib/Target/PowerPC/PPCInstrMMA.td | 2 +- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll | 1189 +++-- .../test/CodeGen/PowerPC/mma-outer-product.ll | 4166 +++++++++++++---- 3 files changed, 4156 insertions(+), 1201 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td index b38dd4ae948c6..686e0209d6e1f 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td @@ -765,7 +765,7 @@ let Predicates = [MMA, IsISAFuture] in { def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), (XVF64GERWPN $ATi, $XA, RCCp.BToVSRC)>; def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; + (XVF64GERWNP $ATi, $XA, RCCp.BToVSRC)>; def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, 
v16i8:$XB)), (XVF64GERWNN $ATi, $XA, RCCp.BToVSRC)>; diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll index 8fbc9d785796d..6999e9c1521c4 100644 --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -1,37 +1,65 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-PWR10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-PWR10 ; assemble_acc declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) define void @ass_acc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: ass_acc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlor vs3, v2, v2 -; CHECK-NEXT: xxlor vs2, v2, v2 -; CHECK-NEXT: xxlor vs0, vs2, vs2 -; CHECK-NEXT: xxlor vs1, vs3, vs3 -; CHECK-NEXT: stxv vs0, 48(r3) -; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: 
ass_acc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxlor vs3, v2, v2 -; CHECK-BE-NEXT: xxlor vs2, v2, v2 -; CHECK-BE-NEXT: xxlor vs0, vs2, vs2 -; CHECK-BE-NEXT: xxlor vs1, vs3, vs3 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: ass_acc: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxlor vs3, v2, v2 +; CHECK-PWR10-NEXT: xxlor vs2, v2, v2 +; CHECK-PWR10-NEXT: xxlor vs0, vs2, vs2 +; CHECK-PWR10-NEXT: xxlor vs1, vs3, vs3 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: ass_acc: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xxlor vs3, v2, v2 +; CHECK-BE-PWR10-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-PWR10-NEXT: xxlor vs0, vs2, vs2 +; CHECK-BE-PWR10-NEXT: xxlor vs1, vs3, vs3 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %ptr, align 64 @@ -43,29 +71,51 @@ declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>) define void @int_xxmtacc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: int_xxmtacc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlor vs3, v2, v2 -; CHECK-NEXT: xxlor vs2, v2, v2 -; CHECK-NEXT: xxlor vs0, vs2, vs2 -; CHECK-NEXT: xxlor vs1, vs3, vs3 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: stxv vs0, 48(r3) 
-; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: int_xxmtacc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxlor vs3, v2, v2 -; CHECK-BE-NEXT: xxlor vs2, v2, v2 -; CHECK-BE-NEXT: xxlor vs0, vs2, vs2 -; CHECK-BE-NEXT: xxlor vs1, vs3, vs3 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: int_xxmtacc: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxlor vs3, v2, v2 +; CHECK-PWR10-NEXT: xxlor vs2, v2, v2 +; CHECK-PWR10-NEXT: xxlor vs0, vs2, vs2 +; CHECK-PWR10-NEXT: xxlor vs1, vs3, vs3 +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: int_xxmtacc: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xxlor vs3, v2, v2 +; CHECK-BE-PWR10-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-PWR10-NEXT: xxlor vs0, vs2, vs2 +; CHECK-BE-PWR10-NEXT: xxlor vs1, vs3, vs3 +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: blr entry: ; One xxmtacc is generated 
from the call to assemble.acc then one xxmtacc is ; generated from the call to xxmtacc then one xxmfacc is generated for the store @@ -80,27 +130,49 @@ declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>) define void @int_xxmfacc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: int_xxmfacc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlor vs3, v2, v2 -; CHECK-NEXT: xxlor vs2, v2, v2 -; CHECK-NEXT: xxlor vs0, vs2, vs2 -; CHECK-NEXT: xxlor vs1, vs3, vs3 -; CHECK-NEXT: stxv vs0, 48(r3) -; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: int_xxmfacc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxlor vs3, v2, v2 -; CHECK-BE-NEXT: xxlor vs2, v2, v2 -; CHECK-BE-NEXT: xxlor vs0, vs2, vs2 -; CHECK-BE-NEXT: xxlor vs1, vs3, vs3 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: int_xxmfacc: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxlor vs3, v2, v2 +; CHECK-PWR10-NEXT: xxlor vs2, v2, v2 +; CHECK-PWR10-NEXT: xxlor vs0, vs2, vs2 +; CHECK-PWR10-NEXT: xxlor vs1, vs3, vs3 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: int_xxmfacc: +; CHECK-BE-PWR10: # %bb.0: # %entry +; 
CHECK-BE-PWR10-NEXT: xxlor vs3, v2, v2 +; CHECK-BE-PWR10-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-PWR10-NEXT: xxlor vs0, vs2, vs2 +; CHECK-BE-PWR10-NEXT: xxlor vs1, vs3, vs3 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: blr entry: ; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is ; generated from the call to xxmfacc then one xxmfacc is generated for the store @@ -115,23 +187,43 @@ declare <512 x i1> @llvm.ppc.mma.xxsetaccz() define void @int_xxsetaccz(ptr %ptr) { ; CHECK-LABEL: int_xxsetaccz: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r3) -; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: int_xxsetaccz: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: int_xxsetaccz: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: int_xxsetaccz: +; CHECK-BE-PWR10: # 
%bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() store <512 x i1> %0, ptr %ptr, align 64 @@ -143,23 +235,43 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) { ; CHECK-LABEL: disass_acc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs3, 0(r3) -; CHECK-NEXT: stxv vs2, 0(r4) -; CHECK-NEXT: stxv vs1, 0(r5) -; CHECK-NEXT: stxv vs0, 0(r6) +; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v5, 0(r3) +; CHECK-NEXT: stxv v4, 0(r4) +; CHECK-NEXT: stxv v3, 0(r5) +; CHECK-NEXT: stxv v2, 0(r6) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: disass_acc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs1, 0(r4) -; CHECK-BE-NEXT: stxv vs2, 0(r5) -; CHECK-BE-NEXT: stxv vs3, 0(r6) +; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: stxv v3, 0(r4) +; CHECK-BE-NEXT: stxv v4, 0(r5) +; CHECK-BE-NEXT: stxv v5, 0(r6) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: disass_acc: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: stxv vs2, 0(r4) +; CHECK-PWR10-NEXT: stxv vs1, 0(r5) +; CHECK-PWR10-NEXT: stxv vs0, 0(r6) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: disass_acc: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; 
CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs1, 0(r4) +; CHECK-BE-PWR10-NEXT: stxv vs2, 0(r5) +; CHECK-BE-PWR10-NEXT: stxv vs3, 0(r6) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0) @@ -181,21 +293,21 @@ define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) { ; CHECK-NEXT: cmplwi r7, 0 ; CHECK-NEXT: beq cr0, .LBB5_2 ; CHECK-NEXT: # %bb.1: # %if.then -; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: dmxxsetaccz wacc0 ; CHECK-NEXT: b .LBB5_3 ; CHECK-NEXT: .LBB5_2: # %if.else -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvi4ger8pp acc0, v2, v2 +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2 ; CHECK-NEXT: .LBB5_3: # %if.end -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r3) -; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testBranch: @@ -203,22 +315,66 @@ define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) { ; CHECK-BE-NEXT: cmplwi r7, 0 ; CHECK-BE-NEXT: beq cr0, .LBB5_2 ; CHECK-BE-NEXT: # %bb.1: # %if.then -; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: dmxxsetaccz wacc0 ; CHECK-BE-NEXT: b .LBB5_3 ; CHECK-BE-NEXT: .LBB5_2: # %if.else -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2 +; 
CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvi4ger8pp wacc0, v2, v2 ; CHECK-BE-NEXT: .LBB5_3: # %if.end -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: testBranch: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: cmplwi r7, 0 +; CHECK-PWR10-NEXT: beq cr0, .LBB5_2 +; CHECK-PWR10-NEXT: # %bb.1: # %if.then +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: b .LBB5_3 +; CHECK-PWR10-NEXT: .LBB5_2: # %if.else +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 +; CHECK-PWR10-NEXT: .LBB5_3: # %if.end +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: testBranch: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: cmplwi r7, 0 +; CHECK-BE-PWR10-NEXT: beq cr0, .LBB5_2 +; CHECK-BE-PWR10-NEXT: # %bb.1: # %if.then +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: b .LBB5_3 +; CHECK-BE-PWR10-NEXT: .LBB5_2: # %if.else +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: 
.LBB5_3: # %if.end +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: blr entry: %tobool = icmp eq i32 %val, 0 br i1 %tobool, label %if.else, label %if.then @@ -246,33 +402,63 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>) define void @testcse(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r3) -; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) -; CHECK-NEXT: stxv vs0, 112(r3) -; CHECK-NEXT: stxv vs1, 96(r3) -; CHECK-NEXT: stxv vs2, 80(r3) -; CHECK-NEXT: stxv vs3, 64(r3) +; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: stxv v4, 112(r3) +; CHECK-NEXT: stxv v5, 96(r3) +; CHECK-NEXT: stxv v2, 80(r3) +; CHECK-NEXT: stxv v3, 64(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testcse: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) -; CHECK-BE-NEXT: stxv vs1, 80(r3) -; CHECK-BE-NEXT: stxv vs0, 64(r3) -; CHECK-BE-NEXT: stxv vs3, 112(r3) -; CHECK-BE-NEXT: stxv vs2, 96(r3) +; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) +; 
CHECK-BE-NEXT: stxv v5, 112(r3) +; CHECK-BE-NEXT: stxv v4, 96(r3) +; CHECK-BE-NEXT: stxv v3, 80(r3) +; CHECK-BE-NEXT: stxv v2, 64(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: testcse: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: stxv vs0, 112(r3) +; CHECK-PWR10-NEXT: stxv vs1, 96(r3) +; CHECK-PWR10-NEXT: stxv vs2, 80(r3) +; CHECK-PWR10-NEXT: stxv vs3, 64(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: testcse: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r3) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -287,39 +473,75 @@ entry: define void @testcse2(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: xxsetaccz acc1 -; CHECK-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc1 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs4, 48(r3) -; CHECK-NEXT: stxv vs5, 32(r3) -; CHECK-NEXT: stxv vs6, 16(r3) -; CHECK-NEXT: stxv vs7, 0(r3) -; CHECK-NEXT: stxv vs0, 112(r3) -; CHECK-NEXT: stxv vs1, 96(r3) -; CHECK-NEXT: stxv vs2, 80(r3) -; CHECK-NEXT: stxv vs3, 64(r3) +; CHECK-NEXT: dmxxsetaccz wacc1 +; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: xvf32gerpp 
wacc1, v2, v2 +; CHECK-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 112(r3) +; CHECK-NEXT: stxv v5, 96(r3) +; CHECK-NEXT: stxv v2, 80(r3) +; CHECK-NEXT: stxv v3, 64(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testcse2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: xxsetaccz acc1 -; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc1 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs5, 16(r3) -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: stxv vs7, 48(r3) -; CHECK-BE-NEXT: stxv vs6, 32(r3) -; CHECK-BE-NEXT: stxv vs1, 80(r3) -; CHECK-BE-NEXT: stxv vs0, 64(r3) -; CHECK-BE-NEXT: stxv vs3, 112(r3) -; CHECK-BE-NEXT: stxv vs2, 96(r3) +; CHECK-BE-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-BE-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 112(r3) +; CHECK-BE-NEXT: stxv v4, 96(r3) +; CHECK-BE-NEXT: stxv v3, 80(r3) +; CHECK-BE-NEXT: stxv v2, 64(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: testcse2: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: xxsetaccz acc1 +; CHECK-PWR10-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-PWR10-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc1 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs4, 48(r3) +; CHECK-PWR10-NEXT: stxv vs5, 32(r3) +; CHECK-PWR10-NEXT: stxv vs6, 16(r3) +; CHECK-PWR10-NEXT: stxv vs7, 0(r3) +; 
CHECK-PWR10-NEXT: stxv vs0, 112(r3) +; CHECK-PWR10-NEXT: stxv vs1, 96(r3) +; CHECK-PWR10-NEXT: stxv vs2, 80(r3) +; CHECK-PWR10-NEXT: stxv vs3, 64(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: testcse2: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 +; CHECK-BE-PWR10-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-BE-PWR10-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc1 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs5, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs4, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs7, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs6, 32(r3) +; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r3) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -334,39 +556,75 @@ entry: define void @testcse3(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: xxsetaccz acc1 -; CHECK-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc1 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs4, 48(r3) -; CHECK-NEXT: stxv vs5, 32(r3) -; CHECK-NEXT: stxv vs6, 16(r3) -; CHECK-NEXT: stxv vs7, 0(r3) -; CHECK-NEXT: stxv vs0, 112(r3) -; CHECK-NEXT: stxv vs1, 96(r3) -; CHECK-NEXT: stxv vs2, 80(r3) -; CHECK-NEXT: stxv vs3, 64(r3) +; CHECK-NEXT: dmxxsetaccz wacc1 +; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 112(r3) +; CHECK-NEXT: stxv v5, 96(r3) +; CHECK-NEXT: stxv v2, 
80(r3) +; CHECK-NEXT: stxv v3, 64(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testcse3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: xxsetaccz acc1 -; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc1 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs5, 16(r3) -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: stxv vs7, 48(r3) -; CHECK-BE-NEXT: stxv vs6, 32(r3) -; CHECK-BE-NEXT: stxv vs1, 80(r3) -; CHECK-BE-NEXT: stxv vs0, 64(r3) -; CHECK-BE-NEXT: stxv vs3, 112(r3) -; CHECK-BE-NEXT: stxv vs2, 96(r3) +; CHECK-BE-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-BE-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 112(r3) +; CHECK-BE-NEXT: stxv v4, 96(r3) +; CHECK-BE-NEXT: stxv v3, 80(r3) +; CHECK-BE-NEXT: stxv v2, 64(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: testcse3: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: xxsetaccz acc1 +; CHECK-PWR10-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-PWR10-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc1 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs4, 48(r3) +; CHECK-PWR10-NEXT: stxv vs5, 32(r3) +; CHECK-PWR10-NEXT: stxv vs6, 16(r3) +; CHECK-PWR10-NEXT: stxv vs7, 0(r3) +; CHECK-PWR10-NEXT: stxv vs0, 112(r3) +; CHECK-PWR10-NEXT: stxv vs1, 96(r3) +; CHECK-PWR10-NEXT: stxv vs2, 80(r3) +; CHECK-PWR10-NEXT: stxv vs3, 64(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: testcse3: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 +; CHECK-BE-PWR10-NEXT: xvf32gerpp acc1, v2, v2 
+; CHECK-BE-PWR10-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc1 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs5, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs4, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs7, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs6, 32(r3) +; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r3) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -384,45 +642,45 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-NEXT: bltlr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: clrldi r4, r4, 32 -; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB9_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: rldic r7, r6, 4, 28 -; CHECK-NEXT: xxsetaccz acc2 -; CHECK-NEXT: xxsetaccz acc1 -; CHECK-NEXT: addi r6, r6, 6 ; CHECK-NEXT: add r8, r5, r7 ; CHECK-NEXT: lxvx vs0, r5, r7 -; CHECK-NEXT: rldic r7, r4, 6, 26 -; CHECK-NEXT: addi r4, r4, 3 ; CHECK-NEXT: lxv vs1, 16(r8) -; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1 +; CHECK-NEXT: dmxxsetaccz wacc2 +; CHECK-NEXT: dmxxsetaccz wacc1 +; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: xvf32gerpp wacc2, vs0, vs1 ; CHECK-NEXT: lxv vs0, 32(r8) ; CHECK-NEXT: lxv vs1, 48(r8) -; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1 -; CHECK-NEXT: lxv vs12, 64(r8) -; CHECK-NEXT: lxv vs13, 80(r8) -; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: rldic r7, r4, 6, 26 +; CHECK-NEXT: addi r4, r4, 3 +; CHECK-NEXT: addi r6, r6, 6 +; CHECK-NEXT: xvf32gerpn wacc1, vs0, vs1 +; CHECK-NEXT: lxv vs0, 64(r8) +; CHECK-NEXT: lxv vs1, 80(r8) ; CHECK-NEXT: add r8, r3, r7 -; CHECK-NEXT: xxmfacc acc2 -; CHECK-NEXT: xvf32gernp acc0, vs12, vs13 -; CHECK-NEXT: stxvx vs11, r3, r7 -; CHECK-NEXT: stxv vs8, 48(r8) -; 
CHECK-NEXT: xxmfacc acc1 -; CHECK-NEXT: stxv vs9, 32(r8) -; CHECK-NEXT: stxv vs10, 16(r8) -; CHECK-NEXT: stxv vs4, 112(r8) -; CHECK-NEXT: stxv vs5, 96(r8) -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs6, 80(r8) -; CHECK-NEXT: stxv vs7, 64(r8) -; CHECK-NEXT: stxv vs0, 176(r8) -; CHECK-NEXT: stxv vs1, 160(r8) -; CHECK-NEXT: stxv vs2, 144(r8) -; CHECK-NEXT: stxv vs3, 128(r8) +; CHECK-NEXT: xvf32gernp wacc0, vs0, vs1 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 +; CHECK-NEXT: stxvx v3, r3, r7 +; CHECK-NEXT: stxv v4, 48(r8) +; CHECK-NEXT: stxv v5, 32(r8) +; CHECK-NEXT: stxv v2, 16(r8) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-NEXT: stxv v4, 112(r8) +; CHECK-NEXT: stxv v5, 96(r8) +; CHECK-NEXT: stxv v2, 80(r8) +; CHECK-NEXT: stxv v3, 64(r8) +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 176(r8) +; CHECK-NEXT: stxv v5, 160(r8) +; CHECK-NEXT: stxv v2, 144(r8) +; CHECK-NEXT: stxv v3, 128(r8) ; CHECK-NEXT: bdnz .LBB9_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr @@ -433,48 +691,146 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-BE-NEXT: bltlr cr0 ; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader ; CHECK-BE-NEXT: clrldi r4, r4, 32 -; CHECK-BE-NEXT: li r6, 0 ; CHECK-BE-NEXT: mtctr r4 ; CHECK-BE-NEXT: li r4, 0 +; CHECK-BE-NEXT: li r6, 0 ; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB9_2: # %for.body ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: rldic r7, r6, 4, 28 -; CHECK-BE-NEXT: xxsetaccz acc2 -; CHECK-BE-NEXT: xxsetaccz acc1 -; CHECK-BE-NEXT: addi r6, r6, 6 ; CHECK-BE-NEXT: add r8, r5, r7 ; CHECK-BE-NEXT: lxvx vs0, r5, r7 -; CHECK-BE-NEXT: rldic r7, r4, 6, 26 -; CHECK-BE-NEXT: addi r4, r4, 3 ; CHECK-BE-NEXT: lxv vs1, 16(r8) -; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1 +; CHECK-BE-NEXT: dmxxsetaccz wacc2 +; CHECK-BE-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: xvf32gerpp wacc2, vs0, vs1 ; CHECK-BE-NEXT: lxv vs0, 32(r8) ; CHECK-BE-NEXT: lxv vs1, 48(r8) -; 
CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1 -; CHECK-BE-NEXT: lxv vs12, 64(r8) -; CHECK-BE-NEXT: lxv vs13, 80(r8) -; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: rldic r7, r4, 6, 26 +; CHECK-BE-NEXT: addi r4, r4, 3 +; CHECK-BE-NEXT: addi r6, r6, 6 +; CHECK-BE-NEXT: xvf32gerpn wacc1, vs0, vs1 +; CHECK-BE-NEXT: lxv vs0, 64(r8) +; CHECK-BE-NEXT: lxv vs1, 80(r8) ; CHECK-BE-NEXT: add r8, r3, r7 -; CHECK-BE-NEXT: xxmfacc acc2 -; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13 -; CHECK-BE-NEXT: stxvx vs8, r3, r7 -; CHECK-BE-NEXT: stxv vs9, 16(r8) -; CHECK-BE-NEXT: xxmfacc acc1 -; CHECK-BE-NEXT: stxv vs11, 48(r8) -; CHECK-BE-NEXT: stxv vs10, 32(r8) -; CHECK-BE-NEXT: stxv vs5, 80(r8) -; CHECK-BE-NEXT: stxv vs4, 64(r8) -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs7, 112(r8) -; CHECK-BE-NEXT: stxv vs6, 96(r8) -; CHECK-BE-NEXT: stxv vs1, 144(r8) -; CHECK-BE-NEXT: stxv vs0, 128(r8) -; CHECK-BE-NEXT: stxv vs3, 176(r8) -; CHECK-BE-NEXT: stxv vs2, 160(r8) +; CHECK-BE-NEXT: xvf32gernp wacc0, vs0, vs1 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 +; CHECK-BE-NEXT: stxvx v2, r3, r7 +; CHECK-BE-NEXT: stxv v5, 48(r8) +; CHECK-BE-NEXT: stxv v4, 32(r8) +; CHECK-BE-NEXT: stxv v3, 16(r8) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-NEXT: stxv v5, 112(r8) +; CHECK-BE-NEXT: stxv v4, 96(r8) +; CHECK-BE-NEXT: stxv v3, 80(r8) +; CHECK-BE-NEXT: stxv v2, 64(r8) +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 176(r8) +; CHECK-BE-NEXT: stxv v4, 160(r8) +; CHECK-BE-NEXT: stxv v3, 144(r8) +; CHECK-BE-NEXT: stxv v2, 128(r8) ; CHECK-BE-NEXT: bdnz .LBB9_2 ; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: testcse4: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: cmpwi r4, 1 +; CHECK-PWR10-NEXT: bltlr cr0 +; CHECK-PWR10-NEXT: # %bb.1: # %for.body.preheader +; CHECK-PWR10-NEXT: clrldi r4, r4, 32 +; CHECK-PWR10-NEXT: li r6, 0 +; CHECK-PWR10-NEXT: mtctr r4 +; CHECK-PWR10-NEXT: li r4, 0 
+; CHECK-PWR10-NEXT: .p2align 4 +; CHECK-PWR10-NEXT: .LBB9_2: # %for.body +; CHECK-PWR10-NEXT: # +; CHECK-PWR10-NEXT: rldic r7, r6, 4, 28 +; CHECK-PWR10-NEXT: xxsetaccz acc2 +; CHECK-PWR10-NEXT: xxsetaccz acc1 +; CHECK-PWR10-NEXT: addi r6, r6, 6 +; CHECK-PWR10-NEXT: add r8, r5, r7 +; CHECK-PWR10-NEXT: lxvx vs0, r5, r7 +; CHECK-PWR10-NEXT: rldic r7, r4, 6, 26 +; CHECK-PWR10-NEXT: addi r4, r4, 3 +; CHECK-PWR10-NEXT: lxv vs1, 16(r8) +; CHECK-PWR10-NEXT: xvf32gerpp acc2, vs0, vs1 +; CHECK-PWR10-NEXT: lxv vs0, 32(r8) +; CHECK-PWR10-NEXT: lxv vs1, 48(r8) +; CHECK-PWR10-NEXT: xvf32gerpn acc1, vs0, vs1 +; CHECK-PWR10-NEXT: lxv vs12, 64(r8) +; CHECK-PWR10-NEXT: lxv vs13, 80(r8) +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: add r8, r3, r7 +; CHECK-PWR10-NEXT: xxmfacc acc2 +; CHECK-PWR10-NEXT: xvf32gernp acc0, vs12, vs13 +; CHECK-PWR10-NEXT: stxvx vs11, r3, r7 +; CHECK-PWR10-NEXT: stxv vs8, 48(r8) +; CHECK-PWR10-NEXT: xxmfacc acc1 +; CHECK-PWR10-NEXT: stxv vs9, 32(r8) +; CHECK-PWR10-NEXT: stxv vs10, 16(r8) +; CHECK-PWR10-NEXT: stxv vs4, 112(r8) +; CHECK-PWR10-NEXT: stxv vs5, 96(r8) +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs6, 80(r8) +; CHECK-PWR10-NEXT: stxv vs7, 64(r8) +; CHECK-PWR10-NEXT: stxv vs0, 176(r8) +; CHECK-PWR10-NEXT: stxv vs1, 160(r8) +; CHECK-PWR10-NEXT: stxv vs2, 144(r8) +; CHECK-PWR10-NEXT: stxv vs3, 128(r8) +; CHECK-PWR10-NEXT: bdnz .LBB9_2 +; CHECK-PWR10-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: testcse4: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: cmpwi r4, 1 +; CHECK-BE-PWR10-NEXT: bltlr cr0 +; CHECK-BE-PWR10-NEXT: # %bb.1: # %for.body.preheader +; CHECK-BE-PWR10-NEXT: clrldi r4, r4, 32 +; CHECK-BE-PWR10-NEXT: li r6, 0 +; CHECK-BE-PWR10-NEXT: mtctr r4 +; CHECK-BE-PWR10-NEXT: li r4, 0 +; CHECK-BE-PWR10-NEXT: .p2align 4 +; CHECK-BE-PWR10-NEXT: .LBB9_2: # %for.body +; CHECK-BE-PWR10-NEXT: # +; CHECK-BE-PWR10-NEXT: rldic r7, r6, 4, 28 +; CHECK-BE-PWR10-NEXT: xxsetaccz 
acc2 +; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 +; CHECK-BE-PWR10-NEXT: addi r6, r6, 6 +; CHECK-BE-PWR10-NEXT: add r8, r5, r7 +; CHECK-BE-PWR10-NEXT: lxvx vs0, r5, r7 +; CHECK-BE-PWR10-NEXT: rldic r7, r4, 6, 26 +; CHECK-BE-PWR10-NEXT: addi r4, r4, 3 +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r8) +; CHECK-BE-PWR10-NEXT: xvf32gerpp acc2, vs0, vs1 +; CHECK-BE-PWR10-NEXT: lxv vs0, 32(r8) +; CHECK-BE-PWR10-NEXT: lxv vs1, 48(r8) +; CHECK-BE-PWR10-NEXT: xvf32gerpn acc1, vs0, vs1 +; CHECK-BE-PWR10-NEXT: lxv vs12, 64(r8) +; CHECK-BE-PWR10-NEXT: lxv vs13, 80(r8) +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: add r8, r3, r7 +; CHECK-BE-PWR10-NEXT: xxmfacc acc2 +; CHECK-BE-PWR10-NEXT: xvf32gernp acc0, vs12, vs13 +; CHECK-BE-PWR10-NEXT: stxvx vs8, r3, r7 +; CHECK-BE-PWR10-NEXT: stxv vs9, 16(r8) +; CHECK-BE-PWR10-NEXT: xxmfacc acc1 +; CHECK-BE-PWR10-NEXT: stxv vs11, 48(r8) +; CHECK-BE-PWR10-NEXT: stxv vs10, 32(r8) +; CHECK-BE-PWR10-NEXT: stxv vs5, 80(r8) +; CHECK-BE-PWR10-NEXT: stxv vs4, 64(r8) +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs7, 112(r8) +; CHECK-BE-PWR10-NEXT: stxv vs6, 96(r8) +; CHECK-BE-PWR10-NEXT: stxv vs1, 144(r8) +; CHECK-BE-PWR10-NEXT: stxv vs0, 128(r8) +; CHECK-BE-PWR10-NEXT: stxv vs3, 176(r8) +; CHECK-BE-PWR10-NEXT: stxv vs2, 160(r8) +; CHECK-BE-PWR10-NEXT: bdnz .LBB9_2 +; CHECK-BE-PWR10-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-BE-PWR10-NEXT: blr entry: %cmp55 = icmp sgt i32 %lim, 0 br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup @@ -540,27 +896,26 @@ define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> %vc) nounwind { ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: xxsetaccz acc1 +; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0 +; CHECK-NEXT: stxv v0, 48(r3) +; CHECK-NEXT: stxv v1, 32(r3) +; CHECK-NEXT: stxv v4, 16(r3) +; CHECK-NEXT: stxv v5, 0(r3) +; 
CHECK-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r3) -; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) -; CHECK-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-NEXT: xxmfacc acc1 -; CHECK-NEXT: stxv vs4, 80(r1) -; CHECK-NEXT: stxv vs5, 64(r1) -; CHECK-NEXT: stxv vs6, 48(r1) -; CHECK-NEXT: stxv vs7, 32(r1) +; CHECK-NEXT: stxvp vsp36, 64(r1) +; CHECK-NEXT: stxvp vsp34, 32(r1) ; CHECK-NEXT: bl testRedundantPrimeUnprimeF@notoc -; CHECK-NEXT: lxvp vsp0, 64(r1) -; CHECK-NEXT: lxvp vsp2, 32(r1) -; CHECK-NEXT: stxv vs0, 112(r30) -; CHECK-NEXT: stxv vs1, 96(r30) -; CHECK-NEXT: stxv vs2, 80(r30) -; CHECK-NEXT: stxv vs3, 64(r30) +; CHECK-NEXT: lxvp vsp34, 64(r1) +; CHECK-NEXT: lxvp vsp36, 32(r1) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 112(r30) +; CHECK-NEXT: stxv v5, 96(r30) +; CHECK-NEXT: stxv v2, 80(r30) +; CHECK-NEXT: stxv v3, 64(r30) ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -572,34 +927,100 @@ define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> %vc) nounwind { ; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: std r0, 16(r1) ; CHECK-BE-NEXT: stdu r1, -192(r1) -; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: xxsetaccz acc1 +; CHECK-BE-NEXT: dmxxsetaccz wacc0 ; CHECK-BE-NEXT: std r30, 176(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0 +; CHECK-BE-NEXT: stxv v1, 48(r3) +; CHECK-BE-NEXT: stxv v0, 32(r3) +; CHECK-BE-NEXT: stxv v5, 16(r3) +; CHECK-BE-NEXT: stxv v4, 0(r3) +; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 ; CHECK-BE-NEXT: mr r30, r3 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) 
-; CHECK-BE-NEXT: stxv vs2, 32(r3) -; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc1 -; CHECK-BE-NEXT: stxv vs4, 112(r1) -; CHECK-BE-NEXT: stxv vs5, 128(r1) -; CHECK-BE-NEXT: stxv vs6, 144(r1) -; CHECK-BE-NEXT: stxv vs7, 160(r1) +; CHECK-BE-NEXT: stxvp vsp36, 112(r1) +; CHECK-BE-NEXT: stxvp vsp34, 144(r1) ; CHECK-BE-NEXT: bl testRedundantPrimeUnprimeF ; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: lxvp vsp0, 112(r1) -; CHECK-BE-NEXT: lxvp vsp2, 144(r1) -; CHECK-BE-NEXT: stxv vs3, 112(r30) -; CHECK-BE-NEXT: stxv vs2, 96(r30) -; CHECK-BE-NEXT: stxv vs1, 80(r30) -; CHECK-BE-NEXT: stxv vs0, 64(r30) +; CHECK-BE-NEXT: lxvp vsp34, 112(r1) +; CHECK-BE-NEXT: lxvp vsp36, 144(r1) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 112(r30) +; CHECK-BE-NEXT: stxv v4, 96(r30) +; CHECK-BE-NEXT: stxv v3, 80(r30) +; CHECK-BE-NEXT: stxv v2, 64(r30) ; CHECK-BE-NEXT: ld r30, 176(r1) # 8-byte Folded Reload ; CHECK-BE-NEXT: addi r1, r1, 192 ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: testRedundantPrimeUnprime: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: mflr r0 +; CHECK-PWR10-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-PWR10-NEXT: std r0, 16(r1) +; CHECK-PWR10-NEXT: stdu r1, -112(r1) +; CHECK-PWR10-NEXT: xxsetaccz acc0 +; CHECK-PWR10-NEXT: xxsetaccz acc1 +; CHECK-PWR10-NEXT: mr r30, r3 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc1 +; CHECK-PWR10-NEXT: stxv vs4, 80(r1) +; CHECK-PWR10-NEXT: stxv vs5, 64(r1) +; CHECK-PWR10-NEXT: stxv vs6, 48(r1) +; CHECK-PWR10-NEXT: stxv vs7, 32(r1) +; CHECK-PWR10-NEXT: bl testRedundantPrimeUnprimeF@notoc +; CHECK-PWR10-NEXT: lxvp vsp0, 64(r1) +; CHECK-PWR10-NEXT: 
lxvp vsp2, 32(r1) +; CHECK-PWR10-NEXT: stxv vs0, 112(r30) +; CHECK-PWR10-NEXT: stxv vs1, 96(r30) +; CHECK-PWR10-NEXT: stxv vs2, 80(r30) +; CHECK-PWR10-NEXT: stxv vs3, 64(r30) +; CHECK-PWR10-NEXT: addi r1, r1, 112 +; CHECK-PWR10-NEXT: ld r0, 16(r1) +; CHECK-PWR10-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-PWR10-NEXT: mtlr r0 +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: testRedundantPrimeUnprime: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: mflr r0 +; CHECK-BE-PWR10-NEXT: std r0, 16(r1) +; CHECK-BE-PWR10-NEXT: stdu r1, -192(r1) +; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 +; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 +; CHECK-BE-PWR10-NEXT: std r30, 176(r1) # 8-byte Folded Spill +; CHECK-BE-PWR10-NEXT: mr r30, r3 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc1 +; CHECK-BE-PWR10-NEXT: stxv vs4, 112(r1) +; CHECK-BE-PWR10-NEXT: stxv vs5, 128(r1) +; CHECK-BE-PWR10-NEXT: stxv vs6, 144(r1) +; CHECK-BE-PWR10-NEXT: stxv vs7, 160(r1) +; CHECK-BE-PWR10-NEXT: bl testRedundantPrimeUnprimeF +; CHECK-BE-PWR10-NEXT: nop +; CHECK-BE-PWR10-NEXT: lxvp vsp0, 112(r1) +; CHECK-BE-PWR10-NEXT: lxvp vsp2, 144(r1) +; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r30) +; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r30) +; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r30) +; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r30) +; CHECK-BE-PWR10-NEXT: ld r30, 176(r1) # 8-byte Folded Reload +; CHECK-BE-PWR10-NEXT: addi r1, r1, 192 +; CHECK-BE-PWR10-NEXT: ld r0, 16(r1) +; CHECK-BE-PWR10-NEXT: mtlr r0 +; CHECK-BE-PWR10-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() store <512 x i1> %0, ptr %dst, align 64 @@ -617,35 +1038,67 @@ declare void @llvm.ppc.vsx.stxvp(<256 x i1>, ptr) define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture 
%resp) { ; CHECK-LABEL: test_ldst_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: plxvp vsp36, 8(r4), 0 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: plxvp vsp36, 8(r4), 0 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test_ldst_1: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) 
+; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: plxvp vsp36, 8(r4), 0 +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test_ldst_1: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: plxvp vsp36, 8(r4), 0 +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = getelementptr i8, ptr %vpp, i64 8 @@ -659,35 +1112,67 @@ entry: define void @test_ldst_2(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxvp vsp36, 0(r4) -; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 
16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxvp vsp36, 0(r4) -; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test_ldst_2: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxvp vsp36, 0(r4) +; CHECK-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test_ldst_2: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxvp vsp36, 0(r4) +; CHECK-BE-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: 
stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) @@ -700,35 +1185,67 @@ entry: define void @test_ldst_3(ptr nocapture readonly %vqp, i64 %offs, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxvp vsp36, 0(r5) -; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r9) -; CHECK-NEXT: stxv vs1, 32(r9) -; CHECK-NEXT: stxv vs2, 16(r9) -; CHECK-NEXT: stxv vs3, 0(r9) +; CHECK-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r9) +; CHECK-NEXT: stxv v5, 32(r9) +; CHECK-NEXT: stxv v2, 16(r9) +; CHECK-NEXT: stxv v3, 0(r9) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxvp vsp36, 0(r5) -; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r9) -; CHECK-BE-NEXT: stxv vs0, 0(r9) -; CHECK-BE-NEXT: stxv vs3, 48(r9) -; CHECK-BE-NEXT: stxv vs2, 32(r9) +; CHECK-BE-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; 
CHECK-BE-NEXT: stxv v5, 48(r9) +; CHECK-BE-NEXT: stxv v4, 32(r9) +; CHECK-BE-NEXT: stxv v3, 16(r9) +; CHECK-BE-NEXT: stxv v2, 0(r9) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test_ldst_3: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxvp vsp36, 0(r5) +; CHECK-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r9) +; CHECK-PWR10-NEXT: stxv vs1, 32(r9) +; CHECK-PWR10-NEXT: stxv vs2, 16(r9) +; CHECK-PWR10-NEXT: stxv vs3, 0(r9) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test_ldst_3: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxvp vsp36, 0(r5) +; CHECK-BE-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r9) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r9) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r9) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r9) +; CHECK-BE-PWR10-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll index ac6ad41633492..c17617d2ac248 100644 --- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll @@ -1,10 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < 
%s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-PWR10 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-PWR10 declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>) @@ -14,23 +20,19 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; CHECK-NEXT: vmr v1, v4 ; CHECK-NEXT: vmr v4, v3 ; CHECK-NEXT: vmr v0, v2 -; CHECK-NEXT: xxlor vs3, v5, v5 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvi4ger8pp wacc0, v2, v4 ; CHECK-NEXT: ld r3, 96(r1) -; CHECK-NEXT: xxlor vs2, v4, v4 -; CHECK-NEXT: xxlor vs0, v0, v0 -; CHECK-NEXT: xxlor vs1, v1, v1 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvi4ger8pp acc0, v2, v3 -; CHECK-NEXT: xvf16ger2pp acc0, v2, v1 -; CHECK-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 +; CHECK-NEXT: xvf16ger2pp wacc0, v0, v1 ; CHECK-NEXT: vmr v3, v2 ; CHECK-NEXT: vmr v2, v5 -; CHECK-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r3) -; CHECK-NEXT: stxv vs1, 32(r3) -; CHECK-NEXT: stxv vs2, 16(r3) -; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 +; CHECK-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r3) +; CHECK-NEXT: stxv v5, 32(r3) +; 
CHECK-NEXT: stxv v2, 16(r3) +; CHECK-NEXT: stxv v3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: intrinsics1: @@ -38,24 +40,106 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; CHECK-BE-NEXT: vmr v1, v4 ; CHECK-BE-NEXT: vmr v4, v3 ; CHECK-BE-NEXT: vmr v0, v2 -; CHECK-BE-NEXT: xxlor vs3, v5, v5 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvi4ger8pp wacc0, v2, v4 ; CHECK-BE-NEXT: ld r3, 112(r1) -; CHECK-BE-NEXT: xxlor vs2, v4, v4 -; CHECK-BE-NEXT: xxlor vs0, v0, v0 -; CHECK-BE-NEXT: xxlor vs1, v1, v1 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v3 -; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v1 -; CHECK-BE-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 +; CHECK-BE-NEXT: xvf16ger2pp wacc0, v0, v1 ; CHECK-BE-NEXT: vmr v3, v2 ; CHECK-BE-NEXT: vmr v2, v5 -; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r3) -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 +; CHECK-BE-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r3) +; CHECK-BE-NEXT: stxv v4, 32(r3) +; CHECK-BE-NEXT: stxv v3, 16(r3) +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: intrinsics1: +; CHECK-PWR10: # %bb.0: +; CHECK-PWR10-NEXT: vmr v1, v4 +; CHECK-PWR10-NEXT: vmr v4, v3 +; CHECK-PWR10-NEXT: vmr v0, v2 +; CHECK-PWR10-NEXT: xxlor vs3, v5, v5 +; CHECK-PWR10-NEXT: ld r3, 96(r1) +; CHECK-PWR10-NEXT: xxlor vs2, v4, v4 +; CHECK-PWR10-NEXT: xxlor vs0, v0, v0 +; CHECK-PWR10-NEXT: xxlor vs1, v1, v1 +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvi4ger8pp acc0, v2, v3 +; CHECK-PWR10-NEXT: xvf16ger2pp acc0, v2, v1 +; CHECK-PWR10-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 +; CHECK-PWR10-NEXT: vmr v3, v2 +; CHECK-PWR10-NEXT: vmr v2, v5 +; CHECK-PWR10-NEXT: 
pmxvf64gernp acc0, vsp34, v0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r3) +; CHECK-PWR10-NEXT: stxv vs1, 32(r3) +; CHECK-PWR10-NEXT: stxv vs2, 16(r3) +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: intrinsics1: +; CHECK-BE-PWR10: # %bb.0: +; CHECK-BE-PWR10-NEXT: vmr v1, v4 +; CHECK-BE-PWR10-NEXT: vmr v4, v3 +; CHECK-BE-PWR10-NEXT: vmr v0, v2 +; CHECK-BE-PWR10-NEXT: xxlor vs3, v5, v5 +; CHECK-BE-PWR10-NEXT: ld r3, 112(r1) +; CHECK-BE-PWR10-NEXT: xxlor vs2, v4, v4 +; CHECK-BE-PWR10-NEXT: xxlor vs0, v0, v0 +; CHECK-BE-PWR10-NEXT: xxlor vs1, v1, v1 +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvi4ger8pp acc0, v2, v3 +; CHECK-BE-PWR10-NEXT: xvf16ger2pp acc0, v2, v1 +; CHECK-BE-PWR10-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 +; CHECK-BE-PWR10-NEXT: vmr v3, v2 +; CHECK-BE-PWR10-NEXT: vmr v2, v5 +; CHECK-BE-PWR10-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: intrinsics1: +; CHECK-WACC: # %bb.0: +; CHECK-WACC-NEXT: vmr v1, v4 +; CHECK-WACC-NEXT: vmr v4, v3 +; CHECK-WACC-NEXT: vmr v0, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v4 +; CHECK-WACC-NEXT: ld r3, 96(r1) +; CHECK-WACC-NEXT: xvf16ger2pp wacc0, v0, v1 +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: vmr v2, v5 +; CHECK-WACC-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 +; CHECK-WACC-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: intrinsics1: +; CHECK-BE-WACC: # %bb.0: +; CHECK-BE-WACC-NEXT: vmr v1, 
v4 +; CHECK-BE-WACC-NEXT: vmr v4, v3 +; CHECK-BE-WACC-NEXT: vmr v0, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v4 +; CHECK-BE-WACC-NEXT: ld r3, 112(r1) +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v0, v1 +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: vmr v2, v5 +; CHECK-BE-WACC-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 +; CHECK-BE-WACC-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc3, <16 x i8> %vc2, <16 x i8> %vc4) %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2) %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3) @@ -71,50 +155,128 @@ define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) { ; CHECK-LABEL: intrinsics2: ; CHECK: # %bb.0: ; CHECK-NEXT: lxv v2, 0(r3) -; CHECK-NEXT: lxv v3, 0(r4) -; CHECK-NEXT: xxlor vs0, v2, v2 ; CHECK-NEXT: lxv v4, 0(r5) +; CHECK-NEXT: lxv v3, 0(r4) ; CHECK-NEXT: lxv v5, 0(r6) -; CHECK-NEXT: xxlor vs2, v4, v4 -; CHECK-NEXT: xxlor vs3, v5, v5 -; CHECK-NEXT: xxlor vs1, v3, v3 ; CHECK-NEXT: vmr v1, v2 +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-NEXT: xvi8ger4pp wacc0, v2, v3 +; CHECK-NEXT: xvf16ger2pn wacc0, v2, v4 ; CHECK-NEXT: vmr v0, v5 -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvi8ger4pp acc0, v2, v3 -; CHECK-NEXT: xvf16ger2pn acc0, v2, v4 -; CHECK-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 -; CHECK-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs3, 0(r3) -; CHECK-NEXT: stxv vs2, 0(r4) -; CHECK-NEXT: stxv vs1, 0(r5) -; CHECK-NEXT: stxv vs0, 0(r6) +; CHECK-NEXT: pmxvf32gernn 
wacc0, v3, v5, 0, 0 +; CHECK-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v5, 0(r3) +; CHECK-NEXT: stxv v4, 0(r4) +; CHECK-NEXT: stxv v3, 0(r5) +; CHECK-NEXT: stxv v2, 0(r6) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: intrinsics2: ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: xxlor vs0, v2, v2 ; CHECK-BE-NEXT: lxv v4, 0(r5) +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: lxv v5, 0(r6) -; CHECK-BE-NEXT: xxlor vs2, v4, v4 -; CHECK-BE-NEXT: xxlor vs3, v5, v5 -; CHECK-BE-NEXT: xxlor vs1, v3, v3 ; CHECK-BE-NEXT: vmr v1, v2 +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-NEXT: xvi8ger4pp wacc0, v2, v3 +; CHECK-BE-NEXT: xvf16ger2pn wacc0, v2, v4 ; CHECK-BE-NEXT: vmr v0, v5 -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvi8ger4pp acc0, v2, v3 -; CHECK-BE-NEXT: xvf16ger2pn acc0, v2, v4 -; CHECK-BE-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 -; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: stxv vs1, 0(r4) -; CHECK-BE-NEXT: stxv vs2, 0(r5) -; CHECK-BE-NEXT: stxv vs3, 0(r6) +; CHECK-BE-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0 +; CHECK-BE-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: stxv v3, 0(r4) +; CHECK-BE-NEXT: stxv v4, 0(r5) +; CHECK-BE-NEXT: stxv v5, 0(r6) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: intrinsics2: +; CHECK-PWR10: # %bb.0: +; CHECK-PWR10-NEXT: lxv v2, 0(r3) +; CHECK-PWR10-NEXT: lxv v3, 0(r4) +; CHECK-PWR10-NEXT: xxlor vs0, v2, v2 +; CHECK-PWR10-NEXT: lxv v4, 0(r5) +; CHECK-PWR10-NEXT: lxv v5, 0(r6) +; CHECK-PWR10-NEXT: xxlor vs2, v4, v4 +; CHECK-PWR10-NEXT: xxlor vs3, v5, v5 +; CHECK-PWR10-NEXT: xxlor vs1, v3, v3 +; CHECK-PWR10-NEXT: vmr v1, v2 +; CHECK-PWR10-NEXT: vmr v0, v5 +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: 
xvi8ger4pp acc0, v2, v3 +; CHECK-PWR10-NEXT: xvf16ger2pn acc0, v2, v4 +; CHECK-PWR10-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 +; CHECK-PWR10-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs3, 0(r3) +; CHECK-PWR10-NEXT: stxv vs2, 0(r4) +; CHECK-PWR10-NEXT: stxv vs1, 0(r5) +; CHECK-PWR10-NEXT: stxv vs0, 0(r6) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: intrinsics2: +; CHECK-BE-PWR10: # %bb.0: +; CHECK-BE-PWR10-NEXT: lxv v2, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v3, 0(r4) +; CHECK-BE-PWR10-NEXT: xxlor vs0, v2, v2 +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r5) +; CHECK-BE-PWR10-NEXT: lxv v5, 0(r6) +; CHECK-BE-PWR10-NEXT: xxlor vs2, v4, v4 +; CHECK-BE-PWR10-NEXT: xxlor vs3, v5, v5 +; CHECK-BE-PWR10-NEXT: xxlor vs1, v3, v3 +; CHECK-BE-PWR10-NEXT: vmr v1, v2 +; CHECK-BE-PWR10-NEXT: vmr v0, v5 +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvi8ger4pp acc0, v2, v3 +; CHECK-BE-PWR10-NEXT: xvf16ger2pn acc0, v2, v4 +; CHECK-BE-PWR10-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 +; CHECK-BE-PWR10-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: stxv vs1, 0(r4) +; CHECK-BE-PWR10-NEXT: stxv vs2, 0(r5) +; CHECK-BE-PWR10-NEXT: stxv vs3, 0(r6) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: intrinsics2: +; CHECK-WACC: # %bb.0: +; CHECK-WACC-NEXT: lxv v2, 0(r3) +; CHECK-WACC-NEXT: lxv v4, 0(r5) +; CHECK-WACC-NEXT: lxv v3, 0(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r6) +; CHECK-WACC-NEXT: vmr v1, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-WACC-NEXT: xvi8ger4pp wacc0, v2, v3 +; CHECK-WACC-NEXT: xvf16ger2pn wacc0, v2, v4 +; CHECK-WACC-NEXT: vmr v0, v5 +; CHECK-WACC-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0 +; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 0(r4) +; CHECK-WACC-NEXT: stxv v3, 0(r5) +; 
CHECK-WACC-NEXT: stxv v2, 0(r6) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: intrinsics2: +; CHECK-BE-WACC: # %bb.0: +; CHECK-BE-WACC-NEXT: lxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 0(r5) +; CHECK-BE-WACC-NEXT: lxv v3, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 0(r6) +; CHECK-BE-WACC-NEXT: vmr v1, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi8ger4pp wacc0, v2, v3 +; CHECK-BE-WACC-NEXT: xvf16ger2pn wacc0, v2, v4 +; CHECK-BE-WACC-NEXT: vmr v0, v5 +; CHECK-BE-WACC-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0 +; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 0(r4) +; CHECK-BE-WACC-NEXT: stxv v4, 0(r5) +; CHECK-BE-WACC-NEXT: stxv v5, 0(r6) +; CHECK-BE-WACC-NEXT: blr %vc1 = load <16 x i8>, ptr %ptr1, align 16 %vc2 = load <16 x i8>, ptr %ptr2, align 16 %vc3 = load <16 x i8>, ptr %ptr3, align 16 @@ -140,23 +302,61 @@ define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) { define void @test1(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvi4ger8 acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvi4ger8 wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvi4ger8 acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvi4ger8 wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; 
CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test1: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xvi4ger8 acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test1: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xvi4ger8 acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test1: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvi4ger8 wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi4ger8 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -169,33 +369,91 @@ declare <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8>, <16 x i8>) define void @test2(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: 
xvi4ger8pp acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test2: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test2: 
+; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -209,23 +467,61 @@ declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test3(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvi4ger8 acc0, 
v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test3: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test3: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test3: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; 
CHECK-BE-WACC-LABEL: test3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -238,33 +534,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8>, <16 x i8>, i32, i32, i32) define void @test4(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; 
CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test4: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test4: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test4: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) 
+; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test4: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -278,23 +632,61 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test5(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvi8ger4 acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvi8ger4 wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvi8ger4 acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvi8ger4 wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test5: +; CHECK-PWR10: # %bb.0: # %entry 
+; CHECK-PWR10-NEXT: xvi8ger4 acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test5: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xvi8ger4 acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test5: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvi8ger4 wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test5: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi8ger4 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -307,33 +699,91 @@ declare <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8>, <16 x i8>) define void @test6(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvi8ger4pp acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; 
CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvi8ger4pp wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvi8ger4pp acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvi8ger4pp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test6: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvi8ger4pp acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test6: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; 
CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvi8ger4pp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test6: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi8ger4pp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test6: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi8ger4pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -347,23 +797,61 @@ declare <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test7(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvi8ger4 
wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test7: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test7: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test7: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test7: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv 
v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -376,33 +864,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8>, <16 x i8>, i32, i32, i32) define void @test8(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test8: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 +; 
CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test8: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test8: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test8: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test8: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; 
CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -416,23 +962,61 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test9(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvi16ger2s acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvi16ger2s wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test9: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvi16ger2s acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvi16ger2s wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test9: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xvi16ger2s acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; 
CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test9: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xvi16ger2s acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test9: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvi16ger2s wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test9: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi16ger2s wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -445,33 +1029,91 @@ declare <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8>, <16 x i8>) define void @test10(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvi16ger2spp acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvi16ger2spp wacc0, v2, v2 +; 
CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test10: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvi16ger2spp acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvi16ger2spp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test10: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvi16ger2spp acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test10: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvi16ger2spp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: 
stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test10: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi16ger2spp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test10: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi16ger2spp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -485,23 +1127,61 @@ declare <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test11(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) 
+; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test11: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test11: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test11: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test11: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test11: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: 
blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -514,33 +1194,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8>, <16 x i8>, i32, i32, i3 define void @test12(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test12: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test12: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 
16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test12: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test12: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test12: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test12: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi16ger2spp 
wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -554,23 +1292,61 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8> define void @test13(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvf16ger2 acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvf16ger2 wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test13: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvf16ger2 acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf16ger2 wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test13: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xvf16ger2 acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test13: +; CHECK-BE-PWR10: # %bb.0: # %entry 
+; CHECK-BE-PWR10-NEXT: xvf16ger2 acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test13: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvf16ger2 wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test13: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvf16ger2 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -583,33 +1359,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8>, <16 x i8>) define void @test14(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf16ger2pp acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvf16ger2pp wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 
16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test14: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf16ger2pp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test14: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf16ger2pp acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test14: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf16ger2pp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; 
CHECK-WACC-LABEL: test14: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2pp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test14: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -623,33 +1457,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test15(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf16ger2pn acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: 
xvf16ger2pn wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test15: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf16ger2pn acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf16ger2pn wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test15: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf16ger2pn acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test15: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf16ger2pn acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) 
+; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test15: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2pn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test15: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2pn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -663,33 +1555,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test16(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf16ger2np acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; 
CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvf16ger2np wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test16: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf16ger2np acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf16ger2np wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test16: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf16ger2np acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test16: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; 
CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf16ger2np acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test16: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2np wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test16: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2np wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -703,33 +1653,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>) define void @test17(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test17: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf16ger2nn acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; 
CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvf16ger2nn wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test17: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf16ger2nn acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf16ger2nn wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test17: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf16ger2nn acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test17: +; CHECK-BE-PWR10: # %bb.0: # %entry +; 
CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf16ger2nn acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test17: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf16ger2nn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test17: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf16ger2nn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -743,23 +1751,61 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test18(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test18: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 -; 
CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test18: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test18: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test18: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test18: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: 
test18: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -772,33 +1818,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8>, <16 x i8>, i32, i32, i32 define void @test19(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test19: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test19: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 
16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test19: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test19: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test19: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; 
CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test19: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -812,33 +1916,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test20(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test20: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test20: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; 
CHECK-BE-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test20: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test20: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test20: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, 
vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test20: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -852,33 +2014,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>, define void @test21(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test21: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; 
CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test21: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test21: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test21: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; 
CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test21: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test21: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -892,33 +2112,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>, define void @test22(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: 
lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test22: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test22: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test22: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 
+; CHECK-BE-PWR10-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test22: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test22: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -932,23 +2210,61 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>, define void @test23(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test23: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvf32ger acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; 
CHECK-NEXT: xvf32ger wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test23: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvf32ger acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf32ger wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test23: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: xvf32ger acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test23: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: xvf32ger acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test23: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: xvf32ger wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test23: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvf32ger wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) 
+; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -961,33 +2277,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>) define void @test24(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test24: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test24: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv 
v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test24: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test24: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test24: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test24: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: 
dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1001,33 +2375,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test25(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test25: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test25: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf32gerpn wacc0, v2, 
v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test25: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test25: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test25: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test25: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: 
lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1041,33 +2473,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test26(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test26: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf32gernp acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test26: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf32gernp acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; 
CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test26: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf32gernp acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test26: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf32gernp acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test26: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test26: +; CHECK-BE-WACC: 
# %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1081,33 +2571,91 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test27(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test27: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: xvf32gernn acc0, v2, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: xvf32gernn wacc0, v2, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test27: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: xvf32gernn acc0, v2, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 
0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: xvf32gernn wacc0, v2, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test27: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: xvf32gernn acc0, v2, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test27: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: xvf32gernn acc0, v2, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test27: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvf32gernn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) 
+; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test27: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvf32gernn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -1121,23 +2669,61 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test28(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test28: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test28: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; 
CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test28: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test28: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test28: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test28: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -1150,33 +2736,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8>, <16 x i8>, i32, i32) define void @test29(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test29: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; 
CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test29: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test29: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test29: +; CHECK-BE-PWR10: # 
%bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test29: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test29: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) @@ -1190,33 +2834,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test30(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test30: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test30: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test30: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; 
CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test30: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test30: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test30: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 
0) @@ -1230,33 +2932,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>, define void @test31(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test31: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test31: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test31: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; 
CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test31: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test31: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test31: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv 
v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) @@ -1270,33 +3030,91 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test32(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) -; CHECK-NEXT: xxmtacc acc0 -; CHECK-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test32: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxmtacc acc0 -; CHECK-BE-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; 
CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test32: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test32: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test32: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test32: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 
+; CHECK-BE-WACC-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf32gernn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0) @@ -1312,25 +3130,71 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64ger acc0, vsp36, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvf64ger wacc0, vsp36, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test33: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64ger acc0, vsp36, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf64ger wacc0, vsp36, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test33: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: xvf64ger acc0, vsp36, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; 
CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test33: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: xvf64ger acc0, vsp36, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test33: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64ger wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test33: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64ger wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <256 x i1>, ptr %vpp, align 32 %1 = tail call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> %0, <16 x i8> %vc) @@ -1344,37 +3208,103 @@ declare <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1>, <16 x i8>) define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test34: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: 
xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gerpp acc0, vsp36, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvf64gerpp wacc0, vsp36, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test34: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64gerpp acc0, vsp36, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf64gerpp wacc0, vsp36, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test34: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: xvf64gerpp acc0, vsp36, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr 
+; +; CHECK-BE-PWR10-LABEL: test34: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: xvf64gerpp acc0, vsp36, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test34: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gerpp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test34: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gerpp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1389,37 +3319,103 @@ declare 
<512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>) define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test35: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gerpn acc0, vsp36, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvf64gerpn wacc0, vsp36, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test35: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64gerpn acc0, vsp36, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf64gerpn wacc0, vsp36, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test35: +; CHECK-PWR10: 
# %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: xvf64gerpn acc0, vsp36, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test35: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: xvf64gerpn acc0, vsp36, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test35: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gerpn wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test35: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: 
dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gerpn wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1434,37 +3430,103 @@ declare <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>) define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test36: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test36: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 
16(r4) -; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test36: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test36: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test36: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv 
v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test36: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1479,37 +3541,103 @@ declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test37: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gernn acc0, vsp36, v2 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: xvf64gernn wacc0, vsp36, v2 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv 
v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test37: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64gernn acc0, vsp36, v2 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: xvf64gernn wacc0, vsp36, v2 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test37: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: xvf64gernn acc0, vsp36, v2 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test37: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: xvf64gernn acc0, vsp36, v2 +; 
CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test37: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: xvf64gernn wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test37: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: xvf64gernn wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1526,25 +3654,71 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 +; 
CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test38: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test38: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test38: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test38: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; 
CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test38: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <256 x i1>, ptr %vpp, align 32 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1> %0, <16 x i8> %vc, i32 0, i32 0) @@ -1558,37 +3732,103 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1>, <16 x i8>, i32, i32) define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test39: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test39: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; 
CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test39: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test39: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test39: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; 
CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test39: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1603,37 +3843,103 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>, define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test40: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 
32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test40: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test40: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test40: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: 
lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test40: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test40: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1648,37 +3954,103 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>, define void @test41(ptr %vqp, ptr %vpp, 
<16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test41: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test41: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test41: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv 
vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test41: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test41: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) +; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test41: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; 
CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 @@ -1693,37 +4065,103 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>, define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test42: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv vs3, 0(r3) -; CHECK-NEXT: lxv vs2, 16(r3) -; CHECK-NEXT: lxv vs1, 32(r3) -; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: lxv v5, 0(r3) +; CHECK-NEXT: lxv v1, 32(r3) +; CHECK-NEXT: lxv v4, 16(r3) +; CHECK-NEXT: lxv v0, 48(r3) +; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-NEXT: lxv v4, 16(r4) -; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs0, 48(r7) -; CHECK-NEXT: stxv vs1, 32(r7) -; CHECK-NEXT: stxv vs2, 16(r7) -; CHECK-NEXT: stxv vs3, 0(r7) +; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-NEXT: stxv v4, 48(r7) +; CHECK-NEXT: stxv v5, 32(r7) +; CHECK-NEXT: stxv v2, 16(r7) +; CHECK-NEXT: stxv v3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test42: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: lxv v5, 48(r3) +; CHECK-BE-NEXT: lxv v1, 16(r3) +; CHECK-BE-NEXT: lxv v4, 32(r3) +; CHECK-BE-NEXT: lxv v0, 0(r3) +; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 ; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; 
CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs1, 16(r7) -; CHECK-BE-NEXT: stxv vs0, 0(r7) -; CHECK-BE-NEXT: stxv vs3, 48(r7) -; CHECK-BE-NEXT: stxv vs2, 32(r7) +; CHECK-BE-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-NEXT: stxv v5, 48(r7) +; CHECK-BE-NEXT: stxv v4, 32(r7) +; CHECK-BE-NEXT: stxv v3, 16(r7) +; CHECK-BE-NEXT: stxv v2, 0(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-PWR10-LABEL: test42: +; CHECK-PWR10: # %bb.0: # %entry +; CHECK-PWR10-NEXT: lxv vs3, 0(r3) +; CHECK-PWR10-NEXT: lxv vs2, 16(r3) +; CHECK-PWR10-NEXT: lxv vs1, 32(r3) +; CHECK-PWR10-NEXT: lxv vs0, 48(r3) +; CHECK-PWR10-NEXT: lxv v4, 16(r4) +; CHECK-PWR10-NEXT: xxmtacc acc0 +; CHECK-PWR10-NEXT: lxv v5, 0(r4) +; CHECK-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-PWR10-NEXT: xxmfacc acc0 +; CHECK-PWR10-NEXT: stxv vs0, 48(r7) +; CHECK-PWR10-NEXT: stxv vs1, 32(r7) +; CHECK-PWR10-NEXT: stxv vs2, 16(r7) +; CHECK-PWR10-NEXT: stxv vs3, 0(r7) +; CHECK-PWR10-NEXT: blr +; +; CHECK-BE-PWR10-LABEL: test42: +; CHECK-BE-PWR10: # %bb.0: # %entry +; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) +; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) +; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) +; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) +; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) +; CHECK-BE-PWR10-NEXT: xxmtacc acc0 +; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) +; CHECK-BE-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-BE-PWR10-NEXT: xxmfacc acc0 +; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) +; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) +; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) +; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) +; CHECK-BE-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test42: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxv v4, 16(r4) 
+; CHECK-WACC-NEXT: lxv v5, 0(r4) +; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr +; CHECK-BE-WACC-LABEL: test42: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) +; CHECK-BE-WACC-NEXT: lxv v5, 16(r4) +; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = load <256 x i1>, ptr %vpp, align 32 From cd122bc956270d7700a4d0c3333ae09d6a95df67 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 30 Oct 2025 16:41:44 -0500 Subject: [PATCH 3/5] fix def of pmxvi16ger2pp for cpu=future and add testing --- llvm/lib/Target/PowerPC/PPCInstrMMA.td | 2 +- .../mma-integer-based-outer-product.ll | 166 ++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td index 686e0209d6e1f..fc3cde3f464bb 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td @@ -202,7 +202,7 @@ multiclass ACC_UM_M244_XO46 opcode, bits<8> xo, dag IOL, string asmbase, RegConstraint<"@earlyclobber $AT">; def PM#NAME#WPP : MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), + opcode, !or(xo, 0x20), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), 
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), diff --git a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll index e932aec2c7134..7b36fa4f64f71 100644 --- a/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-integer-based-outer-product.ll @@ -5,6 +5,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC ; Function Attrs: nofree nounwind writeonly define dso_local void @test1(ptr nocapture readnone %vqp, ptr nocapture readnone %vpp, <16 x i8> %vc, ptr nocapture %resp) { @@ -27,6 +33,26 @@ define dso_local void @test1(ptr nocapture readnone %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test1: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: xvi16ger2 wacc0, v2, v2 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: xvi16ger2 wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; 
CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2(<16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %resp, align 64 @@ -57,6 +83,26 @@ define dso_local void @test2(ptr nocapture readnone %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test2: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: pmxvi16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: pmxvi16ger2 wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2(<16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) store <512 x i1> %0, ptr %resp, align 64 @@ -97,6 +143,36 @@ define dso_local void @test3(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test3: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: xvi8ger4spp wacc0, v2, v2 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv 
v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi8ger4spp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -138,6 +214,36 @@ define dso_local void @test4(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test4: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: xvi16ger2pp wacc0, v2, v2 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test4: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi16ger2pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; 
CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.xvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -179,6 +285,36 @@ define dso_local void @test5(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test5: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: pmxvi8ger4spp wacc0, v2, v2, 0, 0, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test5: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi8ger4spp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi8ger4spp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) @@ -220,6 +356,36 @@ define dso_local void @test6(ptr nocapture readonly %vqp, ptr nocapture readnone ; CHECK-BE-NEXT: stxv vs3, 48(r7) ; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr +; +; CHECK-LE-WACC-LABEL: test6: 
+; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: lxv v5, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v1, 32(r3) +; CHECK-LE-WACC-NEXT: lxv v4, 16(r3) +; CHECK-LE-WACC-NEXT: lxv v0, 48(r3) +; CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-LE-WACC-NEXT: pmxvi16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v4, 48(r7) +; CHECK-LE-WACC-NEXT: stxv v5, 32(r7) +; CHECK-LE-WACC-NEXT: stxv v2, 16(r7) +; CHECK-LE-WACC-NEXT: stxv v3, 0(r7) +; CHECK-LE-WACC-NEXT: blr +; +; CHECK-BE-WACC-LABEL: test6: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: pmxvi16ger2pp wacc0, v2, v2, 0, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <512 x i1> @llvm.ppc.mma.pmxvi16ger2pp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc, i32 0, i32 0, i32 0) From 2a02a9a1500b6d80b89fd6853c4bf35bde3c9cfa Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 30 Oct 2025 17:05:36 -0500 Subject: [PATCH 4/5] isolate changes --- .../CodeGen/PowerPC/mma-acc-copy-hints.ll | 170 +- llvm/test/CodeGen/PowerPC/mma-acc-memops.ll | 428 +-- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll | 1646 ++++----- .../test/CodeGen/PowerPC/mma-outer-product.ll | 2996 +++++------------ 4 files changed, 2034 insertions(+), 3206 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll index c383485cbfba2..94121f09e36be 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll +++ 
b/llvm/test/CodeGen/PowerPC/mma-acc-copy-hints.ll @@ -7,10 +7,10 @@ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=future -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-FUTURE +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-LE-WACC ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -mcpu=future -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-FUTURE +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, ptr nocapture noundef writeonly %c) local_unnamed_addr #0 { ; CHECK-LABEL: testMultiply: @@ -98,90 +98,90 @@ define void @testMultiply(ptr nocapture noundef readonly %a, ptr nocapture nound ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr ; -; CHECK-LE-FUTURE-LABEL: testMultiply: -; CHECK-LE-FUTURE: # %bb.0: # %entry -; CHECK-LE-FUTURE-NEXT: mflr r0 -; CHECK-LE-FUTURE-NEXT: std r30, -16(r1) -; CHECK-LE-FUTURE-NEXT: std r0, 16(r1) -; CHECK-LE-FUTURE-NEXT: clrldi r0, r1, 59 -; CHECK-LE-FUTURE-NEXT: subfic r0, r0, -128 -; CHECK-LE-FUTURE-NEXT: mr r30, r1 -; CHECK-LE-FUTURE-NEXT: stdux r1, r1, r0 -; CHECK-LE-FUTURE-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill -; CHECK-LE-FUTURE-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill -; CHECK-LE-FUTURE-NEXT: lxv v31, 0(r3) -; CHECK-LE-FUTURE-NEXT: lxv v30, 0(r4) -; CHECK-LE-FUTURE-NEXT: addi r3, r1, 32 -; CHECK-LE-FUTURE-NEXT: std r29, -24(r30) # 8-byte Folded Spill -; CHECK-LE-FUTURE-NEXT: vmr v2, v31 -; CHECK-LE-FUTURE-NEXT: vmr v3, v30 -; CHECK-LE-FUTURE-NEXT: mr r29, r5 -; CHECK-LE-FUTURE-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc -; CHECK-LE-FUTURE-NEXT: dmxxsetaccz wacc0 -; CHECK-LE-FUTURE-NEXT: xvf32gerpp wacc0, v31, v30 -; 
CHECK-LE-FUTURE-NEXT: lxv vs0, 48(r1) -; CHECK-LE-FUTURE-NEXT: lxv vs1, 32(r1) -; CHECK-LE-FUTURE-NEXT: xvf32gerpp wacc0, vs1, vs0 -; CHECK-LE-FUTURE-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 -; CHECK-LE-FUTURE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 -; CHECK-LE-FUTURE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-LE-FUTURE-NEXT: stxv v5, 0(r29) -; CHECK-LE-FUTURE-NEXT: pstxv v4, 8(r29), 0 -; CHECK-LE-FUTURE-NEXT: stxv v3, 16(r29) -; CHECK-LE-FUTURE-NEXT: pstxv v2, 24(r29), 0 -; CHECK-LE-FUTURE-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload -; CHECK-LE-FUTURE-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload -; CHECK-LE-FUTURE-NEXT: ld r29, -24(r30) # 8-byte Folded Reload -; CHECK-LE-FUTURE-NEXT: mr r1, r30 -; CHECK-LE-FUTURE-NEXT: ld r0, 16(r1) -; CHECK-LE-FUTURE-NEXT: ld r30, -16(r1) -; CHECK-LE-FUTURE-NEXT: mtlr r0 -; CHECK-LE-FUTURE-NEXT: blr +; CHECK-LE-WACC-LABEL: testMultiply: +; CHECK-LE-WACC: # %bb.0: # %entry +; CHECK-LE-WACC-NEXT: mflr r0 +; CHECK-LE-WACC-NEXT: std r30, -16(r1) +; CHECK-LE-WACC-NEXT: std r0, 16(r1) +; CHECK-LE-WACC-NEXT: clrldi r0, r1, 59 +; CHECK-LE-WACC-NEXT: subfic r0, r0, -128 +; CHECK-LE-WACC-NEXT: mr r30, r1 +; CHECK-LE-WACC-NEXT: stdux r1, r1, r0 +; CHECK-LE-WACC-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill +; CHECK-LE-WACC-NEXT: lxv v31, 0(r3) +; CHECK-LE-WACC-NEXT: lxv v30, 0(r4) +; CHECK-LE-WACC-NEXT: addi r3, r1, 32 +; CHECK-LE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill +; CHECK-LE-WACC-NEXT: vmr v2, v31 +; CHECK-LE-WACC-NEXT: vmr v3, v30 +; CHECK-LE-WACC-NEXT: mr r29, r5 +; CHECK-LE-WACC-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_@notoc +; CHECK-LE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, v31, v30 +; CHECK-LE-WACC-NEXT: lxv vs0, 48(r1) +; CHECK-LE-WACC-NEXT: lxv vs1, 32(r1) +; CHECK-LE-WACC-NEXT: xvf32gerpp wacc0, vs1, vs0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; 
CHECK-LE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; CHECK-LE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-LE-WACC-NEXT: stxv v5, 0(r29) +; CHECK-LE-WACC-NEXT: pstxv v4, 8(r29), 0 +; CHECK-LE-WACC-NEXT: stxv v3, 16(r29) +; CHECK-LE-WACC-NEXT: pstxv v2, 24(r29), 0 +; CHECK-LE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload +; CHECK-LE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload +; CHECK-LE-WACC-NEXT: mr r1, r30 +; CHECK-LE-WACC-NEXT: ld r0, 16(r1) +; CHECK-LE-WACC-NEXT: ld r30, -16(r1) +; CHECK-LE-WACC-NEXT: mtlr r0 +; CHECK-LE-WACC-NEXT: blr ; -; CHECK-BE-FUTURE-LABEL: testMultiply: -; CHECK-BE-FUTURE: # %bb.0: # %entry -; CHECK-BE-FUTURE-NEXT: mflr r0 -; CHECK-BE-FUTURE-NEXT: std r30, -16(r1) -; CHECK-BE-FUTURE-NEXT: std r0, 16(r1) -; CHECK-BE-FUTURE-NEXT: clrldi r0, r1, 59 -; CHECK-BE-FUTURE-NEXT: subfic r0, r0, -224 -; CHECK-BE-FUTURE-NEXT: mr r30, r1 -; CHECK-BE-FUTURE-NEXT: stdux r1, r1, r0 -; CHECK-BE-FUTURE-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill -; CHECK-BE-FUTURE-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill -; CHECK-BE-FUTURE-NEXT: lxv v31, 0(r3) -; CHECK-BE-FUTURE-NEXT: lxv v30, 0(r4) -; CHECK-BE-FUTURE-NEXT: addi r3, r1, 128 -; CHECK-BE-FUTURE-NEXT: std r29, -24(r30) # 8-byte Folded Spill -; CHECK-BE-FUTURE-NEXT: vmr v2, v31 -; CHECK-BE-FUTURE-NEXT: vmr v3, v30 -; CHECK-BE-FUTURE-NEXT: mr r29, r5 -; CHECK-BE-FUTURE-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_ -; CHECK-BE-FUTURE-NEXT: nop -; CHECK-BE-FUTURE-NEXT: dmxxsetaccz wacc0 -; CHECK-BE-FUTURE-NEXT: xvf32gerpp wacc0, v31, v30 -; CHECK-BE-FUTURE-NEXT: lxv vs0, 128(r1) -; CHECK-BE-FUTURE-NEXT: lxv vs1, 144(r1) -; CHECK-BE-FUTURE-NEXT: xvf32gerpp wacc0, vs0, vs1 -; CHECK-BE-FUTURE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-FUTURE-NEXT: vmr v1, v2 -; CHECK-BE-FUTURE-NEXT: vmr v7, v4 -; CHECK-BE-FUTURE-NEXT: vmr v0, v3 -; CHECK-BE-FUTURE-NEXT: vmr v6, v5 -; 
CHECK-BE-FUTURE-NEXT: dmxxinstdmr512 wacc0, vsp38, vsp32, 0 -; CHECK-BE-FUTURE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-FUTURE-NEXT: stxv v2, 0(r29) -; CHECK-BE-FUTURE-NEXT: pstxv v3, 8(r29), 0 -; CHECK-BE-FUTURE-NEXT: stxv v4, 16(r29) -; CHECK-BE-FUTURE-NEXT: pstxv v5, 24(r29), 0 -; CHECK-BE-FUTURE-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload -; CHECK-BE-FUTURE-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload -; CHECK-BE-FUTURE-NEXT: ld r29, -24(r30) # 8-byte Folded Reload -; CHECK-BE-FUTURE-NEXT: mr r1, r30 -; CHECK-BE-FUTURE-NEXT: ld r0, 16(r1) -; CHECK-BE-FUTURE-NEXT: ld r30, -16(r1) -; CHECK-BE-FUTURE-NEXT: mtlr r0 -; CHECK-BE-FUTURE-NEXT: blr +; CHECK-BE-WACC-LABEL: testMultiply: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: mflr r0 +; CHECK-BE-WACC-NEXT: std r30, -16(r1) +; CHECK-BE-WACC-NEXT: std r0, 16(r1) +; CHECK-BE-WACC-NEXT: clrldi r0, r1, 59 +; CHECK-BE-WACC-NEXT: subfic r0, r0, -224 +; CHECK-BE-WACC-NEXT: mr r30, r1 +; CHECK-BE-WACC-NEXT: stdux r1, r1, r0 +; CHECK-BE-WACC-NEXT: stxv v30, -64(r30) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: stxv v31, -48(r30) # 16-byte Folded Spill +; CHECK-BE-WACC-NEXT: lxv v31, 0(r3) +; CHECK-BE-WACC-NEXT: lxv v30, 0(r4) +; CHECK-BE-WACC-NEXT: addi r3, r1, 128 +; CHECK-BE-WACC-NEXT: std r29, -24(r30) # 8-byte Folded Spill +; CHECK-BE-WACC-NEXT: vmr v2, v31 +; CHECK-BE-WACC-NEXT: vmr v3, v30 +; CHECK-BE-WACC-NEXT: mr r29, r5 +; CHECK-BE-WACC-NEXT: bl _Z15buildVectorPairPu13__vector_pairDv16_hS0_ +; CHECK-BE-WACC-NEXT: nop +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v31, v30 +; CHECK-BE-WACC-NEXT: lxv vs0, 128(r1) +; CHECK-BE-WACC-NEXT: lxv vs1, 144(r1) +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, vs0, vs1 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: vmr v1, v2 +; CHECK-BE-WACC-NEXT: vmr v7, v4 +; CHECK-BE-WACC-NEXT: vmr v0, v3 +; CHECK-BE-WACC-NEXT: vmr v6, v5 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp38, 
vsp32, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r29) +; CHECK-BE-WACC-NEXT: pstxv v3, 8(r29), 0 +; CHECK-BE-WACC-NEXT: stxv v4, 16(r29) +; CHECK-BE-WACC-NEXT: pstxv v5, 24(r29), 0 +; CHECK-BE-WACC-NEXT: lxv v31, -48(r30) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: lxv v30, -64(r30) # 16-byte Folded Reload +; CHECK-BE-WACC-NEXT: ld r29, -24(r30) # 8-byte Folded Reload +; CHECK-BE-WACC-NEXT: mr r1, r30 +; CHECK-BE-WACC-NEXT: ld r0, 16(r1) +; CHECK-BE-WACC-NEXT: ld r30, -16(r1) +; CHECK-BE-WACC-NEXT: mtlr r0 +; CHECK-BE-WACC-NEXT: blr entry: %vP = alloca <256 x i1>, align 32 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %vP) diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll index 1076230a0a7d7..bc5d5bed36e9b 100644 --- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll +++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll @@ -1,20 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \ ; RUN: --check-prefix=LE-PAIRED ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -disable-auto-paired-vec-st=false < %s | FileCheck %s \ -; RUN: --check-prefix=LE-PAIRED-PWR10 +; RUN: --check-prefix=LE-PAIRED-WACC ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \ ; RUN: FileCheck %s --check-prefix=BE-PAIRED ; RUN: llc -verify-machineinstrs 
-mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ +; RUN: -mcpu=future -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -disable-auto-paired-vec-st=false < %s | \ -; RUN: FileCheck %s --check-prefix=BE-PAIRED-PWR10 +; RUN: FileCheck %s --check-prefix=BE-PAIRED-WACC ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \ ; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: | FileCheck %s --check-prefix=LE-PWR9 @@ -34,59 +34,59 @@ define dso_local void @testLdSt(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-LABEL: testLdSt: ; LE-PAIRED: # %bb.0: # %entry -; LE-PAIRED-NEXT: plxv v3, f@PCREL+64(0), 1 -; LE-PAIRED-NEXT: plxv v5, f@PCREL+96(0), 1 -; LE-PAIRED-NEXT: plxv v2, f@PCREL+80(0), 1 -; LE-PAIRED-NEXT: plxv v4, f@PCREL+112(0), 1 -; LE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 -; LE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; LE-PAIRED-NEXT: pstxv v4, f@PCREL+176(0), 1 -; LE-PAIRED-NEXT: pstxv v5, f@PCREL+160(0), 1 -; LE-PAIRED-NEXT: pstxv v2, f@PCREL+144(0), 1 -; LE-PAIRED-NEXT: pstxv v3, f@PCREL+128(0), 1 +; LE-PAIRED-NEXT: plxv vs3, f@PCREL+64(0), 1 +; LE-PAIRED-NEXT: plxv vs2, f@PCREL+80(0), 1 +; LE-PAIRED-NEXT: plxv vs1, f@PCREL+96(0), 1 +; LE-PAIRED-NEXT: plxv vs0, f@PCREL+112(0), 1 +; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+176(0), 1 +; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+160(0), 1 +; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+144(0), 1 +; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+128(0), 1 ; LE-PAIRED-NEXT: blr ; -; LE-PAIRED-PWR10-LABEL: testLdSt: -; LE-PAIRED-PWR10: # %bb.0: # %entry -; LE-PAIRED-PWR10-NEXT: plxv vs3, f@PCREL+64(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs2, f@PCREL+80(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs1, f@PCREL+96(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs0, f@PCREL+112(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs0, f@PCREL+176(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs1, f@PCREL+160(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs2, f@PCREL+144(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs3, 
f@PCREL+128(0), 1 -; LE-PAIRED-PWR10-NEXT: blr +; LE-PAIRED-WACC-LABEL: testLdSt: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv v3, f@PCREL+64(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v5, f@PCREL+96(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v2, f@PCREL+80(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v4, f@PCREL+112(0), 1 +; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-WACC-NEXT: pstxv v4, f@PCREL+176(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v5, f@PCREL+160(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v2, f@PCREL+144(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v3, f@PCREL+128(0), 1 +; LE-PAIRED-WACC-NEXT: blr ; ; BE-PAIRED-LABEL: testLdSt: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha ; BE-PAIRED-NEXT: addi r3, r3, f@toc@l -; BE-PAIRED-NEXT: lxv v3, 112(r3) -; BE-PAIRED-NEXT: lxv v5, 80(r3) -; BE-PAIRED-NEXT: lxv v2, 96(r3) -; BE-PAIRED-NEXT: lxv v4, 64(r3) -; BE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 -; BE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; BE-PAIRED-NEXT: stxv v5, 176(r3) -; BE-PAIRED-NEXT: stxv v4, 160(r3) -; BE-PAIRED-NEXT: stxv v3, 144(r3) -; BE-PAIRED-NEXT: stxv v2, 128(r3) +; BE-PAIRED-NEXT: lxv vs3, 112(r3) +; BE-PAIRED-NEXT: lxv vs2, 96(r3) +; BE-PAIRED-NEXT: lxv vs1, 80(r3) +; BE-PAIRED-NEXT: lxv vs0, 64(r3) +; BE-PAIRED-NEXT: stxv vs1, 144(r3) +; BE-PAIRED-NEXT: stxv vs0, 128(r3) +; BE-PAIRED-NEXT: stxv vs3, 176(r3) +; BE-PAIRED-NEXT: stxv vs2, 160(r3) ; BE-PAIRED-NEXT: blr ; -; BE-PAIRED-PWR10-LABEL: testLdSt: -; BE-PAIRED-PWR10: # %bb.0: # %entry -; BE-PAIRED-PWR10-NEXT: addis r3, r2, f@toc@ha -; BE-PAIRED-PWR10-NEXT: addi r3, r3, f@toc@l -; BE-PAIRED-PWR10-NEXT: lxv vs3, 112(r3) -; BE-PAIRED-PWR10-NEXT: lxv vs2, 96(r3) -; BE-PAIRED-PWR10-NEXT: lxv vs1, 80(r3) -; BE-PAIRED-PWR10-NEXT: lxv vs0, 64(r3) -; BE-PAIRED-PWR10-NEXT: stxv vs1, 144(r3) -; BE-PAIRED-PWR10-NEXT: stxv vs0, 128(r3) -; BE-PAIRED-PWR10-NEXT: stxv vs3, 176(r3) -; 
BE-PAIRED-PWR10-NEXT: stxv vs2, 160(r3) -; BE-PAIRED-PWR10-NEXT: blr +; BE-PAIRED-WACC-LABEL: testLdSt: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, f@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, f@toc@l +; BE-PAIRED-WACC-NEXT: lxv v3, 112(r3) +; BE-PAIRED-WACC-NEXT: lxv v5, 80(r3) +; BE-PAIRED-WACC-NEXT: lxv v2, 96(r3) +; BE-PAIRED-WACC-NEXT: lxv v4, 64(r3) +; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-WACC-NEXT: stxv v5, 176(r3) +; BE-PAIRED-WACC-NEXT: stxv v4, 160(r3) +; BE-PAIRED-WACC-NEXT: stxv v3, 144(r3) +; BE-PAIRED-WACC-NEXT: stxv v2, 128(r3) +; BE-PAIRED-WACC-NEXT: blr ; ; LE-PWR9-LABEL: testLdSt: ; LE-PWR9: # %bb.0: # %entry @@ -173,36 +173,36 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: paddi r5, 0, f@PCREL, 1 ; LE-PAIRED-NEXT: sldi r3, r3, 6 ; LE-PAIRED-NEXT: add r6, r5, r3 -; LE-PAIRED-NEXT: lxvx v3, r5, r3 -; LE-PAIRED-NEXT: lxv v2, 16(r6) -; LE-PAIRED-NEXT: lxv v5, 32(r6) -; LE-PAIRED-NEXT: lxv v4, 48(r6) +; LE-PAIRED-NEXT: lxvx vs3, r5, r3 ; LE-PAIRED-NEXT: sldi r3, r4, 6 ; LE-PAIRED-NEXT: add r4, r5, r3 -; LE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 -; LE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; LE-PAIRED-NEXT: stxvx v3, r5, r3 -; LE-PAIRED-NEXT: stxv v4, 48(r4) -; LE-PAIRED-NEXT: stxv v5, 32(r4) -; LE-PAIRED-NEXT: stxv v2, 16(r4) +; LE-PAIRED-NEXT: lxv vs2, 16(r6) +; LE-PAIRED-NEXT: lxv vs1, 32(r6) +; LE-PAIRED-NEXT: lxv vs0, 48(r6) +; LE-PAIRED-NEXT: stxvx vs3, r5, r3 +; LE-PAIRED-NEXT: stxv vs0, 48(r4) +; LE-PAIRED-NEXT: stxv vs1, 32(r4) +; LE-PAIRED-NEXT: stxv vs2, 16(r4) ; LE-PAIRED-NEXT: blr ; -; LE-PAIRED-PWR10-LABEL: testXLdSt: -; LE-PAIRED-PWR10: # %bb.0: # %entry -; LE-PAIRED-PWR10-NEXT: paddi r5, 0, f@PCREL, 1 -; LE-PAIRED-PWR10-NEXT: sldi r3, r3, 6 -; LE-PAIRED-PWR10-NEXT: add r6, r5, r3 -; LE-PAIRED-PWR10-NEXT: lxvx vs3, r5, r3 -; LE-PAIRED-PWR10-NEXT: sldi 
r3, r4, 6 -; LE-PAIRED-PWR10-NEXT: add r4, r5, r3 -; LE-PAIRED-PWR10-NEXT: lxv vs2, 16(r6) -; LE-PAIRED-PWR10-NEXT: lxv vs1, 32(r6) -; LE-PAIRED-PWR10-NEXT: lxv vs0, 48(r6) -; LE-PAIRED-PWR10-NEXT: stxvx vs3, r5, r3 -; LE-PAIRED-PWR10-NEXT: stxv vs0, 48(r4) -; LE-PAIRED-PWR10-NEXT: stxv vs1, 32(r4) -; LE-PAIRED-PWR10-NEXT: stxv vs2, 16(r4) -; LE-PAIRED-PWR10-NEXT: blr +; LE-PAIRED-WACC-LABEL: testXLdSt: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: paddi r5, 0, f@PCREL, 1 +; LE-PAIRED-WACC-NEXT: sldi r3, r3, 6 +; LE-PAIRED-WACC-NEXT: add r6, r5, r3 +; LE-PAIRED-WACC-NEXT: lxvx v3, r5, r3 +; LE-PAIRED-WACC-NEXT: lxv v2, 16(r6) +; LE-PAIRED-WACC-NEXT: lxv v5, 32(r6) +; LE-PAIRED-WACC-NEXT: lxv v4, 48(r6) +; LE-PAIRED-WACC-NEXT: sldi r3, r4, 6 +; LE-PAIRED-WACC-NEXT: add r4, r5, r3 +; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-WACC-NEXT: stxvx v3, r5, r3 +; LE-PAIRED-WACC-NEXT: stxv v4, 48(r4) +; LE-PAIRED-WACC-NEXT: stxv v5, 32(r4) +; LE-PAIRED-WACC-NEXT: stxv v2, 16(r4) +; LE-PAIRED-WACC-NEXT: blr ; ; BE-PAIRED-LABEL: testXLdSt: ; BE-PAIRED: # %bb.0: # %entry @@ -210,37 +210,37 @@ define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: addi r5, r5, f@toc@l ; BE-PAIRED-NEXT: sldi r3, r3, 6 ; BE-PAIRED-NEXT: add r6, r5, r3 -; BE-PAIRED-NEXT: lxvx v2, r5, r3 -; BE-PAIRED-NEXT: lxv v5, 48(r6) -; BE-PAIRED-NEXT: lxv v3, 16(r6) -; BE-PAIRED-NEXT: lxv v4, 32(r6) +; BE-PAIRED-NEXT: lxv vs3, 48(r6) +; BE-PAIRED-NEXT: lxv vs2, 32(r6) +; BE-PAIRED-NEXT: lxvx vs0, r5, r3 +; BE-PAIRED-NEXT: lxv vs1, 16(r6) ; BE-PAIRED-NEXT: sldi r3, r4, 6 ; BE-PAIRED-NEXT: add r4, r5, r3 -; BE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 -; BE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; BE-PAIRED-NEXT: stxvx v2, r5, r3 -; BE-PAIRED-NEXT: stxv v5, 48(r4) -; BE-PAIRED-NEXT: stxv v4, 32(r4) -; BE-PAIRED-NEXT: stxv v3, 16(r4) +; BE-PAIRED-NEXT: stxvx vs0, 
r5, r3 +; BE-PAIRED-NEXT: stxv vs1, 16(r4) +; BE-PAIRED-NEXT: stxv vs3, 48(r4) +; BE-PAIRED-NEXT: stxv vs2, 32(r4) ; BE-PAIRED-NEXT: blr ; -; BE-PAIRED-PWR10-LABEL: testXLdSt: -; BE-PAIRED-PWR10: # %bb.0: # %entry -; BE-PAIRED-PWR10-NEXT: addis r5, r2, f@toc@ha -; BE-PAIRED-PWR10-NEXT: addi r5, r5, f@toc@l -; BE-PAIRED-PWR10-NEXT: sldi r3, r3, 6 -; BE-PAIRED-PWR10-NEXT: add r6, r5, r3 -; BE-PAIRED-PWR10-NEXT: lxv vs3, 48(r6) -; BE-PAIRED-PWR10-NEXT: lxv vs2, 32(r6) -; BE-PAIRED-PWR10-NEXT: lxvx vs0, r5, r3 -; BE-PAIRED-PWR10-NEXT: lxv vs1, 16(r6) -; BE-PAIRED-PWR10-NEXT: sldi r3, r4, 6 -; BE-PAIRED-PWR10-NEXT: add r4, r5, r3 -; BE-PAIRED-PWR10-NEXT: stxvx vs0, r5, r3 -; BE-PAIRED-PWR10-NEXT: stxv vs1, 16(r4) -; BE-PAIRED-PWR10-NEXT: stxv vs3, 48(r4) -; BE-PAIRED-PWR10-NEXT: stxv vs2, 32(r4) -; BE-PAIRED-PWR10-NEXT: blr +; BE-PAIRED-WACC-LABEL: testXLdSt: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r5, r2, f@toc@ha +; BE-PAIRED-WACC-NEXT: addi r5, r5, f@toc@l +; BE-PAIRED-WACC-NEXT: sldi r3, r3, 6 +; BE-PAIRED-WACC-NEXT: add r6, r5, r3 +; BE-PAIRED-WACC-NEXT: lxvx v2, r5, r3 +; BE-PAIRED-WACC-NEXT: lxv v5, 48(r6) +; BE-PAIRED-WACC-NEXT: lxv v3, 16(r6) +; BE-PAIRED-WACC-NEXT: lxv v4, 32(r6) +; BE-PAIRED-WACC-NEXT: sldi r3, r4, 6 +; BE-PAIRED-WACC-NEXT: add r4, r5, r3 +; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-WACC-NEXT: stxvx v2, r5, r3 +; BE-PAIRED-WACC-NEXT: stxv v5, 48(r4) +; BE-PAIRED-WACC-NEXT: stxv v4, 32(r4) +; BE-PAIRED-WACC-NEXT: stxv v3, 16(r4) +; BE-PAIRED-WACC-NEXT: blr ; ; LE-PWR9-LABEL: testXLdSt: ; LE-PWR9: # %bb.0: # %entry @@ -330,59 +330,59 @@ entry: define dso_local void @testUnalignedLdSt() { ; LE-PAIRED-LABEL: testUnalignedLdSt: ; LE-PAIRED: # %bb.0: # %entry -; LE-PAIRED-NEXT: plxv v3, f@PCREL+11(0), 1 -; LE-PAIRED-NEXT: plxv v5, f@PCREL+43(0), 1 -; LE-PAIRED-NEXT: plxv v2, f@PCREL+27(0), 1 -; LE-PAIRED-NEXT: plxv v4, 
f@PCREL+59(0), 1 -; LE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 -; LE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; LE-PAIRED-NEXT: pstxv v4, f@PCREL+67(0), 1 -; LE-PAIRED-NEXT: pstxv v5, f@PCREL+51(0), 1 -; LE-PAIRED-NEXT: pstxv v2, f@PCREL+35(0), 1 -; LE-PAIRED-NEXT: pstxv v3, f@PCREL+19(0), 1 +; LE-PAIRED-NEXT: plxv vs3, f@PCREL+11(0), 1 +; LE-PAIRED-NEXT: plxv vs2, f@PCREL+27(0), 1 +; LE-PAIRED-NEXT: plxv vs1, f@PCREL+43(0), 1 +; LE-PAIRED-NEXT: plxv vs0, f@PCREL+59(0), 1 +; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+67(0), 1 +; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+51(0), 1 +; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+35(0), 1 +; LE-PAIRED-NEXT: pstxv vs3, f@PCREL+19(0), 1 ; LE-PAIRED-NEXT: blr ; -; LE-PAIRED-PWR10-LABEL: testUnalignedLdSt: -; LE-PAIRED-PWR10: # %bb.0: # %entry -; LE-PAIRED-PWR10-NEXT: plxv vs3, f@PCREL+11(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs2, f@PCREL+27(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs1, f@PCREL+43(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs0, f@PCREL+59(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs0, f@PCREL+67(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs1, f@PCREL+51(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs2, f@PCREL+35(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs3, f@PCREL+19(0), 1 -; LE-PAIRED-PWR10-NEXT: blr +; LE-PAIRED-WACC-LABEL: testUnalignedLdSt: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv v3, f@PCREL+11(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v5, f@PCREL+43(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v2, f@PCREL+27(0), 1 +; LE-PAIRED-WACC-NEXT: plxv v4, f@PCREL+59(0), 1 +; LE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; LE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; LE-PAIRED-WACC-NEXT: pstxv v4, f@PCREL+67(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v5, f@PCREL+51(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v2, f@PCREL+35(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv v3, f@PCREL+19(0), 1 +; LE-PAIRED-WACC-NEXT: blr ; ; BE-PAIRED-LABEL: testUnalignedLdSt: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, f@toc@ha ; 
BE-PAIRED-NEXT: addi r3, r3, f@toc@l -; BE-PAIRED-NEXT: plxv v3, 59(r3), 0 -; BE-PAIRED-NEXT: plxv v5, 27(r3), 0 -; BE-PAIRED-NEXT: plxv v2, 43(r3), 0 -; BE-PAIRED-NEXT: plxv v4, 11(r3), 0 -; BE-PAIRED-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 -; BE-PAIRED-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; BE-PAIRED-NEXT: pstxv v5, 67(r3), 0 -; BE-PAIRED-NEXT: pstxv v4, 51(r3), 0 -; BE-PAIRED-NEXT: pstxv v3, 35(r3), 0 -; BE-PAIRED-NEXT: pstxv v2, 19(r3), 0 +; BE-PAIRED-NEXT: plxv vs3, 59(r3), 0 +; BE-PAIRED-NEXT: plxv vs2, 43(r3), 0 +; BE-PAIRED-NEXT: plxv vs1, 27(r3), 0 +; BE-PAIRED-NEXT: plxv vs0, 11(r3), 0 +; BE-PAIRED-NEXT: pstxv vs1, 35(r3), 0 +; BE-PAIRED-NEXT: pstxv vs0, 19(r3), 0 +; BE-PAIRED-NEXT: pstxv vs3, 67(r3), 0 +; BE-PAIRED-NEXT: pstxv vs2, 51(r3), 0 ; BE-PAIRED-NEXT: blr ; -; BE-PAIRED-PWR10-LABEL: testUnalignedLdSt: -; BE-PAIRED-PWR10: # %bb.0: # %entry -; BE-PAIRED-PWR10-NEXT: addis r3, r2, f@toc@ha -; BE-PAIRED-PWR10-NEXT: addi r3, r3, f@toc@l -; BE-PAIRED-PWR10-NEXT: plxv vs3, 59(r3), 0 -; BE-PAIRED-PWR10-NEXT: plxv vs2, 43(r3), 0 -; BE-PAIRED-PWR10-NEXT: plxv vs1, 27(r3), 0 -; BE-PAIRED-PWR10-NEXT: plxv vs0, 11(r3), 0 -; BE-PAIRED-PWR10-NEXT: pstxv vs1, 35(r3), 0 -; BE-PAIRED-PWR10-NEXT: pstxv vs0, 19(r3), 0 -; BE-PAIRED-PWR10-NEXT: pstxv vs3, 67(r3), 0 -; BE-PAIRED-PWR10-NEXT: pstxv vs2, 51(r3), 0 -; BE-PAIRED-PWR10-NEXT: blr +; BE-PAIRED-WACC-LABEL: testUnalignedLdSt: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, f@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, f@toc@l +; BE-PAIRED-WACC-NEXT: plxv v3, 59(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv v5, 27(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv v2, 43(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv v4, 11(r3), 0 +; BE-PAIRED-WACC-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0 +; BE-PAIRED-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; BE-PAIRED-WACC-NEXT: pstxv v5, 67(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv v4, 51(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv v3, 35(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv 
v2, 19(r3), 0 +; BE-PAIRED-WACC-NEXT: blr ; ; LE-PWR9-LABEL: testUnalignedLdSt: ; LE-PWR9: # %bb.0: # %entry @@ -488,33 +488,33 @@ define dso_local void @testLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+64(0), 1 ; LE-PAIRED-NEXT: blr ; -; LE-PAIRED-PWR10-LABEL: testLdStPair: -; LE-PAIRED-PWR10: # %bb.0: # %entry -; LE-PAIRED-PWR10-NEXT: plxv vs0, g@PCREL+48(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs1, g@PCREL+32(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs0, g@PCREL+80(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs1, g@PCREL+64(0), 1 -; LE-PAIRED-PWR10-NEXT: blr +; LE-PAIRED-WACC-LABEL: testLdStPair: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+48(0), 1 +; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+32(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+80(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+64(0), 1 +; LE-PAIRED-WACC-NEXT: blr ; ; BE-PAIRED-LABEL: testLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; BE-PAIRED-NEXT: addis r3, r2, g@toc@ha ; BE-PAIRED-NEXT: addi r3, r3, g@toc@l -; BE-PAIRED-NEXT: lxv vs0, 48(r3) -; BE-PAIRED-NEXT: lxv vs1, 32(r3) -; BE-PAIRED-NEXT: stxv vs0, 80(r3) -; BE-PAIRED-NEXT: stxv vs1, 64(r3) +; BE-PAIRED-NEXT: lxv vs0, 32(r3) +; BE-PAIRED-NEXT: lxv vs1, 48(r3) +; BE-PAIRED-NEXT: stxv vs1, 80(r3) +; BE-PAIRED-NEXT: stxv vs0, 64(r3) ; BE-PAIRED-NEXT: blr ; -; BE-PAIRED-PWR10-LABEL: testLdStPair: -; BE-PAIRED-PWR10: # %bb.0: # %entry -; BE-PAIRED-PWR10-NEXT: addis r3, r2, g@toc@ha -; BE-PAIRED-PWR10-NEXT: addi r3, r3, g@toc@l -; BE-PAIRED-PWR10-NEXT: lxv vs0, 32(r3) -; BE-PAIRED-PWR10-NEXT: lxv vs1, 48(r3) -; BE-PAIRED-PWR10-NEXT: stxv vs1, 80(r3) -; BE-PAIRED-PWR10-NEXT: stxv vs0, 64(r3) -; BE-PAIRED-PWR10-NEXT: blr +; BE-PAIRED-WACC-LABEL: testLdStPair: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l +; BE-PAIRED-WACC-NEXT: lxv vs0, 48(r3) +; BE-PAIRED-WACC-NEXT: lxv vs1, 32(r3) +; BE-PAIRED-WACC-NEXT: stxv vs0, 
80(r3) +; BE-PAIRED-WACC-NEXT: stxv vs1, 64(r3) +; BE-PAIRED-WACC-NEXT: blr ; ; LE-PWR9-LABEL: testLdStPair: ; LE-PWR9: # %bb.0: # %entry @@ -578,25 +578,25 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; LE-PAIRED-NEXT: paddi r5, 0, g@PCREL, 1 ; LE-PAIRED-NEXT: add r6, r5, r3 ; LE-PAIRED-NEXT: lxvx vs0, r5, r3 -; LE-PAIRED-NEXT: lxv vs1, 16(r6) ; LE-PAIRED-NEXT: sldi r3, r4, 5 ; LE-PAIRED-NEXT: add r4, r5, r3 +; LE-PAIRED-NEXT: lxv vs1, 16(r6) ; LE-PAIRED-NEXT: stxvx vs0, r5, r3 ; LE-PAIRED-NEXT: stxv vs1, 16(r4) ; LE-PAIRED-NEXT: blr ; -; LE-PAIRED-PWR10-LABEL: testXLdStPair: -; LE-PAIRED-PWR10: # %bb.0: # %entry -; LE-PAIRED-PWR10-NEXT: sldi r3, r3, 5 -; LE-PAIRED-PWR10-NEXT: paddi r5, 0, g@PCREL, 1 -; LE-PAIRED-PWR10-NEXT: add r6, r5, r3 -; LE-PAIRED-PWR10-NEXT: lxvx vs0, r5, r3 -; LE-PAIRED-PWR10-NEXT: sldi r3, r4, 5 -; LE-PAIRED-PWR10-NEXT: add r4, r5, r3 -; LE-PAIRED-PWR10-NEXT: lxv vs1, 16(r6) -; LE-PAIRED-PWR10-NEXT: stxvx vs0, r5, r3 -; LE-PAIRED-PWR10-NEXT: stxv vs1, 16(r4) -; LE-PAIRED-PWR10-NEXT: blr +; LE-PAIRED-WACC-LABEL: testXLdStPair: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: sldi r3, r3, 5 +; LE-PAIRED-WACC-NEXT: paddi r5, 0, g@PCREL, 1 +; LE-PAIRED-WACC-NEXT: add r6, r5, r3 +; LE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3 +; LE-PAIRED-WACC-NEXT: lxv vs1, 16(r6) +; LE-PAIRED-WACC-NEXT: sldi r3, r4, 5 +; LE-PAIRED-WACC-NEXT: add r4, r5, r3 +; LE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3 +; LE-PAIRED-WACC-NEXT: stxv vs1, 16(r4) +; LE-PAIRED-WACC-NEXT: blr ; ; BE-PAIRED-LABEL: testXLdStPair: ; BE-PAIRED: # %bb.0: # %entry @@ -605,26 +605,26 @@ define dso_local void @testXLdStPair(i64 %SrcIdx, i64 %DstIdx) { ; BE-PAIRED-NEXT: addi r5, r5, g@toc@l ; BE-PAIRED-NEXT: add r6, r5, r3 ; BE-PAIRED-NEXT: lxvx vs0, r5, r3 -; BE-PAIRED-NEXT: lxv vs1, 16(r6) ; BE-PAIRED-NEXT: sldi r3, r4, 5 ; BE-PAIRED-NEXT: add r4, r5, r3 +; BE-PAIRED-NEXT: lxv vs1, 16(r6) ; BE-PAIRED-NEXT: stxvx vs0, r5, r3 ; BE-PAIRED-NEXT: stxv vs1, 16(r4) ; 
BE-PAIRED-NEXT: blr ; -; BE-PAIRED-PWR10-LABEL: testXLdStPair: -; BE-PAIRED-PWR10: # %bb.0: # %entry -; BE-PAIRED-PWR10-NEXT: addis r5, r2, g@toc@ha -; BE-PAIRED-PWR10-NEXT: sldi r3, r3, 5 -; BE-PAIRED-PWR10-NEXT: addi r5, r5, g@toc@l -; BE-PAIRED-PWR10-NEXT: add r6, r5, r3 -; BE-PAIRED-PWR10-NEXT: lxvx vs0, r5, r3 -; BE-PAIRED-PWR10-NEXT: sldi r3, r4, 5 -; BE-PAIRED-PWR10-NEXT: add r4, r5, r3 -; BE-PAIRED-PWR10-NEXT: lxv vs1, 16(r6) -; BE-PAIRED-PWR10-NEXT: stxvx vs0, r5, r3 -; BE-PAIRED-PWR10-NEXT: stxv vs1, 16(r4) -; BE-PAIRED-PWR10-NEXT: blr +; BE-PAIRED-WACC-LABEL: testXLdStPair: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r5, r2, g@toc@ha +; BE-PAIRED-WACC-NEXT: sldi r3, r3, 5 +; BE-PAIRED-WACC-NEXT: addi r5, r5, g@toc@l +; BE-PAIRED-WACC-NEXT: add r6, r5, r3 +; BE-PAIRED-WACC-NEXT: lxvx vs0, r5, r3 +; BE-PAIRED-WACC-NEXT: lxv vs1, 16(r6) +; BE-PAIRED-WACC-NEXT: sldi r3, r4, 5 +; BE-PAIRED-WACC-NEXT: add r4, r5, r3 +; BE-PAIRED-WACC-NEXT: stxvx vs0, r5, r3 +; BE-PAIRED-WACC-NEXT: stxv vs1, 16(r4) +; BE-PAIRED-WACC-NEXT: blr ; ; LE-PWR9-LABEL: testXLdStPair: ; LE-PWR9: # %bb.0: # %entry @@ -700,33 +700,33 @@ define dso_local void @testUnalignedLdStPair() { ; LE-PAIRED-NEXT: pstxv vs1, g@PCREL+19(0), 1 ; LE-PAIRED-NEXT: blr ; -; LE-PAIRED-PWR10-LABEL: testUnalignedLdStPair: -; LE-PAIRED-PWR10: # %bb.0: # %entry -; LE-PAIRED-PWR10-NEXT: plxv vs0, g@PCREL+27(0), 1 -; LE-PAIRED-PWR10-NEXT: plxv vs1, g@PCREL+11(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs0, g@PCREL+35(0), 1 -; LE-PAIRED-PWR10-NEXT: pstxv vs1, g@PCREL+19(0), 1 -; LE-PAIRED-PWR10-NEXT: blr +; LE-PAIRED-WACC-LABEL: testUnalignedLdStPair: +; LE-PAIRED-WACC: # %bb.0: # %entry +; LE-PAIRED-WACC-NEXT: plxv vs0, g@PCREL+27(0), 1 +; LE-PAIRED-WACC-NEXT: plxv vs1, g@PCREL+11(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs0, g@PCREL+35(0), 1 +; LE-PAIRED-WACC-NEXT: pstxv vs1, g@PCREL+19(0), 1 +; LE-PAIRED-WACC-NEXT: blr ; ; BE-PAIRED-LABEL: testUnalignedLdStPair: ; BE-PAIRED: # %bb.0: # %entry ; 
BE-PAIRED-NEXT: addis r3, r2, g@toc@ha ; BE-PAIRED-NEXT: addi r3, r3, g@toc@l -; BE-PAIRED-NEXT: plxv vs0, 27(r3), 0 -; BE-PAIRED-NEXT: plxv vs1, 11(r3), 0 -; BE-PAIRED-NEXT: pstxv vs0, 35(r3), 0 -; BE-PAIRED-NEXT: pstxv vs1, 19(r3), 0 +; BE-PAIRED-NEXT: plxv vs0, 11(r3), 0 +; BE-PAIRED-NEXT: plxv vs1, 27(r3), 0 +; BE-PAIRED-NEXT: pstxv vs1, 35(r3), 0 +; BE-PAIRED-NEXT: pstxv vs0, 19(r3), 0 ; BE-PAIRED-NEXT: blr ; -; BE-PAIRED-PWR10-LABEL: testUnalignedLdStPair: -; BE-PAIRED-PWR10: # %bb.0: # %entry -; BE-PAIRED-PWR10-NEXT: addis r3, r2, g@toc@ha -; BE-PAIRED-PWR10-NEXT: addi r3, r3, g@toc@l -; BE-PAIRED-PWR10-NEXT: plxv vs0, 11(r3), 0 -; BE-PAIRED-PWR10-NEXT: plxv vs1, 27(r3), 0 -; BE-PAIRED-PWR10-NEXT: pstxv vs1, 35(r3), 0 -; BE-PAIRED-PWR10-NEXT: pstxv vs0, 19(r3), 0 -; BE-PAIRED-PWR10-NEXT: blr +; BE-PAIRED-WACC-LABEL: testUnalignedLdStPair: +; BE-PAIRED-WACC: # %bb.0: # %entry +; BE-PAIRED-WACC-NEXT: addis r3, r2, g@toc@ha +; BE-PAIRED-WACC-NEXT: addi r3, r3, g@toc@l +; BE-PAIRED-WACC-NEXT: plxv vs0, 27(r3), 0 +; BE-PAIRED-WACC-NEXT: plxv vs1, 11(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv vs0, 35(r3), 0 +; BE-PAIRED-WACC-NEXT: pstxv vs1, 19(r3), 0 +; BE-PAIRED-WACC-NEXT: blr ; ; LE-PWR9-LABEL: testUnalignedLdStPair: ; LE-PWR9: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll index 6999e9c1521c4..3505cbb197bf9 100644 --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -1,65 +1,65 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ ; RUN: 
-ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-PWR10 +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-PWR10 +; RUN: -mcpu=future -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC ; assemble_acc declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) define void @ass_acc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: ass_acc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmr v3, v2 -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: xxlor vs3, v2, v2 +; CHECK-NEXT: xxlor vs2, v2, v2 +; CHECK-NEXT: xxlor vs0, vs2, vs2 +; CHECK-NEXT: xxlor vs1, vs3, vs3 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: ass_acc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vmr v3, v2 -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: xxlor vs3, v2, v2 +; CHECK-BE-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-NEXT: xxlor vs0, vs2, vs2 +; CHECK-BE-NEXT: xxlor vs1, vs3, vs3 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; 
CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: ass_acc: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxlor vs3, v2, v2 -; CHECK-PWR10-NEXT: xxlor vs2, v2, v2 -; CHECK-PWR10-NEXT: xxlor vs0, vs2, vs2 -; CHECK-PWR10-NEXT: xxlor vs1, vs3, vs3 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: ass_acc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: ass_acc: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxlor vs3, v2, v2 -; CHECK-BE-PWR10-NEXT: xxlor vs2, v2, v2 -; CHECK-BE-PWR10-NEXT: xxlor vs0, vs2, vs2 -; CHECK-BE-PWR10-NEXT: xxlor vs1, vs3, vs3 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: ass_acc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc) store <512 x i1> %0, ptr %ptr, align 64 @@ -71,51 +71,51 @@ declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>) define void @int_xxmtacc(ptr %ptr, <16 x i8> %vc) { ; 
CHECK-LABEL: int_xxmtacc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmr v3, v2 -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: xxlor vs3, v2, v2 +; CHECK-NEXT: xxlor vs2, v2, v2 +; CHECK-NEXT: xxlor vs0, vs2, vs2 +; CHECK-NEXT: xxlor vs1, vs3, vs3 +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: int_xxmtacc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vmr v3, v2 -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: xxlor vs3, v2, v2 +; CHECK-BE-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-NEXT: xxlor vs0, vs2, vs2 +; CHECK-BE-NEXT: xxlor vs1, vs3, vs3 +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: int_xxmtacc: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxlor vs3, v2, v2 -; CHECK-PWR10-NEXT: xxlor vs2, v2, v2 -; CHECK-PWR10-NEXT: xxlor vs0, vs2, vs2 -; CHECK-PWR10-NEXT: xxlor vs1, vs3, vs3 -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: int_xxmtacc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; 
CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: int_xxmtacc: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxlor vs3, v2, v2 -; CHECK-BE-PWR10-NEXT: xxlor vs2, v2, v2 -; CHECK-BE-PWR10-NEXT: xxlor vs0, vs2, vs2 -; CHECK-BE-PWR10-NEXT: xxlor vs1, vs3, vs3 -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: int_xxmtacc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: ; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is ; generated from the call to xxmtacc then one xxmfacc is generated for the store @@ -130,49 +130,49 @@ declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>) define void @int_xxmfacc(ptr %ptr, <16 x i8> %vc) { ; CHECK-LABEL: int_xxmfacc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmr v3, v2 -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: xxlor vs3, v2, v2 +; CHECK-NEXT: xxlor vs2, v2, v2 +; CHECK-NEXT: xxlor vs0, vs2, vs2 +; CHECK-NEXT: xxlor vs1, vs3, vs3 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: int_xxmfacc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vmr v3, v2 -; 
CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: xxlor vs3, v2, v2 +; CHECK-BE-NEXT: xxlor vs2, v2, v2 +; CHECK-BE-NEXT: xxlor vs0, vs2, vs2 +; CHECK-BE-NEXT: xxlor vs1, vs3, vs3 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: int_xxmfacc: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxlor vs3, v2, v2 -; CHECK-PWR10-NEXT: xxlor vs2, v2, v2 -; CHECK-PWR10-NEXT: xxlor vs0, vs2, vs2 -; CHECK-PWR10-NEXT: xxlor vs1, vs3, vs3 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: int_xxmfacc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: vmr v3, v2 +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: int_xxmfacc: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxlor vs3, v2, v2 -; CHECK-BE-PWR10-NEXT: xxlor vs2, v2, v2 -; CHECK-BE-PWR10-NEXT: xxlor vs0, vs2, vs2 -; CHECK-BE-PWR10-NEXT: xxlor vs1, vs3, vs3 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: int_xxmfacc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: vmr v3, v2 +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp34, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, 
wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: ; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is ; generated from the call to xxmfacc then one xxmfacc is generated for the store @@ -187,43 +187,43 @@ declare <512 x i1> @llvm.ppc.mma.xxsetaccz() define void @int_xxsetaccz(ptr %ptr) { ; CHECK-LABEL: int_xxsetaccz: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: dmxxsetaccz wacc0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: int_xxsetaccz: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: dmxxsetaccz wacc0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: int_xxsetaccz: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: int_xxsetaccz: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 
32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: int_xxsetaccz: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: int_xxsetaccz: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() store <512 x i1> %0, ptr %ptr, align 64 @@ -235,43 +235,43 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble define void @disass_acc(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4) { ; CHECK-LABEL: disass_acc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: dmxxsetaccz wacc0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v5, 0(r3) -; CHECK-NEXT: stxv v4, 0(r4) -; CHECK-NEXT: stxv v3, 0(r5) -; CHECK-NEXT: stxv v2, 0(r6) +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: stxv vs2, 0(r4) +; CHECK-NEXT: stxv vs1, 0(r5) +; CHECK-NEXT: stxv vs0, 0(r6) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: disass_acc: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: dmxxsetaccz wacc0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v2, 0(r3) -; CHECK-BE-NEXT: stxv v3, 0(r4) -; CHECK-BE-NEXT: stxv v4, 0(r5) -; CHECK-BE-NEXT: stxv v5, 0(r6) +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 0(r4) +; CHECK-BE-NEXT: stxv vs2, 0(r5) +; 
CHECK-BE-NEXT: stxv vs3, 0(r6) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: disass_acc: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: stxv vs2, 0(r4) -; CHECK-PWR10-NEXT: stxv vs1, 0(r5) -; CHECK-PWR10-NEXT: stxv vs0, 0(r6) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: disass_acc: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 0(r4) +; CHECK-WACC-NEXT: stxv v3, 0(r5) +; CHECK-WACC-NEXT: stxv v2, 0(r6) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: disass_acc: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs1, 0(r4) -; CHECK-BE-PWR10-NEXT: stxv vs2, 0(r5) -; CHECK-BE-PWR10-NEXT: stxv vs3, 0(r6) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: disass_acc: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 0(r4) +; CHECK-BE-WACC-NEXT: stxv v4, 0(r5) +; CHECK-BE-WACC-NEXT: stxv v5, 0(r6) +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0) @@ -293,21 +293,21 @@ define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) { ; CHECK-NEXT: cmplwi r7, 0 ; CHECK-NEXT: beq cr0, .LBB5_2 ; CHECK-NEXT: # %bb.1: # %if.then -; CHECK-NEXT: dmxxsetaccz wacc0 +; CHECK-NEXT: xxsetaccz acc0 ; CHECK-NEXT: b .LBB5_3 ; CHECK-NEXT: .LBB5_2: # %if.else -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 
wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvi4ger8pp acc0, v2, v2 ; CHECK-NEXT: .LBB5_3: # %if.end -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testBranch: @@ -315,66 +315,66 @@ define void @testBranch(ptr %ptr, <16 x i8> %vc, i32 %val) { ; CHECK-BE-NEXT: cmplwi r7, 0 ; CHECK-BE-NEXT: beq cr0, .LBB5_2 ; CHECK-BE-NEXT: # %bb.1: # %if.then -; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: xxsetaccz acc0 ; CHECK-BE-NEXT: b .LBB5_3 ; CHECK-BE-NEXT: .LBB5_2: # %if.else -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2 ; CHECK-BE-NEXT: .LBB5_3: # %if.end -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: testBranch: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: cmplwi r7, 0 -; CHECK-PWR10-NEXT: beq cr0, .LBB5_2 -; 
CHECK-PWR10-NEXT: # %bb.1: # %if.then -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: b .LBB5_3 -; CHECK-PWR10-NEXT: .LBB5_2: # %if.else -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 -; CHECK-PWR10-NEXT: .LBB5_3: # %if.end -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: testBranch: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmplwi r7, 0 +; CHECK-WACC-NEXT: beq cr0, .LBB5_2 +; CHECK-WACC-NEXT: # %bb.1: # %if.then +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: b .LBB5_3 +; CHECK-WACC-NEXT: .LBB5_2: # %if.else +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-WACC-NEXT: .LBB5_3: # %if.end +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: testBranch: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: cmplwi r7, 0 -; CHECK-BE-PWR10-NEXT: beq cr0, .LBB5_2 -; CHECK-BE-PWR10-NEXT: # %bb.1: # %if.then -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: b .LBB5_3 -; CHECK-BE-PWR10-NEXT: .LBB5_2: # %if.else -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: 
.LBB5_3: # %if.end -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: testBranch: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmplwi r7, 0 +; CHECK-BE-WACC-NEXT: beq cr0, .LBB5_2 +; CHECK-BE-WACC-NEXT: # %bb.1: # %if.then +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: b .LBB5_3 +; CHECK-BE-WACC-NEXT: .LBB5_2: # %if.else +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: xvi4ger8pp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: .LBB5_3: # %if.end +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: blr entry: %tobool = icmp eq i32 %val, 0 br i1 %tobool, label %if.else, label %if.then @@ -402,63 +402,63 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>) define void @testcse(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: dmxxsetaccz wacc0 -; CHECK-NEXT: xvf32gerpp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) -; CHECK-NEXT: stxv v4, 112(r3) -; CHECK-NEXT: stxv v5, 96(r3) -; CHECK-NEXT: stxv v2, 80(r3) -; CHECK-NEXT: stxv v3, 64(r3) +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: stxv 
vs0, 112(r3) +; CHECK-NEXT: stxv vs1, 96(r3) +; CHECK-NEXT: stxv vs2, 80(r3) +; CHECK-NEXT: stxv vs3, 64(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testcse: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: dmxxsetaccz wacc0 -; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) -; CHECK-BE-NEXT: stxv v5, 112(r3) -; CHECK-BE-NEXT: stxv v4, 96(r3) -; CHECK-BE-NEXT: stxv v3, 80(r3) -; CHECK-BE-NEXT: stxv v2, 64(r3) +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: stxv vs1, 80(r3) +; CHECK-BE-NEXT: stxv vs0, 64(r3) +; CHECK-BE-NEXT: stxv vs3, 112(r3) +; CHECK-BE-NEXT: stxv vs2, 96(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: testcse: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: stxv vs0, 112(r3) -; CHECK-PWR10-NEXT: stxv vs1, 96(r3) -; CHECK-PWR10-NEXT: stxv vs2, 80(r3) -; CHECK-PWR10-NEXT: stxv vs3, 64(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: testcse: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: stxv v4, 112(r3) +; CHECK-WACC-NEXT: stxv v5, 96(r3) +; CHECK-WACC-NEXT: stxv v2, 80(r3) +; CHECK-WACC-NEXT: stxv v3, 
64(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: testcse: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: testcse: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 112(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -473,75 +473,75 @@ entry: define void @testcse2(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: dmxxsetaccz wacc1 -; CHECK-NEXT: dmxxsetaccz wacc0 -; CHECK-NEXT: xvf32gerpp wacc1, v2, v2 -; CHECK-NEXT: xvf32gerpn wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 112(r3) -; CHECK-NEXT: stxv v5, 96(r3) -; CHECK-NEXT: stxv v2, 80(r3) -; CHECK-NEXT: stxv v3, 64(r3) +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xxsetaccz acc1 +; CHECK-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-NEXT: xvf32gerpn 
acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc1 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs4, 48(r3) +; CHECK-NEXT: stxv vs5, 32(r3) +; CHECK-NEXT: stxv vs6, 16(r3) +; CHECK-NEXT: stxv vs7, 0(r3) +; CHECK-NEXT: stxv vs0, 112(r3) +; CHECK-NEXT: stxv vs1, 96(r3) +; CHECK-NEXT: stxv vs2, 80(r3) +; CHECK-NEXT: stxv vs3, 64(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testcse2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: dmxxsetaccz wacc1 -; CHECK-BE-NEXT: dmxxsetaccz wacc0 -; CHECK-BE-NEXT: xvf32gerpp wacc1, v2, v2 -; CHECK-BE-NEXT: xvf32gerpn wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 112(r3) -; CHECK-BE-NEXT: stxv v4, 96(r3) -; CHECK-BE-NEXT: stxv v3, 80(r3) -; CHECK-BE-NEXT: stxv v2, 64(r3) +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: xxsetaccz acc1 +; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc1 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs5, 16(r3) +; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: stxv vs7, 48(r3) +; CHECK-BE-NEXT: stxv vs6, 32(r3) +; CHECK-BE-NEXT: stxv vs1, 80(r3) +; CHECK-BE-NEXT: stxv vs0, 64(r3) +; CHECK-BE-NEXT: stxv vs3, 112(r3) +; CHECK-BE-NEXT: stxv vs2, 96(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: testcse2: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: xxsetaccz acc1 -; CHECK-PWR10-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-PWR10-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc1 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs4, 48(r3) -; CHECK-PWR10-NEXT: stxv vs5, 32(r3) -; CHECK-PWR10-NEXT: stxv vs6, 16(r3) -; CHECK-PWR10-NEXT: stxv vs7, 0(r3) -; CHECK-PWR10-NEXT: stxv vs0, 112(r3) -; CHECK-PWR10-NEXT: stxv vs1, 96(r3) -; 
CHECK-PWR10-NEXT: stxv vs2, 80(r3) -; CHECK-PWR10-NEXT: stxv vs3, 64(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: testcse2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r3) +; CHECK-WACC-NEXT: stxv v5, 96(r3) +; CHECK-WACC-NEXT: stxv v2, 80(r3) +; CHECK-WACC-NEXT: stxv v3, 64(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: testcse2: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 -; CHECK-BE-PWR10-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-BE-PWR10-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc1 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs5, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs4, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs7, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs6, 32(r3) -; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: testcse2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r3) 
+; CHECK-BE-WACC-NEXT: stxv v4, 96(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() @@ -556,75 +556,75 @@ entry: define void @testcse3(ptr %res, <16 x i8> %vc) { ; CHECK-LABEL: testcse3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: dmxxsetaccz wacc1 -; CHECK-NEXT: dmxxsetaccz wacc0 -; CHECK-NEXT: xvf32gerpp wacc1, v2, v2 -; CHECK-NEXT: xvf32gerpn wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 112(r3) -; CHECK-NEXT: stxv v5, 96(r3) -; CHECK-NEXT: stxv v2, 80(r3) -; CHECK-NEXT: stxv v3, 64(r3) +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xxsetaccz acc1 +; CHECK-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc1 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs4, 48(r3) +; CHECK-NEXT: stxv vs5, 32(r3) +; CHECK-NEXT: stxv vs6, 16(r3) +; CHECK-NEXT: stxv vs7, 0(r3) +; CHECK-NEXT: stxv vs0, 112(r3) +; CHECK-NEXT: stxv vs1, 96(r3) +; CHECK-NEXT: stxv vs2, 80(r3) +; CHECK-NEXT: stxv vs3, 64(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: testcse3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: dmxxsetaccz wacc1 -; CHECK-BE-NEXT: dmxxsetaccz wacc0 -; CHECK-BE-NEXT: xvf32gerpp wacc1, v2, v2 -; CHECK-BE-NEXT: xvf32gerpn wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 112(r3) -; CHECK-BE-NEXT: stxv v4, 96(r3) -; CHECK-BE-NEXT: stxv v3, 80(r3) -; CHECK-BE-NEXT: stxv v2, 64(r3) +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: 
xxsetaccz acc1 +; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc1 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs5, 16(r3) +; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: stxv vs7, 48(r3) +; CHECK-BE-NEXT: stxv vs6, 32(r3) +; CHECK-BE-NEXT: stxv vs1, 80(r3) +; CHECK-BE-NEXT: stxv vs0, 64(r3) +; CHECK-BE-NEXT: stxv vs3, 112(r3) +; CHECK-BE-NEXT: stxv vs2, 96(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: testcse3: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: xxsetaccz acc1 -; CHECK-PWR10-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-PWR10-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc1 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs4, 48(r3) -; CHECK-PWR10-NEXT: stxv vs5, 32(r3) -; CHECK-PWR10-NEXT: stxv vs6, 16(r3) -; CHECK-PWR10-NEXT: stxv vs7, 0(r3) -; CHECK-PWR10-NEXT: stxv vs0, 112(r3) -; CHECK-PWR10-NEXT: stxv vs1, 96(r3) -; CHECK-PWR10-NEXT: stxv vs2, 80(r3) -; CHECK-PWR10-NEXT: stxv vs3, 64(r3) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: testcse3: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r3) +; CHECK-WACC-NEXT: stxv v5, 32(r3) +; CHECK-WACC-NEXT: stxv v2, 16(r3) +; CHECK-WACC-NEXT: stxv v3, 0(r3) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r3) +; CHECK-WACC-NEXT: stxv v5, 96(r3) +; CHECK-WACC-NEXT: stxv v2, 80(r3) +; CHECK-WACC-NEXT: stxv v3, 64(r3) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: testcse3: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 -; CHECK-BE-PWR10-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-BE-PWR10-NEXT: xvf32gerpn acc0, v2, v2 -; 
CHECK-BE-PWR10-NEXT: xxmfacc acc1 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs5, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs4, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs7, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs6, 32(r3) -; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r3) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: testcse3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc1, v2, v2 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc0, v2, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r3) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r3) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r3) +; CHECK-BE-WACC-NEXT: blr entry: %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz() %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc) @@ -642,45 +642,45 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-NEXT: bltlr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: li r6, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB9_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: rldic r7, r6, 4, 28 +; CHECK-NEXT: xxsetaccz acc2 +; CHECK-NEXT: xxsetaccz acc1 +; CHECK-NEXT: addi r6, r6, 6 ; CHECK-NEXT: add r8, r5, r7 ; CHECK-NEXT: lxvx vs0, r5, r7 +; CHECK-NEXT: rldic r7, r4, 6, 26 +; CHECK-NEXT: addi r4, r4, 3 ; CHECK-NEXT: lxv vs1, 16(r8) -; CHECK-NEXT: dmxxsetaccz wacc2 -; CHECK-NEXT: dmxxsetaccz wacc1 -; CHECK-NEXT: dmxxsetaccz 
wacc0 -; CHECK-NEXT: xvf32gerpp wacc2, vs0, vs1 +; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1 ; CHECK-NEXT: lxv vs0, 32(r8) ; CHECK-NEXT: lxv vs1, 48(r8) -; CHECK-NEXT: rldic r7, r4, 6, 26 -; CHECK-NEXT: addi r4, r4, 3 -; CHECK-NEXT: addi r6, r6, 6 -; CHECK-NEXT: xvf32gerpn wacc1, vs0, vs1 -; CHECK-NEXT: lxv vs0, 64(r8) -; CHECK-NEXT: lxv vs1, 80(r8) +; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1 +; CHECK-NEXT: lxv vs12, 64(r8) +; CHECK-NEXT: lxv vs13, 80(r8) +; CHECK-NEXT: xxsetaccz acc0 ; CHECK-NEXT: add r8, r3, r7 -; CHECK-NEXT: xvf32gernp wacc0, vs0, vs1 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 -; CHECK-NEXT: stxvx v3, r3, r7 -; CHECK-NEXT: stxv v4, 48(r8) -; CHECK-NEXT: stxv v5, 32(r8) -; CHECK-NEXT: stxv v2, 16(r8) -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-NEXT: stxv v4, 112(r8) -; CHECK-NEXT: stxv v5, 96(r8) -; CHECK-NEXT: stxv v2, 80(r8) -; CHECK-NEXT: stxv v3, 64(r8) -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 176(r8) -; CHECK-NEXT: stxv v5, 160(r8) -; CHECK-NEXT: stxv v2, 144(r8) -; CHECK-NEXT: stxv v3, 128(r8) +; CHECK-NEXT: xxmfacc acc2 +; CHECK-NEXT: xvf32gernp acc0, vs12, vs13 +; CHECK-NEXT: stxvx vs11, r3, r7 +; CHECK-NEXT: stxv vs8, 48(r8) +; CHECK-NEXT: xxmfacc acc1 +; CHECK-NEXT: stxv vs9, 32(r8) +; CHECK-NEXT: stxv vs10, 16(r8) +; CHECK-NEXT: stxv vs4, 112(r8) +; CHECK-NEXT: stxv vs5, 96(r8) +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs6, 80(r8) +; CHECK-NEXT: stxv vs7, 64(r8) +; CHECK-NEXT: stxv vs0, 176(r8) +; CHECK-NEXT: stxv vs1, 160(r8) +; CHECK-NEXT: stxv vs2, 144(r8) +; CHECK-NEXT: stxv vs3, 128(r8) ; CHECK-NEXT: bdnz .LBB9_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr @@ -691,146 +691,146 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-BE-NEXT: bltlr cr0 ; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader ; CHECK-BE-NEXT: clrldi r4, r4, 32 +; CHECK-BE-NEXT: li r6, 0 ; CHECK-BE-NEXT: mtctr r4 ; CHECK-BE-NEXT: li r4, 0 -; CHECK-BE-NEXT: li r6, 0 ; 
CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB9_2: # %for.body ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: rldic r7, r6, 4, 28 +; CHECK-BE-NEXT: xxsetaccz acc2 +; CHECK-BE-NEXT: xxsetaccz acc1 +; CHECK-BE-NEXT: addi r6, r6, 6 ; CHECK-BE-NEXT: add r8, r5, r7 ; CHECK-BE-NEXT: lxvx vs0, r5, r7 +; CHECK-BE-NEXT: rldic r7, r4, 6, 26 +; CHECK-BE-NEXT: addi r4, r4, 3 ; CHECK-BE-NEXT: lxv vs1, 16(r8) -; CHECK-BE-NEXT: dmxxsetaccz wacc2 -; CHECK-BE-NEXT: dmxxsetaccz wacc1 -; CHECK-BE-NEXT: dmxxsetaccz wacc0 -; CHECK-BE-NEXT: xvf32gerpp wacc2, vs0, vs1 +; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1 ; CHECK-BE-NEXT: lxv vs0, 32(r8) ; CHECK-BE-NEXT: lxv vs1, 48(r8) -; CHECK-BE-NEXT: rldic r7, r4, 6, 26 -; CHECK-BE-NEXT: addi r4, r4, 3 -; CHECK-BE-NEXT: addi r6, r6, 6 -; CHECK-BE-NEXT: xvf32gerpn wacc1, vs0, vs1 -; CHECK-BE-NEXT: lxv vs0, 64(r8) -; CHECK-BE-NEXT: lxv vs1, 80(r8) +; CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1 +; CHECK-BE-NEXT: lxv vs12, 64(r8) +; CHECK-BE-NEXT: lxv vs13, 80(r8) +; CHECK-BE-NEXT: xxsetaccz acc0 ; CHECK-BE-NEXT: add r8, r3, r7 -; CHECK-BE-NEXT: xvf32gernp wacc0, vs0, vs1 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 -; CHECK-BE-NEXT: stxvx v2, r3, r7 -; CHECK-BE-NEXT: stxv v5, 48(r8) -; CHECK-BE-NEXT: stxv v4, 32(r8) -; CHECK-BE-NEXT: stxv v3, 16(r8) -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-BE-NEXT: stxv v5, 112(r8) -; CHECK-BE-NEXT: stxv v4, 96(r8) -; CHECK-BE-NEXT: stxv v3, 80(r8) -; CHECK-BE-NEXT: stxv v2, 64(r8) -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 176(r8) -; CHECK-BE-NEXT: stxv v4, 160(r8) -; CHECK-BE-NEXT: stxv v3, 144(r8) -; CHECK-BE-NEXT: stxv v2, 128(r8) +; CHECK-BE-NEXT: xxmfacc acc2 +; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13 +; CHECK-BE-NEXT: stxvx vs8, r3, r7 +; CHECK-BE-NEXT: stxv vs9, 16(r8) +; CHECK-BE-NEXT: xxmfacc acc1 +; CHECK-BE-NEXT: stxv vs11, 48(r8) +; CHECK-BE-NEXT: stxv vs10, 32(r8) +; CHECK-BE-NEXT: stxv vs5, 80(r8) +; CHECK-BE-NEXT: stxv vs4, 64(r8) +; 
CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs7, 112(r8) +; CHECK-BE-NEXT: stxv vs6, 96(r8) +; CHECK-BE-NEXT: stxv vs1, 144(r8) +; CHECK-BE-NEXT: stxv vs0, 128(r8) +; CHECK-BE-NEXT: stxv vs3, 176(r8) +; CHECK-BE-NEXT: stxv vs2, 160(r8) ; CHECK-BE-NEXT: bdnz .LBB9_2 ; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: testcse4: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: cmpwi r4, 1 -; CHECK-PWR10-NEXT: bltlr cr0 -; CHECK-PWR10-NEXT: # %bb.1: # %for.body.preheader -; CHECK-PWR10-NEXT: clrldi r4, r4, 32 -; CHECK-PWR10-NEXT: li r6, 0 -; CHECK-PWR10-NEXT: mtctr r4 -; CHECK-PWR10-NEXT: li r4, 0 -; CHECK-PWR10-NEXT: .p2align 4 -; CHECK-PWR10-NEXT: .LBB9_2: # %for.body -; CHECK-PWR10-NEXT: # -; CHECK-PWR10-NEXT: rldic r7, r6, 4, 28 -; CHECK-PWR10-NEXT: xxsetaccz acc2 -; CHECK-PWR10-NEXT: xxsetaccz acc1 -; CHECK-PWR10-NEXT: addi r6, r6, 6 -; CHECK-PWR10-NEXT: add r8, r5, r7 -; CHECK-PWR10-NEXT: lxvx vs0, r5, r7 -; CHECK-PWR10-NEXT: rldic r7, r4, 6, 26 -; CHECK-PWR10-NEXT: addi r4, r4, 3 -; CHECK-PWR10-NEXT: lxv vs1, 16(r8) -; CHECK-PWR10-NEXT: xvf32gerpp acc2, vs0, vs1 -; CHECK-PWR10-NEXT: lxv vs0, 32(r8) -; CHECK-PWR10-NEXT: lxv vs1, 48(r8) -; CHECK-PWR10-NEXT: xvf32gerpn acc1, vs0, vs1 -; CHECK-PWR10-NEXT: lxv vs12, 64(r8) -; CHECK-PWR10-NEXT: lxv vs13, 80(r8) -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: add r8, r3, r7 -; CHECK-PWR10-NEXT: xxmfacc acc2 -; CHECK-PWR10-NEXT: xvf32gernp acc0, vs12, vs13 -; CHECK-PWR10-NEXT: stxvx vs11, r3, r7 -; CHECK-PWR10-NEXT: stxv vs8, 48(r8) -; CHECK-PWR10-NEXT: xxmfacc acc1 -; CHECK-PWR10-NEXT: stxv vs9, 32(r8) -; CHECK-PWR10-NEXT: stxv vs10, 16(r8) -; CHECK-PWR10-NEXT: stxv vs4, 112(r8) -; CHECK-PWR10-NEXT: stxv vs5, 96(r8) -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs6, 80(r8) -; CHECK-PWR10-NEXT: stxv vs7, 64(r8) -; CHECK-PWR10-NEXT: stxv vs0, 176(r8) -; CHECK-PWR10-NEXT: stxv vs1, 160(r8) -; CHECK-PWR10-NEXT: stxv vs2, 144(r8) -; CHECK-PWR10-NEXT: 
stxv vs3, 128(r8) -; CHECK-PWR10-NEXT: bdnz .LBB9_2 -; CHECK-PWR10-NEXT: # %bb.3: # %for.cond.cleanup -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: testcse4: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: cmpwi r4, 1 +; CHECK-WACC-NEXT: bltlr cr0 +; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-WACC-NEXT: clrldi r4, r4, 32 +; CHECK-WACC-NEXT: mtctr r4 +; CHECK-WACC-NEXT: li r4, 0 +; CHECK-WACC-NEXT: li r6, 0 +; CHECK-WACC-NEXT: .p2align 4 +; CHECK-WACC-NEXT: .LBB9_2: # %for.body +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: rldic r7, r6, 4, 28 +; CHECK-WACC-NEXT: add r8, r5, r7 +; CHECK-WACC-NEXT: lxvx vs0, r5, r7 +; CHECK-WACC-NEXT: lxv vs1, 16(r8) +; CHECK-WACC-NEXT: dmxxsetaccz wacc2 +; CHECK-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1 +; CHECK-WACC-NEXT: lxv vs0, 32(r8) +; CHECK-WACC-NEXT: lxv vs1, 48(r8) +; CHECK-WACC-NEXT: rldic r7, r4, 6, 26 +; CHECK-WACC-NEXT: addi r4, r4, 3 +; CHECK-WACC-NEXT: addi r6, r6, 6 +; CHECK-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1 +; CHECK-WACC-NEXT: lxv vs0, 64(r8) +; CHECK-WACC-NEXT: lxv vs1, 80(r8) +; CHECK-WACC-NEXT: add r8, r3, r7 +; CHECK-WACC-NEXT: xvf32gernp wacc0, vs0, vs1 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 +; CHECK-WACC-NEXT: stxvx v3, r3, r7 +; CHECK-WACC-NEXT: stxv v4, 48(r8) +; CHECK-WACC-NEXT: stxv v5, 32(r8) +; CHECK-WACC-NEXT: stxv v2, 16(r8) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r8) +; CHECK-WACC-NEXT: stxv v5, 96(r8) +; CHECK-WACC-NEXT: stxv v2, 80(r8) +; CHECK-WACC-NEXT: stxv v3, 64(r8) +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 176(r8) +; CHECK-WACC-NEXT: stxv v5, 160(r8) +; CHECK-WACC-NEXT: stxv v2, 144(r8) +; CHECK-WACC-NEXT: stxv v3, 128(r8) +; CHECK-WACC-NEXT: bdnz .LBB9_2 +; CHECK-WACC-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: testcse4: -; CHECK-BE-PWR10: # %bb.0: # 
%entry -; CHECK-BE-PWR10-NEXT: cmpwi r4, 1 -; CHECK-BE-PWR10-NEXT: bltlr cr0 -; CHECK-BE-PWR10-NEXT: # %bb.1: # %for.body.preheader -; CHECK-BE-PWR10-NEXT: clrldi r4, r4, 32 -; CHECK-BE-PWR10-NEXT: li r6, 0 -; CHECK-BE-PWR10-NEXT: mtctr r4 -; CHECK-BE-PWR10-NEXT: li r4, 0 -; CHECK-BE-PWR10-NEXT: .p2align 4 -; CHECK-BE-PWR10-NEXT: .LBB9_2: # %for.body -; CHECK-BE-PWR10-NEXT: # -; CHECK-BE-PWR10-NEXT: rldic r7, r6, 4, 28 -; CHECK-BE-PWR10-NEXT: xxsetaccz acc2 -; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 -; CHECK-BE-PWR10-NEXT: addi r6, r6, 6 -; CHECK-BE-PWR10-NEXT: add r8, r5, r7 -; CHECK-BE-PWR10-NEXT: lxvx vs0, r5, r7 -; CHECK-BE-PWR10-NEXT: rldic r7, r4, 6, 26 -; CHECK-BE-PWR10-NEXT: addi r4, r4, 3 -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r8) -; CHECK-BE-PWR10-NEXT: xvf32gerpp acc2, vs0, vs1 -; CHECK-BE-PWR10-NEXT: lxv vs0, 32(r8) -; CHECK-BE-PWR10-NEXT: lxv vs1, 48(r8) -; CHECK-BE-PWR10-NEXT: xvf32gerpn acc1, vs0, vs1 -; CHECK-BE-PWR10-NEXT: lxv vs12, 64(r8) -; CHECK-BE-PWR10-NEXT: lxv vs13, 80(r8) -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: add r8, r3, r7 -; CHECK-BE-PWR10-NEXT: xxmfacc acc2 -; CHECK-BE-PWR10-NEXT: xvf32gernp acc0, vs12, vs13 -; CHECK-BE-PWR10-NEXT: stxvx vs8, r3, r7 -; CHECK-BE-PWR10-NEXT: stxv vs9, 16(r8) -; CHECK-BE-PWR10-NEXT: xxmfacc acc1 -; CHECK-BE-PWR10-NEXT: stxv vs11, 48(r8) -; CHECK-BE-PWR10-NEXT: stxv vs10, 32(r8) -; CHECK-BE-PWR10-NEXT: stxv vs5, 80(r8) -; CHECK-BE-PWR10-NEXT: stxv vs4, 64(r8) -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs7, 112(r8) -; CHECK-BE-PWR10-NEXT: stxv vs6, 96(r8) -; CHECK-BE-PWR10-NEXT: stxv vs1, 144(r8) -; CHECK-BE-PWR10-NEXT: stxv vs0, 128(r8) -; CHECK-BE-PWR10-NEXT: stxv vs3, 176(r8) -; CHECK-BE-PWR10-NEXT: stxv vs2, 160(r8) -; CHECK-BE-PWR10-NEXT: bdnz .LBB9_2 -; CHECK-BE-PWR10-NEXT: # %bb.3: # %for.cond.cleanup -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: testcse4: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: cmpwi r4, 1 +; CHECK-BE-WACC-NEXT: bltlr 
cr0 +; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader +; CHECK-BE-WACC-NEXT: clrldi r4, r4, 32 +; CHECK-BE-WACC-NEXT: mtctr r4 +; CHECK-BE-WACC-NEXT: li r4, 0 +; CHECK-BE-WACC-NEXT: li r6, 0 +; CHECK-BE-WACC-NEXT: .p2align 4 +; CHECK-BE-WACC-NEXT: .LBB9_2: # %for.body +; CHECK-BE-WACC-NEXT: # +; CHECK-BE-WACC-NEXT: rldic r7, r6, 4, 28 +; CHECK-BE-WACC-NEXT: add r8, r5, r7 +; CHECK-BE-WACC-NEXT: lxvx vs0, r5, r7 +; CHECK-BE-WACC-NEXT: lxv vs1, 16(r8) +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc2 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1 +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1 +; CHECK-BE-WACC-NEXT: lxv vs0, 32(r8) +; CHECK-BE-WACC-NEXT: lxv vs1, 48(r8) +; CHECK-BE-WACC-NEXT: rldic r7, r4, 6, 26 +; CHECK-BE-WACC-NEXT: addi r4, r4, 3 +; CHECK-BE-WACC-NEXT: addi r6, r6, 6 +; CHECK-BE-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1 +; CHECK-BE-WACC-NEXT: lxv vs0, 64(r8) +; CHECK-BE-WACC-NEXT: lxv vs1, 80(r8) +; CHECK-BE-WACC-NEXT: add r8, r3, r7 +; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, vs0, vs1 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 +; CHECK-BE-WACC-NEXT: stxvx v2, r3, r7 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r8) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r8) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r8) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r8) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r8) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r8) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r8) +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 176(r8) +; CHECK-BE-WACC-NEXT: stxv v4, 160(r8) +; CHECK-BE-WACC-NEXT: stxv v3, 144(r8) +; CHECK-BE-WACC-NEXT: stxv v2, 128(r8) +; CHECK-BE-WACC-NEXT: bdnz .LBB9_2 +; CHECK-BE-WACC-NEXT: # %bb.3: # %for.cond.cleanup +; CHECK-BE-WACC-NEXT: blr entry: %cmp55 = icmp sgt i32 %lim, 0 br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup @@ -896,26 +896,27 @@ define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> 
%vc) nounwind { ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: dmxxsetaccz wacc0 -; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0 -; CHECK-NEXT: stxv v0, 48(r3) -; CHECK-NEXT: stxv v1, 32(r3) -; CHECK-NEXT: stxv v4, 16(r3) -; CHECK-NEXT: stxv v5, 0(r3) -; CHECK-NEXT: xvf32gerpp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: xxsetaccz acc1 ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: stxvp vsp36, 64(r1) -; CHECK-NEXT: stxvp vsp34, 32(r1) +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-NEXT: xxmfacc acc1 +; CHECK-NEXT: stxv vs4, 80(r1) +; CHECK-NEXT: stxv vs5, 64(r1) +; CHECK-NEXT: stxv vs6, 48(r1) +; CHECK-NEXT: stxv vs7, 32(r1) ; CHECK-NEXT: bl testRedundantPrimeUnprimeF@notoc -; CHECK-NEXT: lxvp vsp34, 64(r1) -; CHECK-NEXT: lxvp vsp36, 32(r1) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 112(r30) -; CHECK-NEXT: stxv v5, 96(r30) -; CHECK-NEXT: stxv v2, 80(r30) -; CHECK-NEXT: stxv v3, 64(r30) +; CHECK-NEXT: lxvp vsp0, 64(r1) +; CHECK-NEXT: lxvp vsp2, 32(r1) +; CHECK-NEXT: stxv vs0, 112(r30) +; CHECK-NEXT: stxv vs1, 96(r30) +; CHECK-NEXT: stxv vs2, 80(r30) +; CHECK-NEXT: stxv vs3, 64(r30) ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -927,100 +928,99 @@ define void @testRedundantPrimeUnprime(ptr %dst, <16 x i8> %vc) nounwind { ; CHECK-BE-NEXT: mflr r0 ; CHECK-BE-NEXT: std r0, 16(r1) ; CHECK-BE-NEXT: stdu r1, -192(r1) -; CHECK-BE-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: xxsetaccz acc1 ; CHECK-BE-NEXT: std r30, 176(r1) # 8-byte Folded Spill -; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, 
wacc0, 0 -; CHECK-BE-NEXT: stxv v1, 48(r3) -; CHECK-BE-NEXT: stxv v0, 32(r3) -; CHECK-BE-NEXT: stxv v5, 16(r3) -; CHECK-BE-NEXT: stxv v4, 0(r3) -; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 ; CHECK-BE-NEXT: mr r30, r3 -; CHECK-BE-NEXT: stxvp vsp36, 112(r1) -; CHECK-BE-NEXT: stxvp vsp34, 144(r1) +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: xvf32gerpp acc1, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc1 +; CHECK-BE-NEXT: stxv vs4, 112(r1) +; CHECK-BE-NEXT: stxv vs5, 128(r1) +; CHECK-BE-NEXT: stxv vs6, 144(r1) +; CHECK-BE-NEXT: stxv vs7, 160(r1) ; CHECK-BE-NEXT: bl testRedundantPrimeUnprimeF ; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: lxvp vsp34, 112(r1) -; CHECK-BE-NEXT: lxvp vsp36, 144(r1) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 112(r30) -; CHECK-BE-NEXT: stxv v4, 96(r30) -; CHECK-BE-NEXT: stxv v3, 80(r30) -; CHECK-BE-NEXT: stxv v2, 64(r30) +; CHECK-BE-NEXT: lxvp vsp0, 112(r1) +; CHECK-BE-NEXT: lxvp vsp2, 144(r1) +; CHECK-BE-NEXT: stxv vs3, 112(r30) +; CHECK-BE-NEXT: stxv vs2, 96(r30) +; CHECK-BE-NEXT: stxv vs1, 80(r30) +; CHECK-BE-NEXT: stxv vs0, 64(r30) ; CHECK-BE-NEXT: ld r30, 176(r1) # 8-byte Folded Reload ; CHECK-BE-NEXT: addi r1, r1, 192 ; CHECK-BE-NEXT: ld r0, 16(r1) ; CHECK-BE-NEXT: mtlr r0 ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: testRedundantPrimeUnprime: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: mflr r0 -; CHECK-PWR10-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-PWR10-NEXT: std r0, 16(r1) -; CHECK-PWR10-NEXT: stdu r1, -112(r1) -; CHECK-PWR10-NEXT: xxsetaccz acc0 -; CHECK-PWR10-NEXT: xxsetaccz acc1 -; CHECK-PWR10-NEXT: mr r30, r3 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; 
CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc1 -; CHECK-PWR10-NEXT: stxv vs4, 80(r1) -; CHECK-PWR10-NEXT: stxv vs5, 64(r1) -; CHECK-PWR10-NEXT: stxv vs6, 48(r1) -; CHECK-PWR10-NEXT: stxv vs7, 32(r1) -; CHECK-PWR10-NEXT: bl testRedundantPrimeUnprimeF@notoc -; CHECK-PWR10-NEXT: lxvp vsp0, 64(r1) -; CHECK-PWR10-NEXT: lxvp vsp2, 32(r1) -; CHECK-PWR10-NEXT: stxv vs0, 112(r30) -; CHECK-PWR10-NEXT: stxv vs1, 96(r30) -; CHECK-PWR10-NEXT: stxv vs2, 80(r30) -; CHECK-PWR10-NEXT: stxv vs3, 64(r30) -; CHECK-PWR10-NEXT: addi r1, r1, 112 -; CHECK-PWR10-NEXT: ld r0, 16(r1) -; CHECK-PWR10-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-PWR10-NEXT: mtlr r0 -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: testRedundantPrimeUnprime: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: mflr r0 +; CHECK-WACC-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-WACC-NEXT: std r0, 16(r1) +; CHECK-WACC-NEXT: stdu r1, -112(r1) +; CHECK-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0 +; CHECK-WACC-NEXT: stxv v0, 48(r3) +; CHECK-WACC-NEXT: stxv v1, 32(r3) +; CHECK-WACC-NEXT: stxv v4, 16(r3) +; CHECK-WACC-NEXT: stxv v5, 0(r3) +; CHECK-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-WACC-NEXT: mr r30, r3 +; CHECK-WACC-NEXT: stxvp vsp36, 64(r1) +; CHECK-WACC-NEXT: stxvp vsp34, 32(r1) +; CHECK-WACC-NEXT: bl testRedundantPrimeUnprimeF@notoc +; CHECK-WACC-NEXT: lxvp vsp34, 64(r1) +; CHECK-WACC-NEXT: lxvp vsp36, 32(r1) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 112(r30) +; CHECK-WACC-NEXT: stxv v5, 96(r30) +; CHECK-WACC-NEXT: stxv v2, 80(r30) +; CHECK-WACC-NEXT: stxv v3, 64(r30) +; CHECK-WACC-NEXT: addi r1, r1, 112 +; CHECK-WACC-NEXT: ld r0, 16(r1) +; CHECK-WACC-NEXT: ld r30, -16(r1) # 8-byte Folded 
Reload +; CHECK-WACC-NEXT: mtlr r0 +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: testRedundantPrimeUnprime: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: mflr r0 -; CHECK-BE-PWR10-NEXT: std r0, 16(r1) -; CHECK-BE-PWR10-NEXT: stdu r1, -192(r1) -; CHECK-BE-PWR10-NEXT: xxsetaccz acc0 -; CHECK-BE-PWR10-NEXT: xxsetaccz acc1 -; CHECK-BE-PWR10-NEXT: std r30, 176(r1) # 8-byte Folded Spill -; CHECK-BE-PWR10-NEXT: mr r30, r3 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: xvf32gerpp acc1, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc1 -; CHECK-BE-PWR10-NEXT: stxv vs4, 112(r1) -; CHECK-BE-PWR10-NEXT: stxv vs5, 128(r1) -; CHECK-BE-PWR10-NEXT: stxv vs6, 144(r1) -; CHECK-BE-PWR10-NEXT: stxv vs7, 160(r1) -; CHECK-BE-PWR10-NEXT: bl testRedundantPrimeUnprimeF -; CHECK-BE-PWR10-NEXT: nop -; CHECK-BE-PWR10-NEXT: lxvp vsp0, 112(r1) -; CHECK-BE-PWR10-NEXT: lxvp vsp2, 144(r1) -; CHECK-BE-PWR10-NEXT: stxv vs3, 112(r30) -; CHECK-BE-PWR10-NEXT: stxv vs2, 96(r30) -; CHECK-BE-PWR10-NEXT: stxv vs1, 80(r30) -; CHECK-BE-PWR10-NEXT: stxv vs0, 64(r30) -; CHECK-BE-PWR10-NEXT: ld r30, 176(r1) # 8-byte Folded Reload -; CHECK-BE-PWR10-NEXT: addi r1, r1, 192 -; CHECK-BE-PWR10-NEXT: ld r0, 16(r1) -; CHECK-BE-PWR10-NEXT: mtlr r0 -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: testRedundantPrimeUnprime: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: mflr r0 +; CHECK-BE-WACC-NEXT: std r0, 16(r1) +; CHECK-BE-WACC-NEXT: stdu r1, -192(r1) +; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 +; CHECK-BE-WACC-NEXT: std r30, 176(r1) # 8-byte Folded Spill +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v1, 48(r3) +; CHECK-BE-WACC-NEXT: stxv v0, 32(r3) +; CHECK-BE-WACC-NEXT: stxv v5, 16(r3) +; CHECK-BE-WACC-NEXT: stxv v4, 0(r3) +; CHECK-BE-WACC-NEXT: xvf32gerpp wacc0, v2, v2 +; 
CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc0, 0 +; CHECK-BE-WACC-NEXT: mr r30, r3 +; CHECK-BE-WACC-NEXT: stxvp vsp36, 112(r1) +; CHECK-BE-WACC-NEXT: stxvp vsp34, 144(r1) +; CHECK-BE-WACC-NEXT: bl testRedundantPrimeUnprimeF +; CHECK-BE-WACC-NEXT: nop +; CHECK-BE-WACC-NEXT: lxvp vsp34, 112(r1) +; CHECK-BE-WACC-NEXT: lxvp vsp36, 144(r1) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 112(r30) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r30) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r30) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r30) +; CHECK-BE-WACC-NEXT: ld r30, 176(r1) # 8-byte Folded Reload +; CHECK-BE-WACC-NEXT: addi r1, r1, 192 +; CHECK-BE-WACC-NEXT: ld r0, 16(r1) +; CHECK-BE-WACC-NEXT: mtlr r0 +; CHECK-BE-WACC-NEXT: blr entry: %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz() store <512 x i1> %0, ptr %dst, align 64 @@ -1038,67 +1038,67 @@ declare void @llvm.ppc.vsx.stxvp(<256 x i1>, ptr) define void @test_ldst_1(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: plxvp vsp36, 8(r4), 0 -; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_1: ; 
CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: plxvp vsp36, 8(r4), 0 -; CHECK-BE-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test_ldst_1: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: plxvp vsp36, 8(r4), 0 -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test_ldst_1: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: plxvp vsp36, 8(r4), 0 +; CHECK-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) 
+; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: test_ldst_1: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: plxvp vsp36, 8(r4), 0 -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: test_ldst_1: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: plxvp vsp36, 8(r4), 0 +; CHECK-BE-WACC-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = getelementptr i8, ptr %vpp, i64 8 @@ -1112,67 +1112,67 @@ entry: define void @test_ldst_2(ptr nocapture readonly %vqp, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxvp vsp36, 0(r4) -; CHECK-NEXT: xvf64gernp wacc0, vsp36, v2 -; CHECK-NEXT: dmxxextfdmr512 
vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxvp vsp36, 0(r4) -; CHECK-BE-NEXT: xvf64gernp wacc0, vsp36, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test_ldst_2: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxvp vsp36, 0(r4) -; CHECK-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test_ldst_2: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 
16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxvp vsp36, 0(r4) +; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) +; CHECK-WACC-NEXT: stxv v3, 0(r7) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: test_ldst_2: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxvp vsp36, 0(r4) -; CHECK-BE-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: test_ldst_2: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxvp vsp36, 0(r4) +; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r7) +; CHECK-BE-WACC-NEXT: blr entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) @@ -1185,67 +1185,67 @@ entry: define void @test_ldst_3(ptr nocapture readonly %vqp, i64 %offs, ptr %vpp, <16 x i8> %vc, ptr nocapture %resp) { ; CHECK-LABEL: test_ldst_3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; 
CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxvp vsp36, 0(r5) -; CHECK-NEXT: xvf64gernp wacc0, vsp36, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r9) -; CHECK-NEXT: stxv v5, 32(r9) -; CHECK-NEXT: stxv v2, 16(r9) -; CHECK-NEXT: stxv v3, 0(r9) +; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r9) +; CHECK-NEXT: stxv vs1, 32(r9) +; CHECK-NEXT: stxv vs2, 16(r9) +; CHECK-NEXT: stxv vs3, 0(r9) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_ldst_3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxvp vsp36, 0(r5) -; CHECK-BE-NEXT: xvf64gernp wacc0, vsp36, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r9) -; CHECK-BE-NEXT: stxv v4, 32(r9) -; CHECK-BE-NEXT: stxv v3, 16(r9) -; CHECK-BE-NEXT: stxv v2, 0(r9) +; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r9) +; CHECK-BE-NEXT: stxv vs0, 0(r9) +; CHECK-BE-NEXT: stxv vs3, 48(r9) +; CHECK-BE-NEXT: stxv vs2, 32(r9) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test_ldst_3: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxvp vsp36, 0(r5) -; CHECK-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 -; 
CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r9) -; CHECK-PWR10-NEXT: stxv vs1, 32(r9) -; CHECK-PWR10-NEXT: stxv vs2, 16(r9) -; CHECK-PWR10-NEXT: stxv vs3, 0(r9) -; CHECK-PWR10-NEXT: blr +; CHECK-WACC-LABEL: test_ldst_3: +; CHECK-WACC: # %bb.0: # %entry +; CHECK-WACC-NEXT: lxv v5, 0(r3) +; CHECK-WACC-NEXT: lxv v1, 32(r3) +; CHECK-WACC-NEXT: lxv v4, 16(r3) +; CHECK-WACC-NEXT: lxv v0, 48(r3) +; CHECK-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-WACC-NEXT: lxvp vsp36, 0(r5) +; CHECK-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-WACC-NEXT: stxv v4, 48(r9) +; CHECK-WACC-NEXT: stxv v5, 32(r9) +; CHECK-WACC-NEXT: stxv v2, 16(r9) +; CHECK-WACC-NEXT: stxv v3, 0(r9) +; CHECK-WACC-NEXT: blr ; -; CHECK-BE-PWR10-LABEL: test_ldst_3: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxvp vsp36, 0(r5) -; CHECK-BE-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r9) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r9) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r9) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r9) -; CHECK-BE-PWR10-NEXT: blr +; CHECK-BE-WACC-LABEL: test_ldst_3: +; CHECK-BE-WACC: # %bb.0: # %entry +; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) +; CHECK-BE-WACC-NEXT: lxv v1, 16(r3) +; CHECK-BE-WACC-NEXT: lxv v4, 32(r3) +; CHECK-BE-WACC-NEXT: lxv v0, 0(r3) +; CHECK-BE-WACC-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-WACC-NEXT: lxvp vsp36, 0(r5) +; CHECK-BE-WACC-NEXT: xvf64gernp wacc0, vsp36, v2 +; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r9) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r9) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r9) +; CHECK-BE-WACC-NEXT: stxv v2, 0(r9) +; CHECK-BE-WACC-NEXT: blr 
entry: %0 = load <512 x i1>, ptr %vqp, align 64 %1 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %vpp) diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll index c17617d2ac248..ff860b8d6ff22 100644 --- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-PWR10 +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-WACC ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-PWR10 +; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE-WACC declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>) @@ -20,19 +20,23 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; 
CHECK-NEXT: vmr v1, v4 ; CHECK-NEXT: vmr v4, v3 ; CHECK-NEXT: vmr v0, v2 -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvi4ger8pp wacc0, v2, v4 +; CHECK-NEXT: xxlor vs3, v5, v5 ; CHECK-NEXT: ld r3, 96(r1) -; CHECK-NEXT: xvf16ger2pp wacc0, v0, v1 +; CHECK-NEXT: xxlor vs2, v4, v4 +; CHECK-NEXT: xxlor vs0, v0, v0 +; CHECK-NEXT: xxlor vs1, v1, v1 +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvi4ger8pp acc0, v2, v3 +; CHECK-NEXT: xvf16ger2pp acc0, v2, v1 +; CHECK-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 ; CHECK-NEXT: vmr v3, v2 ; CHECK-NEXT: vmr v2, v5 -; CHECK-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 -; CHECK-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r3) -; CHECK-NEXT: stxv v5, 32(r3) -; CHECK-NEXT: stxv v2, 16(r3) -; CHECK-NEXT: stxv v3, 0(r3) +; CHECK-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r3) +; CHECK-NEXT: stxv vs1, 32(r3) +; CHECK-NEXT: stxv vs2, 16(r3) +; CHECK-NEXT: stxv vs3, 0(r3) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: intrinsics1: @@ -40,68 +44,25 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; CHECK-BE-NEXT: vmr v1, v4 ; CHECK-BE-NEXT: vmr v4, v3 ; CHECK-BE-NEXT: vmr v0, v2 -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvi4ger8pp wacc0, v2, v4 +; CHECK-BE-NEXT: xxlor vs3, v5, v5 ; CHECK-BE-NEXT: ld r3, 112(r1) -; CHECK-BE-NEXT: xvf16ger2pp wacc0, v0, v1 +; CHECK-BE-NEXT: xxlor vs2, v4, v4 +; CHECK-BE-NEXT: xxlor vs0, v0, v0 +; CHECK-BE-NEXT: xxlor vs1, v1, v1 +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v3 +; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v1 +; CHECK-BE-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 ; CHECK-BE-NEXT: vmr v3, v2 ; CHECK-BE-NEXT: vmr v2, v5 -; CHECK-BE-NEXT: pmxvf32gerpn wacc0, v4, v5, 0, 0 -; CHECK-BE-NEXT: pmxvf64gernp wacc0, vsp34, v0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; 
CHECK-BE-NEXT: stxv v5, 48(r3) -; CHECK-BE-NEXT: stxv v4, 32(r3) -; CHECK-BE-NEXT: stxv v3, 16(r3) -; CHECK-BE-NEXT: stxv v2, 0(r3) +; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: intrinsics1: -; CHECK-PWR10: # %bb.0: -; CHECK-PWR10-NEXT: vmr v1, v4 -; CHECK-PWR10-NEXT: vmr v4, v3 -; CHECK-PWR10-NEXT: vmr v0, v2 -; CHECK-PWR10-NEXT: xxlor vs3, v5, v5 -; CHECK-PWR10-NEXT: ld r3, 96(r1) -; CHECK-PWR10-NEXT: xxlor vs2, v4, v4 -; CHECK-PWR10-NEXT: xxlor vs0, v0, v0 -; CHECK-PWR10-NEXT: xxlor vs1, v1, v1 -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvi4ger8pp acc0, v2, v3 -; CHECK-PWR10-NEXT: xvf16ger2pp acc0, v2, v1 -; CHECK-PWR10-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 -; CHECK-PWR10-NEXT: vmr v3, v2 -; CHECK-PWR10-NEXT: vmr v2, v5 -; CHECK-PWR10-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r3) -; CHECK-PWR10-NEXT: stxv vs1, 32(r3) -; CHECK-PWR10-NEXT: stxv vs2, 16(r3) -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: intrinsics1: -; CHECK-BE-PWR10: # %bb.0: -; CHECK-BE-PWR10-NEXT: vmr v1, v4 -; CHECK-BE-PWR10-NEXT: vmr v4, v3 -; CHECK-BE-PWR10-NEXT: vmr v0, v2 -; CHECK-BE-PWR10-NEXT: xxlor vs3, v5, v5 -; CHECK-BE-PWR10-NEXT: ld r3, 112(r1) -; CHECK-BE-PWR10-NEXT: xxlor vs2, v4, v4 -; CHECK-BE-PWR10-NEXT: xxlor vs0, v0, v0 -; CHECK-BE-PWR10-NEXT: xxlor vs1, v1, v1 -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvi4ger8pp acc0, v2, v3 -; CHECK-BE-PWR10-NEXT: xvf16ger2pp acc0, v2, v1 -; CHECK-BE-PWR10-NEXT: pmxvf32gerpn acc0, v3, v5, 0, 0 -; CHECK-BE-PWR10-NEXT: vmr v3, v2 -; CHECK-BE-PWR10-NEXT: vmr v2, v5 -; CHECK-BE-PWR10-NEXT: pmxvf64gernp acc0, vsp34, v0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv 
vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: intrinsics1: ; CHECK-WACC: # %bb.0: ; CHECK-WACC-NEXT: vmr v1, v4 @@ -121,6 +82,7 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i ; CHECK-WACC-NEXT: stxv v2, 16(r3) ; CHECK-WACC-NEXT: stxv v3, 0(r3) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: intrinsics1: ; CHECK-BE-WACC: # %bb.0: ; CHECK-BE-WACC-NEXT: vmr v1, v4 @@ -155,90 +117,51 @@ define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) { ; CHECK-LABEL: intrinsics2: ; CHECK: # %bb.0: ; CHECK-NEXT: lxv v2, 0(r3) -; CHECK-NEXT: lxv v4, 0(r5) ; CHECK-NEXT: lxv v3, 0(r4) +; CHECK-NEXT: xxlor vs0, v2, v2 +; CHECK-NEXT: lxv v4, 0(r5) ; CHECK-NEXT: lxv v5, 0(r6) +; CHECK-NEXT: xxlor vs2, v4, v4 +; CHECK-NEXT: xxlor vs3, v5, v5 +; CHECK-NEXT: xxlor vs1, v3, v3 ; CHECK-NEXT: vmr v1, v2 -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 -; CHECK-NEXT: xvi8ger4pp wacc0, v2, v3 -; CHECK-NEXT: xvf16ger2pn wacc0, v2, v4 ; CHECK-NEXT: vmr v0, v5 -; CHECK-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0 -; CHECK-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v5, 0(r3) -; CHECK-NEXT: stxv v4, 0(r4) -; CHECK-NEXT: stxv v3, 0(r5) -; CHECK-NEXT: stxv v2, 0(r6) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvi8ger4pp acc0, v2, v3 +; CHECK-NEXT: xvf16ger2pn acc0, v2, v4 +; CHECK-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 +; CHECK-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs3, 0(r3) +; CHECK-NEXT: stxv vs2, 0(r4) +; CHECK-NEXT: stxv vs1, 0(r5) +; CHECK-NEXT: stxv vs0, 0(r6) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: intrinsics2: ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: lxv v4, 0(r5) ; CHECK-BE-NEXT: lxv v3, 0(r4) +; CHECK-BE-NEXT: xxlor vs0, v2, v2 +; CHECK-BE-NEXT: lxv 
v4, 0(r5) ; CHECK-BE-NEXT: lxv v5, 0(r6) +; CHECK-BE-NEXT: xxlor vs2, v4, v4 +; CHECK-BE-NEXT: xxlor vs3, v5, v5 +; CHECK-BE-NEXT: xxlor vs1, v3, v3 ; CHECK-BE-NEXT: vmr v1, v2 -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0 -; CHECK-BE-NEXT: xvi8ger4pp wacc0, v2, v3 -; CHECK-BE-NEXT: xvf16ger2pn wacc0, v2, v4 ; CHECK-BE-NEXT: vmr v0, v5 -; CHECK-BE-NEXT: pmxvf32gernn wacc0, v3, v5, 0, 0 -; CHECK-BE-NEXT: pmxvf64gernn wacc0, vsp32, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v2, 0(r3) -; CHECK-BE-NEXT: stxv v3, 0(r4) -; CHECK-BE-NEXT: stxv v4, 0(r5) -; CHECK-BE-NEXT: stxv v5, 0(r6) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvi8ger4pp acc0, v2, v3 +; CHECK-BE-NEXT: xvf16ger2pn acc0, v2, v4 +; CHECK-BE-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 +; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 0(r4) +; CHECK-BE-NEXT: stxv vs2, 0(r5) +; CHECK-BE-NEXT: stxv vs3, 0(r6) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: intrinsics2: -; CHECK-PWR10: # %bb.0: -; CHECK-PWR10-NEXT: lxv v2, 0(r3) -; CHECK-PWR10-NEXT: lxv v3, 0(r4) -; CHECK-PWR10-NEXT: xxlor vs0, v2, v2 -; CHECK-PWR10-NEXT: lxv v4, 0(r5) -; CHECK-PWR10-NEXT: lxv v5, 0(r6) -; CHECK-PWR10-NEXT: xxlor vs2, v4, v4 -; CHECK-PWR10-NEXT: xxlor vs3, v5, v5 -; CHECK-PWR10-NEXT: xxlor vs1, v3, v3 -; CHECK-PWR10-NEXT: vmr v1, v2 -; CHECK-PWR10-NEXT: vmr v0, v5 -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvi8ger4pp acc0, v2, v3 -; CHECK-PWR10-NEXT: xvf16ger2pn acc0, v2, v4 -; CHECK-PWR10-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 -; CHECK-PWR10-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs3, 0(r3) -; CHECK-PWR10-NEXT: stxv vs2, 0(r4) -; CHECK-PWR10-NEXT: stxv vs1, 0(r5) -; CHECK-PWR10-NEXT: stxv vs0, 0(r6) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: intrinsics2: -; CHECK-BE-PWR10: # %bb.0: -; 
CHECK-BE-PWR10-NEXT: lxv v2, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v3, 0(r4) -; CHECK-BE-PWR10-NEXT: xxlor vs0, v2, v2 -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r5) -; CHECK-BE-PWR10-NEXT: lxv v5, 0(r6) -; CHECK-BE-PWR10-NEXT: xxlor vs2, v4, v4 -; CHECK-BE-PWR10-NEXT: xxlor vs3, v5, v5 -; CHECK-BE-PWR10-NEXT: xxlor vs1, v3, v3 -; CHECK-BE-PWR10-NEXT: vmr v1, v2 -; CHECK-BE-PWR10-NEXT: vmr v0, v5 -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvi8ger4pp acc0, v2, v3 -; CHECK-BE-PWR10-NEXT: xvf16ger2pn acc0, v2, v4 -; CHECK-BE-PWR10-NEXT: pmxvf32gernn acc0, v3, v5, 0, 0 -; CHECK-BE-PWR10-NEXT: pmxvf64gernn acc0, vsp32, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: stxv vs1, 0(r4) -; CHECK-BE-PWR10-NEXT: stxv vs2, 0(r5) -; CHECK-BE-PWR10-NEXT: stxv vs3, 0(r6) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: intrinsics2: ; CHECK-WACC: # %bb.0: ; CHECK-WACC-NEXT: lxv v2, 0(r3) @@ -258,6 +181,7 @@ define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) { ; CHECK-WACC-NEXT: stxv v3, 0(r5) ; CHECK-WACC-NEXT: stxv v2, 0(r6) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: intrinsics2: ; CHECK-BE-WACC: # %bb.0: ; CHECK-BE-WACC-NEXT: lxv v2, 0(r3) @@ -302,43 +226,24 @@ define void @intrinsics2(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr) { define void @test1(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvi4ger8 wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvi4ger8 acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvi4ger8 wacc0, v2, v2 -; 
CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvi4ger8 acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test1: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xvi4ger8 acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test1: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xvi4ger8 acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test1: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: xvi4ger8 wacc0, v2, v2 @@ -348,6 +253,7 @@ define void @test1(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test1: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: xvi4ger8 wacc0, v2, v2 @@ -369,63 +275,34 @@ declare <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8>, <16 x i8>) define void @test2(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvi4ger8pp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 
32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvi4ger8pp acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvi4ger8pp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test2: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test2: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 
16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvi4ger8pp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test2: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -440,6 +317,7 @@ define void @test2(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test2: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -467,43 +345,24 @@ declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test3(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test3: -; CHECK-PWR10: # %bb.0: # 
%entry -; CHECK-PWR10-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test3: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: pmxvi4ger8 acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test3: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 @@ -513,6 +372,7 @@ define void @test3(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test3: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: pmxvi4ger8 wacc0, v2, v2, 0, 0, 0 @@ -534,63 +394,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8(<16 x i8>, <16 x i8>, i32, i32, i32) define void @test4(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; 
CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test4: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvi4ger8pp wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test4: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test4: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvi4ger8pp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; 
CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test4: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -605,6 +436,7 @@ define void @test4(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test4: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -632,43 +464,24 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test5(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvi8ger4 wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvi8ger4 acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvi8ger4 wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvi8ger4 acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test5: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xvi8ger4 acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test5: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xvi8ger4 acc0, v2, v2 
-; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test5: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: xvi8ger4 wacc0, v2, v2 @@ -678,6 +491,7 @@ define void @test5(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test5: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: xvi8ger4 wacc0, v2, v2 @@ -699,63 +513,34 @@ declare <512 x i1> @llvm.ppc.mma.xvi8ger4(<16 x i8>, <16 x i8>) define void @test6(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvi8ger4pp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvi8ger4pp acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvi8ger4pp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; 
CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvi8ger4pp acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test6: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvi8ger4pp acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test6: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvi8ger4pp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test6: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -770,6 +555,7 @@ define void @test6(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test6: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -797,43 +583,24 @@ declare <512 x i1> @llvm.ppc.mma.xvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>) define void 
@test7(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test7: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test7: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: pmxvi8ger4 acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test7: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 @@ -843,6 +610,7 @@ define void @test7(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: 
stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test7: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: pmxvi8ger4 wacc0, v2, v2, 0, 0, 0 @@ -864,63 +632,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4(<16 x i8>, <16 x i8>, i32, i32, i32) define void @test8(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test8: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvi8ger4pp wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; 
CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test8: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test8: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvi8ger4pp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test8: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -935,6 +674,7 @@ define void @test8(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test8: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -962,43 +702,24 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi8ger4pp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test9(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvi16ger2s wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvi16ger2s acc0, 
v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test9: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvi16ger2s wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvi16ger2s acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test9: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xvi16ger2s acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test9: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xvi16ger2s acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test9: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: xvi16ger2s wacc0, v2, v2 @@ -1008,6 +729,7 @@ define void @test9(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test9: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: xvi16ger2s wacc0, v2, v2 @@ -1029,63 +751,34 @@ declare <512 x i1> @llvm.ppc.mma.xvi16ger2s(<16 x i8>, <16 x i8>) define void @test10(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 
0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvi16ger2spp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvi16ger2spp acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test10: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvi16ger2spp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvi16ger2spp acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test10: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvi16ger2spp acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; 
CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test10: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvi16ger2spp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test10: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1100,6 +793,7 @@ define void @test10(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test10: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -1127,43 +821,24 @@ declare <512 x i1> @llvm.ppc.mma.xvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test11(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test11: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 
16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test11: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test11: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: pmxvi16ger2s acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test11: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 @@ -1173,6 +848,7 @@ define void @test11(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test11: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: pmxvi16ger2s wacc0, v2, v2, 0, 0, 0 @@ -1194,63 +870,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2s(<16 x i8>, <16 x i8>, i32, i32, i3 define void @test12(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test12: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; 
CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test12: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvi16ger2spp wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test12: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test12: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: 
lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvi16ger2spp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test12: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1265,6 +912,7 @@ define void @test12(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test12: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -1292,43 +940,24 @@ declare <512 x i1> @llvm.ppc.mma.pmxvi16ger2spp(<512 x i1>, <16 x i8>, <16 x i8> define void @test13(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvf16ger2 wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf16ger2 acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test13: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvf16ger2 wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf16ger2 acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test13: -; CHECK-PWR10: # %bb.0: # %entry 
-; CHECK-PWR10-NEXT: xvf16ger2 acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test13: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xvf16ger2 acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test13: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: xvf16ger2 wacc0, v2, v2 @@ -1338,6 +967,7 @@ define void @test13(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test13: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: xvf16ger2 wacc0, v2, v2 @@ -1359,63 +989,34 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2(<16 x i8>, <16 x i8>) define void @test14(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf16ger2pp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf16ger2pp acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test14: ; CHECK-BE: 
# %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf16ger2pp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test14: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf16ger2pp acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test14: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf16ger2pp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test14: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1430,6 
+1031,7 @@ define void @test14(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test14: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -1457,63 +1059,34 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test15(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf16ger2pn wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf16ger2pn acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test15: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf16ger2pn wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf16ger2pn acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 
16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test15: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf16ger2pn acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test15: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf16ger2pn acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test15: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1528,6 +1101,7 @@ define void @test15(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test15: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -1555,63 +1129,34 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test16(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf16ger2np wacc0, v2, v2 -; 
CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf16ger2np acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test16: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf16ger2np wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf16ger2np acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test16: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf16ger2np acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test16: -; CHECK-BE-PWR10: # %bb.0: # %entry -; 
CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf16ger2np acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test16: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1626,6 +1171,7 @@ define void @test16(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test16: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -1653,63 +1199,34 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>) define void @test17(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test17: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf16ger2nn wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf16ger2nn acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test17: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: 
lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf16ger2nn wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf16ger2nn acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test17: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf16ger2nn acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test17: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf16ger2nn acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test17: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1724,6 +1241,7 @@ define void @test17(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 
0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test17: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -1751,43 +1269,24 @@ declare <512 x i1> @llvm.ppc.mma.xvf16ger2nn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test18(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test18: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test18: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test18: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test18: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: pmxvf16ger2 acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv 
vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test18: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 @@ -1797,6 +1296,7 @@ define void @test18(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test18: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: pmxvf16ger2 wacc0, v2, v2, 0, 0, 0 @@ -1818,63 +1318,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2(<16 x i8>, <16 x i8>, i32, i32, i32 define void @test19(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test19: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test19: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf16ger2pp wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; 
CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test19: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test19: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf16ger2pp acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test19: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1889,6 +1360,7 @@ define void @test19(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test19: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -1916,63 +1388,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test20(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { 
; CHECK-LABEL: test20: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test20: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf16ger2pn wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test20: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: 
pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test20: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf16ger2pn acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test20: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -1987,6 +1430,7 @@ define void @test20(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test20: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2014,63 +1458,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2pn(<512 x i1>, <16 x i8>, <16 x i8>, define void @test21(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test21: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf16ger2np acc0, v2, 
v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test21: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf16ger2np wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test21: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test21: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf16ger2np acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv 
vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test21: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2085,6 +1500,7 @@ define void @test21(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test21: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2112,63 +1528,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2np(<512 x i1>, <16 x i8>, <16 x i8>, define void @test22(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test22: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf16ger2nn wacc0, v2, v2, 0, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) 
-; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test22: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test22: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf16ger2nn acc0, v2, v2, 0, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test22: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2183,6 +1570,7 @@ define void @test22(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test22: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2210,43 +1598,24 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf16ger2nn(<512 x i1>, <16 x i8>, <16 x 
i8>, define void @test23(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test23: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvf32ger wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf32ger acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test23: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xvf32ger wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf32ger acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test23: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: xvf32ger acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test23: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: xvf32ger acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test23: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: xvf32ger wacc0, v2, v2 @@ -2256,6 +1625,7 @@ define void @test23(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; 
CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test23: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: xvf32ger wacc0, v2, v2 @@ -2277,63 +1647,34 @@ declare <512 x i1> @llvm.ppc.mma.xvf32ger(<16 x i8>, <16 x i8>) define void @test24(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test24: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf32gerpp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test24: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf32gerpp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf32gerpp acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: 
test24: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test24: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf32gerpp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test24: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2348,6 +1689,7 @@ define void @test24(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test24: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2375,63 +1717,34 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test25(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test25: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf32gerpn wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: 
stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test25: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf32gerpn wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf32gerpn acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test25: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test25: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; 
CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf32gerpn acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test25: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2446,6 +1759,7 @@ define void @test25(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test25: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2473,63 +1787,34 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test26(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test26: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf32gernp wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf32gernp acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test26: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf32gernp wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, 
wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf32gernp acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test26: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf32gernp acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test26: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf32gernp acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test26: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2544,6 +1829,7 @@ define void @test26(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test26: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2571,63 +1857,34 @@ 
declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>) define void @test27(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test27: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: xvf32gernn wacc0, v2, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: xvf32gernn acc0, v2, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test27: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: xvf32gernn wacc0, v2, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: xvf32gernn acc0, v2, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test27: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 
32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: xvf32gernn acc0, v2, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test27: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: xvf32gernn acc0, v2, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test27: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2642,6 +1899,7 @@ define void @test27(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test27: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2669,43 +1927,24 @@ declare <512 x i1> @llvm.ppc.mma.xvf32gernn(<512 x i1>, <16 x i8>, <16 x i8>) define void @test28(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test28: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test28: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: 
pmxvf32ger wacc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test28: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test28: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: pmxvf32ger acc0, v2, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test28: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 @@ -2715,6 +1954,7 @@ define void @test28(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test28: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: pmxvf32ger wacc0, v2, v2, 0, 0 @@ -2736,63 +1976,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32ger(<16 x i8>, <16 x i8>, i32, i32) define void @test29(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test29: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 -; 
CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test29: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf32gerpp wacc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test29: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test29: -; 
CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf32gerpp acc0, v2, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test29: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2807,6 +2018,7 @@ define void @test29(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test29: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2834,63 +2046,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test30(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test30: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test30: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv 
v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf32gerpn wacc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test30: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test30: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf32gerpn acc0, v2, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test30: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -2905,6 +2088,7 @@ define void @test30(ptr %vqp, ptr %vpp, 
<16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test30: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -2932,63 +2116,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>, define void @test31(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test31: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test31: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf32gernp wacc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; 
CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test31: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test31: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf32gernp acc0, v2, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test31: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3003,6 +2158,7 @@ define void @test31(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test31: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -3030,63 +2186,34 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>, define void @test32(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-NEXT: pmxvf32gernn wacc0, v2, 
v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) +; CHECK-NEXT: xxmtacc acc0 +; CHECK-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test32: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 -; CHECK-BE-NEXT: pmxvf32gernn wacc0, v2, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxmtacc acc0 +; CHECK-BE-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test32: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test32: -; 
CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: pmxvf32gernn acc0, v2, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test32: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3101,6 +2228,7 @@ define void @test32(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test32: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -3130,49 +2258,26 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64ger wacc0, vsp36, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf64ger acc0, vsp36, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test33: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64ger wacc0, vsp36, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf64ger acc0, vsp36, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) 
+; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test33: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: xvf64ger acc0, vsp36, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test33: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: xvf64ger acc0, vsp36, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test33: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v4, 16(r4) @@ -3184,6 +2289,7 @@ define void @test33(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test33: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v4, 0(r4) @@ -3208,71 +2314,38 @@ declare <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1>, <16 x i8>) define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test34: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gerpp wacc0, vsp36, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 
48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf64gerpp acc0, vsp36, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test34: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64gerpp wacc0, vsp36, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf64gerpp acc0, vsp36, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test34: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: xvf64gerpp acc0, vsp36, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test34: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; 
CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: xvf64gerpp acc0, vsp36, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test34: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3289,6 +2362,7 @@ define void @test34(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test34: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -3319,71 +2393,38 @@ declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>) define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test35: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gerpn wacc0, vsp36, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf64gerpn acc0, vsp36, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test35: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv 
v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64gerpn wacc0, vsp36, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf64gerpn acc0, vsp36, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test35: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: xvf64gerpn acc0, vsp36, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test35: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: xvf64gerpn acc0, vsp36, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 
32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test35: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3400,6 +2441,7 @@ define void @test35(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test35: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -3430,71 +2472,38 @@ declare <512 x i1> @llvm.ppc.mma.xvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>) define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test36: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gernp wacc0, vsp36, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test36: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64gernp wacc0, vsp36, v2 -; CHECK-BE-NEXT: 
dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf64gernp acc0, vsp36, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test36: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test36: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: xvf64gernp acc0, vsp36, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test36: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3511,6 +2520,7 @@ define void @test36(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test36: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ 
-3541,71 +2551,38 @@ declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>) define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test37: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: xvf64gernn wacc0, vsp36, v2 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: xvf64gernn acc0, vsp36, v2 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test37: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: xvf64gernn wacc0, vsp36, v2 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: xvf64gernn acc0, vsp36, v2 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; 
CHECK-PWR10-LABEL: test37: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: xvf64gernn acc0, vsp36, v2 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test37: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: xvf64gernn acc0, vsp36, v2 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test37: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3622,6 +2599,7 @@ define void @test37(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test37: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -3654,49 +2632,26 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lxv v4, 16(r4) ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: 
pmxvf64ger acc0, vsp36, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test38: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v4, 0(r4) ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64ger wacc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test38: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test38: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: pmxvf64ger acc0, vsp36, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test38: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v4, 16(r4) @@ -3708,6 +2663,7 @@ define void @test38(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test38: ; CHECK-BE-WACC: # %bb.0: # %entry ; 
CHECK-BE-WACC-NEXT: lxv v4, 0(r4) @@ -3732,71 +2688,38 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64ger(<256 x i1>, <16 x i8>, i32, i32) define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test39: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test39: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64gerpp wacc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; 
CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test39: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test39: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: pmxvf64gerpp acc0, vsp36, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test39: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3813,6 +2736,7 @@ define void @test39(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test39: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -3843,71 +2767,38 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>, define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test40: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; 
CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test40: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64gerpn wacc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test40: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; 
CHECK-PWR10-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test40: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: pmxvf64gerpn acc0, vsp36, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test40: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -3924,6 +2815,7 @@ define void @test40(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test40: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -3954,71 +2846,38 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gerpn(<512 x i1>, <256 x i1>, <16 x i8>, define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test41: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: 
stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test41: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64gernp wacc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test41: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test41: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; 
CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: pmxvf64gernp acc0, vsp36, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test41: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -4035,6 +2894,7 @@ define void @test41(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test41: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) @@ -4065,71 +2925,38 @@ declare <512 x i1> @llvm.ppc.mma.pmxvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>, define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-LABEL: test42: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lxv v5, 0(r3) -; CHECK-NEXT: lxv v1, 32(r3) -; CHECK-NEXT: lxv v4, 16(r3) -; CHECK-NEXT: lxv v0, 48(r3) -; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-NEXT: lxv vs3, 0(r3) +; CHECK-NEXT: lxv vs2, 16(r3) +; CHECK-NEXT: lxv vs1, 32(r3) +; CHECK-NEXT: lxv vs0, 48(r3) ; CHECK-NEXT: lxv v4, 16(r4) +; CHECK-NEXT: xxmtacc acc0 ; CHECK-NEXT: lxv v5, 0(r4) -; CHECK-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 -; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-NEXT: stxv v4, 48(r7) -; CHECK-NEXT: stxv v5, 32(r7) -; CHECK-NEXT: stxv v2, 16(r7) -; CHECK-NEXT: stxv v3, 0(r7) +; CHECK-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-NEXT: xxmfacc acc0 +; CHECK-NEXT: stxv vs0, 48(r7) +; CHECK-NEXT: stxv vs1, 32(r7) +; CHECK-NEXT: stxv vs2, 16(r7) +; CHECK-NEXT: stxv vs3, 0(r7) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test42: ; CHECK-BE: # %bb.0: 
# %entry -; CHECK-BE-NEXT: lxv v5, 48(r3) -; CHECK-BE-NEXT: lxv v1, 16(r3) -; CHECK-BE-NEXT: lxv v4, 32(r3) -; CHECK-BE-NEXT: lxv v0, 0(r3) -; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp32, vsp36, 0 +; CHECK-BE-NEXT: lxv vs3, 48(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: xxmtacc acc0 ; CHECK-BE-NEXT: lxv v5, 16(r4) -; CHECK-BE-NEXT: pmxvf64gernn wacc0, vsp36, v2, 0, 0 -; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-NEXT: stxv v5, 48(r7) -; CHECK-BE-NEXT: stxv v4, 32(r7) -; CHECK-BE-NEXT: stxv v3, 16(r7) -; CHECK-BE-NEXT: stxv v2, 0(r7) +; CHECK-BE-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 +; CHECK-BE-NEXT: xxmfacc acc0 +; CHECK-BE-NEXT: stxv vs1, 16(r7) +; CHECK-BE-NEXT: stxv vs0, 0(r7) +; CHECK-BE-NEXT: stxv vs3, 48(r7) +; CHECK-BE-NEXT: stxv vs2, 32(r7) ; CHECK-BE-NEXT: blr ; -; CHECK-PWR10-LABEL: test42: -; CHECK-PWR10: # %bb.0: # %entry -; CHECK-PWR10-NEXT: lxv vs3, 0(r3) -; CHECK-PWR10-NEXT: lxv vs2, 16(r3) -; CHECK-PWR10-NEXT: lxv vs1, 32(r3) -; CHECK-PWR10-NEXT: lxv vs0, 48(r3) -; CHECK-PWR10-NEXT: lxv v4, 16(r4) -; CHECK-PWR10-NEXT: xxmtacc acc0 -; CHECK-PWR10-NEXT: lxv v5, 0(r4) -; CHECK-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-PWR10-NEXT: xxmfacc acc0 -; CHECK-PWR10-NEXT: stxv vs0, 48(r7) -; CHECK-PWR10-NEXT: stxv vs1, 32(r7) -; CHECK-PWR10-NEXT: stxv vs2, 16(r7) -; CHECK-PWR10-NEXT: stxv vs3, 0(r7) -; CHECK-PWR10-NEXT: blr -; -; CHECK-BE-PWR10-LABEL: test42: -; CHECK-BE-PWR10: # %bb.0: # %entry -; CHECK-BE-PWR10-NEXT: lxv vs3, 48(r3) -; CHECK-BE-PWR10-NEXT: lxv vs2, 32(r3) -; CHECK-BE-PWR10-NEXT: lxv vs1, 16(r3) -; CHECK-BE-PWR10-NEXT: lxv vs0, 0(r3) -; CHECK-BE-PWR10-NEXT: lxv v4, 0(r4) -; CHECK-BE-PWR10-NEXT: xxmtacc acc0 -; CHECK-BE-PWR10-NEXT: lxv v5, 16(r4) -; CHECK-BE-PWR10-NEXT: pmxvf64gernn acc0, vsp36, v2, 0, 0 -; CHECK-BE-PWR10-NEXT: xxmfacc acc0 -; CHECK-BE-PWR10-NEXT: stxv vs1, 16(r7) -; 
CHECK-BE-PWR10-NEXT: stxv vs0, 0(r7) -; CHECK-BE-PWR10-NEXT: stxv vs3, 48(r7) -; CHECK-BE-PWR10-NEXT: stxv vs2, 32(r7) -; CHECK-BE-PWR10-NEXT: blr ; CHECK-WACC-LABEL: test42: ; CHECK-WACC: # %bb.0: # %entry ; CHECK-WACC-NEXT: lxv v5, 0(r3) @@ -4146,6 +2973,7 @@ define void @test42(ptr %vqp, ptr %vpp, <16 x i8> %vc, ptr %resp) { ; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: stxv v3, 0(r7) ; CHECK-WACC-NEXT: blr +; ; CHECK-BE-WACC-LABEL: test42: ; CHECK-BE-WACC: # %bb.0: # %entry ; CHECK-BE-WACC-NEXT: lxv v5, 48(r3) From e44d0fce5706d7cd0e849e0de209cbd23a52225c Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 30 Oct 2025 17:22:34 -0500 Subject: [PATCH 5/5] update phi-liveness test --- .../PowerPC/peephole-mma-phi-liveness.ll | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll b/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll index 291cf97fd009e..929bf5f61dd90 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll +++ b/llvm/test/CodeGen/PowerPC/peephole-mma-phi-liveness.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=future \ +; RUN: -mtriple=powerpc64-ibm-aix < %s | FileCheck %s --check-prefix=CHECK-WACC target datalayout = "E-m:a-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512" @@ -38,6 +40,43 @@ define void @baz(i64 %arg) local_unnamed_addr #0 { ; CHECK-NEXT: xxswapd 0, 0 ; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: blr +; +; CHECK-WACC-LABEL: baz: +; CHECK-WACC: # %bb.0: # %bb +; CHECK-WACC-NEXT: dmxxextfdmr512 34, 36, 0, 0 +; CHECK-WACC-NEXT: xxmrgld 1, 34, 36 +; CHECK-WACC-NEXT: xxswapd 2, 1 +; CHECK-WACC-NEXT: xxlxor 0, 0, 0 +; CHECK-WACC-NEXT: xvnegdp 1, 1 +; CHECK-WACC-NEXT: xvnegdp 2, 2 +; CHECK-WACC-NEXT: xvsubdp 1, 1, 0 +; CHECK-WACC-NEXT: xvsubdp 2, 2, 37 
+; CHECK-WACC-NEXT: xvmuldp 1, 1, 0 +; CHECK-WACC-NEXT: xvmuldp 2, 2, 0 +; CHECK-WACC-NEXT: xvmaddadp 1, 0, 0 +; CHECK-WACC-NEXT: xvmaddadp 2, 0, 0 +; CHECK-WACC-NEXT: stxv 1, 0(3) +; CHECK-WACC-NEXT: stxv 2, 0(3) +; CHECK-WACC-NEXT: # implicit-def: $wacc0 +; CHECK-WACC-NEXT: bc 12, 20, L..BB0_2 +; CHECK-WACC-NEXT: # %bb.1: # %bb10 +; CHECK-WACC-NEXT: xvf64gerpp 0, 34, 0 +; CHECK-WACC-NEXT: L..BB0_2: # %bb12 +; CHECK-WACC-NEXT: cmpdi 3, 0 +; CHECK-WACC-NEXT: .align 4 +; CHECK-WACC-NEXT: L..BB0_3: # %bb13 +; CHECK-WACC-NEXT: # +; CHECK-WACC-NEXT: bc 4, 2, L..BB0_3 +; CHECK-WACC-NEXT: # %bb.4: # %bb14 +; CHECK-WACC-NEXT: dmxxextfdmr512 34, 36, 0, 0 +; CHECK-WACC-NEXT: xxlxor 0, 0, 0 +; CHECK-WACC-NEXT: xvsubdp 1, 0, 35 +; CHECK-WACC-NEXT: xxlxor 2, 2, 2 +; CHECK-WACC-NEXT: xvmaddadp 2, 1, 2 +; CHECK-WACC-NEXT: xvadddp 0, 2, 0 +; CHECK-WACC-NEXT: xxswapd 0, 0 +; CHECK-WACC-NEXT: stxv 0, 0(3) +; CHECK-WACC-NEXT: blr bb: %call = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> poison) %extractvalue = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %call, 0