From 715cf6ffb9a0491aa8749bf024d741de520fa1f2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 24 Sep 2021 14:29:55 -0700
Subject: [PATCH] [RISCV] Add another isel optimization for (and (shl X, c2), c1).

Where c1 is a shifted mask with (32-c2) leading zeros and c3 trailing
zeros, and c3 > c2. We can select it as (slli (srliw X, c3-c2), c3).
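For a concrete example, taking the constants straight from the
bswap_i64 test updates below: with c2=8 and c1=0xff00000000 (24
leading zeros, 32 trailing zeros, so c3=32), the expansion previously
shifted left and masked with a materialized 64-bit constant (a3 holds
255 here):

  slli a2, a0, 8
  slli a4, a3, 32
  and  a2, a2, a4

and now selects:

  srliw a4, a0, 24
  slli  a4, a4, 32

srliw extracts bits 31:24 of X and zero extends them, so the
following slli produces exactly the bits that the c1 mask would have
kept, without materializing the mask.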
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  11 +
 .../CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll    |  11 +-
 llvm/test/CodeGen/RISCV/rv64zbb.ll            |  11 +-
 llvm/test/CodeGen/RISCV/rv64zbp.ll            |  94 ++--
 .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll  | 406 +++++++++---------
 5 files changed, 267 insertions(+), 266 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index ac9bc5b05fbed..f942821e6bd81 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -700,6 +700,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
         ReplaceNode(Node, SLLI);
         return;
       }
+      // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
+      if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
+        SDNode *SRLIW = CurDAG->getMachineNode(
+            RISCV::SRLIW, DL, XLenVT, X,
+            CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+        SDNode *SLLI =
+            CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
+                                   CurDAG->getTargetConstant(C3, DL, XLenVT));
+        ReplaceNode(Node, SLLI);
+        return;
+      }
     }
     break;

diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index d3a332c8b7085..8113b8d604d7a 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -115,13 +115,12 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index c5ff85f8d6d17..8334ab0206c32 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -1585,13 +1585,12 @@ define i64 @bswap_i64(i64 %a) {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 96121858ff53c..0045248bf35d3 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2752,13 +2752,12 @@ define i64 @bswap_i64(i64 %a) {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3
@@ -2988,13 +2987,12 @@ define i64 @bitreverse_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    srli a4, a0, 56
 ; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    slli a4, a3, 32
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
 ; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli a5, a3, 40
-; RV64I-NEXT:    and a4, a4, a5
-; RV64I-NEXT:    or a2, a4, a2
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
 ; RV64I-NEXT:    slli a4, a0, 40
 ; RV64I-NEXT:    slli a3, a3, 48
 ; RV64I-NEXT:    and a3, a4, a3
@@ -3182,31 +3180,30 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    srli a2, a0, 24
 ; RV64I-NEXT:    lui a6, 4080
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    addi a5, zero, 255
-; RV64I-NEXT:    slli a7, a5, 24
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    or a3, a3, a1
+; RV64I-NEXT:    and a3, a2, a6
+; RV64I-NEXT:    srli a4, a0, 8
+; RV64I-NEXT:    addi a1, zero, 255
+; RV64I-NEXT:    slli a7, a1, 24
+; RV64I-NEXT:    and a4, a4, a7
+; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    srli a4, a0, 40
-; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw a1, a1, -256
-; RV64I-NEXT:    and a4, a4, a1
+; RV64I-NEXT:    lui a5, 16
+; RV64I-NEXT:    addiw a5, a5, -256
+; RV64I-NEXT:    and a4, a4, a5
 ; RV64I-NEXT:    srli a2, a0, 56
 ; RV64I-NEXT:    or a2, a4, a2
 ; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    slli a4, a0, 8
-; RV64I-NEXT:    slli t0, a5, 32
-; RV64I-NEXT:    and a3, a4, t0
 ; RV64I-NEXT:    slli a4, a0, 24
-; RV64I-NEXT:    slli t1, a5, 40
-; RV64I-NEXT:    and a4, a4, t1
+; RV64I-NEXT:    slli t0, a1, 40
+; RV64I-NEXT:    and a4, a4, t0
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    slli a4, a0, 40
-; RV64I-NEXT:    slli a5, a5, 48
-; RV64I-NEXT:    and a4, a4, a5
+; RV64I-NEXT:    slli a1, a1, 48
+; RV64I-NEXT:    and a4, a4, a1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    or a0, a0, a4
 ; RV64I-NEXT:    or a0, a0, a3
@@ -3251,26 +3248,26 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-NEXT:    slli a0, a0, 1
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    srli a2, a0, 40
-; RV64I-NEXT:    and a1, a2, a1
-; RV64I-NEXT:    srli a2, a0, 56
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    srli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a6
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a7
+; RV64I-NEXT:    and a2, a2, a5
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    and a3, a3, a6
+; RV64I-NEXT:    srli a4, a0, 8
+; RV64I-NEXT:    and a4, a4, a7
+; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, t0
 ; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    and a3, a3, t1
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a5
+; RV64I-NEXT:    and a3, a3, t0
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a3, a3, a4
+; RV64I-NEXT:    slli a4, a0, 40
+; RV64I-NEXT:    and a1, a4, a1
 ; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    or a0, a0, a3
 ; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64B-LABEL: bitreverse_bswap_i64:
@@ -3637,13 +3634,12 @@ define i64 @shfl16(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    slli a1, a1, 16
 ; RV64I-NEXT:    addi a1, a1, -1
 ; RV64I-NEXT:    and a1, a0, a1
-; RV64I-NEXT:    slli a2, a0, 16
-; RV64I-NEXT:    lui a3, 65535
-; RV64I-NEXT:    slli a4, a3, 20
-; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    srliw a2, a0, 16
+; RV64I-NEXT:    slli a2, a2, 32
 ; RV64I-NEXT:    or a1, a2, a1
 ; RV64I-NEXT:    srli a0, a0, 16
-; RV64I-NEXT:    slli a2, a3, 4
+; RV64I-NEXT:    lui a2, 65535
+; RV64I-NEXT:    slli a2, a2, 4
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 262a02e9735a9..16086b3272327 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -631,53 +631,52 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    addiw a7, a3, -256
 ; LMULMAX2-RV64-NEXT:    and a2, a2, a7
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 56
-; LMULMAX2-RV64-NEXT:    or t0, a2, a4
+; LMULMAX2-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 24
 ; LMULMAX2-RV64-NEXT:    lui a6, 4080
 ; LMULMAX2-RV64-NEXT:    and a4, a4, a6
 ; LMULMAX2-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX2-RV64-NEXT:    addi a3, zero, 255
-; LMULMAX2-RV64-NEXT:    slli a2, a3, 24
-; LMULMAX2-RV64-NEXT:    and a5, a5, a2
+; LMULMAX2-RV64-NEXT:    addi t0, zero, 255
+; LMULMAX2-RV64-NEXT:    slli a3, t0, 24
+; LMULMAX2-RV64-NEXT:    and a5, a5, a3
 ; LMULMAX2-RV64-NEXT:    or a4, a5, a4
-; LMULMAX2-RV64-NEXT:    or t0, a4, t0
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 8
-; LMULMAX2-RV64-NEXT:    slli t1, a3, 32
-; LMULMAX2-RV64-NEXT:    and a5, a5, t1
+; LMULMAX2-RV64-NEXT:    or t1, a4, a2
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    slli t2, a3, 40
+; LMULMAX2-RV64-NEXT:    slli t2, t0, 40
 ; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a4, a4, a5
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX2-RV64-NEXT:    slli a3, a3, 48
-; LMULMAX2-RV64-NEXT:    and a5, a5, a3
+; LMULMAX2-RV64-NEXT:    srliw a2, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a2, a2, 32
+; LMULMAX2-RV64-NEXT:    or a2, a4, a2
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    slli a5, t0, 48
+; LMULMAX2-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX2-RV64-NEXT:    or a1, a1, a5
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
-; LMULMAX2-RV64-NEXT:    or a1, a1, t0
+; LMULMAX2-RV64-NEXT:    or a1, a1, a2
+; LMULMAX2-RV64-NEXT:    or a1, a1, t1
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX2-RV64-NEXT:    vmv.v.x v26, a1
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v25
-; LMULMAX2-RV64-NEXT:    srli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, a6
-; LMULMAX2-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX2-RV64-NEXT:    and a2, a5, a2
-; LMULMAX2-RV64-NEXT:    or a2, a2, a4
-; LMULMAX2-RV64-NEXT:    srli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
-; LMULMAX2-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX2-RV64-NEXT:    or a4, a4, a5
-; LMULMAX2-RV64-NEXT:    or a2, a2, a4
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, t1
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX2-RV64-NEXT:    and a5, a5, t2
-; LMULMAX2-RV64-NEXT:    or a4, a5, a4
-; LMULMAX2-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX2-RV64-NEXT:    and a3, a5, a3
+; LMULMAX2-RV64-NEXT:    srli a2, a1, 24
+; LMULMAX2-RV64-NEXT:    and a2, a2, a6
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
+; LMULMAX2-RV64-NEXT:    and a3, a4, a3
+; LMULMAX2-RV64-NEXT:    or a2, a3, a2
+; LMULMAX2-RV64-NEXT:    srli a3, a1, 40
+; LMULMAX2-RV64-NEXT:    and a3, a3, a7
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 56
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
+; LMULMAX2-RV64-NEXT:    or a2, a2, a3
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
+; LMULMAX2-RV64-NEXT:    and a3, a3, t2
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX2-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
+; LMULMAX2-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX2-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
 ; LMULMAX2-RV64-NEXT:    vmv.s.x v26, a1
@@ -762,53 +761,52 @@ define void @bswap_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT:    addiw a7, a3, -256
 ; LMULMAX1-RV64-NEXT:    and a2, a2, a7
 ; LMULMAX1-RV64-NEXT:    srli a4, a1, 56
-; LMULMAX1-RV64-NEXT:    or t0, a2, a4
+; LMULMAX1-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX1-RV64-NEXT:    srli a4, a1, 24
 ; LMULMAX1-RV64-NEXT:    lui a6, 4080
 ; LMULMAX1-RV64-NEXT:    and a4, a4, a6
 ; LMULMAX1-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    addi a3, zero, 255
-; LMULMAX1-RV64-NEXT:    slli a2, a3, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, a2
+; LMULMAX1-RV64-NEXT:    addi t0, zero, 255
+; LMULMAX1-RV64-NEXT:    slli a3, t0, 24
+; LMULMAX1-RV64-NEXT:    and a5, a5, a3
 ; LMULMAX1-RV64-NEXT:    or a4, a5, a4
-; LMULMAX1-RV64-NEXT:    or t0, a4, t0
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    slli t1, a3, 32
-; LMULMAX1-RV64-NEXT:    and a5, a5, t1
+; LMULMAX1-RV64-NEXT:    or t1, a4, a2
 ; LMULMAX1-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX1-RV64-NEXT:    slli t2, a3, 40
+; LMULMAX1-RV64-NEXT:    slli t2, t0, 40
 ; LMULMAX1-RV64-NEXT:    and a4, a4, t2
-; LMULMAX1-RV64-NEXT:    or a4, a4, a5
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    slli a3, a3, 48
-; LMULMAX1-RV64-NEXT:    and a5, a5, a3
+; LMULMAX1-RV64-NEXT:    srliw a2, a1, 24
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 32
+; LMULMAX1-RV64-NEXT:    or a2, a4, a2
+; LMULMAX1-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX1-RV64-NEXT:    slli a5, t0, 48
+; LMULMAX1-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a4
-; LMULMAX1-RV64-NEXT:    or a1, a1, t0
+; LMULMAX1-RV64-NEXT:    or a1, a1, a2
+; LMULMAX1-RV64-NEXT:    or a1, a1, t1
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vmv.v.x v26, a1
 ; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v25
-; LMULMAX1-RV64-NEXT:    srli a4, a1, 24
-; LMULMAX1-RV64-NEXT:    and a4, a4, a6
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    and a2, a5, a2
-; LMULMAX1-RV64-NEXT:    or a2, a2, a4
-; LMULMAX1-RV64-NEXT:    srli a4, a1, 40
-; LMULMAX1-RV64-NEXT:    and a4, a4, a7
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX1-RV64-NEXT:    or a4, a4, a5
-; LMULMAX1-RV64-NEXT:    or a2, a2, a4
-; LMULMAX1-RV64-NEXT:    slli a4, a1, 8
-; LMULMAX1-RV64-NEXT:    and a4, a4, t1
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t2
-; LMULMAX1-RV64-NEXT:    or a4, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a3, a5, a3
+; LMULMAX1-RV64-NEXT:    srli a2, a1, 24
+; LMULMAX1-RV64-NEXT:    and a2, a2, a6
+; LMULMAX1-RV64-NEXT:    srli a4, a1, 8
+; LMULMAX1-RV64-NEXT:    and a3, a4, a3
+; LMULMAX1-RV64-NEXT:    or a2, a3, a2
+; LMULMAX1-RV64-NEXT:    srli a3, a1, 40
+; LMULMAX1-RV64-NEXT:    and a3, a3, a7
+; LMULMAX1-RV64-NEXT:    srli a4, a1, 56
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    or a2, a2, a3
+; LMULMAX1-RV64-NEXT:    slli a3, a1, 24
+; LMULMAX1-RV64-NEXT:    and a3, a3, t2
+; LMULMAX1-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX1-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX1-RV64-NEXT:    and a4, a4, a5
 ; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a4
+; LMULMAX1-RV64-NEXT:    or a1, a1, a3
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
 ; LMULMAX1-RV64-NEXT:    vmv.s.x v26, a1
@@ -1980,57 +1978,56 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    andi sp, sp, -32
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
 ; LMULMAX2-RV64-NEXT:    vle64.v v26, (a0)
-; LMULMAX2-RV64-NEXT:    vmv.x.s a2, v26
-; LMULMAX2-RV64-NEXT:    srli a1, a2, 24
+; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v26
+; LMULMAX2-RV64-NEXT:    srli a2, a1, 24
 ; LMULMAX2-RV64-NEXT:    lui a6, 4080
-; LMULMAX2-RV64-NEXT:    and a1, a1, a6
-; LMULMAX2-RV64-NEXT:    srli a3, a2, 8
-; LMULMAX2-RV64-NEXT:    addi a5, zero, 255
-; LMULMAX2-RV64-NEXT:    slli a7, a5, 24
-; LMULMAX2-RV64-NEXT:    and a3, a3, a7
-; LMULMAX2-RV64-NEXT:    or a3, a3, a1
-; LMULMAX2-RV64-NEXT:    srli a4, a2, 40
-; LMULMAX2-RV64-NEXT:    lui a1, 16
-; LMULMAX2-RV64-NEXT:    addiw t0, a1, -256
+; LMULMAX2-RV64-NEXT:    and a3, a2, a6
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
+; LMULMAX2-RV64-NEXT:    addi a7, zero, 255
+; LMULMAX2-RV64-NEXT:    slli t0, a7, 24
 ; LMULMAX2-RV64-NEXT:    and a4, a4, t0
-; LMULMAX2-RV64-NEXT:    srli a1, a2, 56
-; LMULMAX2-RV64-NEXT:    or a1, a4, a1
-; LMULMAX2-RV64-NEXT:    or a1, a3, a1
-; LMULMAX2-RV64-NEXT:    slli a4, a2, 8
-; LMULMAX2-RV64-NEXT:    slli t1, a5, 32
-; LMULMAX2-RV64-NEXT:    and a3, a4, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a2, 24
-; LMULMAX2-RV64-NEXT:    slli t2, a5, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
-; LMULMAX2-RV64-NEXT:    slli a4, a2, 40
-; LMULMAX2-RV64-NEXT:    slli a5, a5, 48
+; LMULMAX2-RV64-NEXT:    srli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    lui a5, 16
+; LMULMAX2-RV64-NEXT:    addiw a5, a5, -256
 ; LMULMAX2-RV64-NEXT:    and a4, a4, a5
-; LMULMAX2-RV64-NEXT:    slli a2, a2, 56
-; LMULMAX2-RV64-NEXT:    or a2, a2, a4
-; LMULMAX2-RV64-NEXT:    or a2, a2, a3
-; LMULMAX2-RV64-NEXT:    or a1, a2, a1
+; LMULMAX2-RV64-NEXT:    srli a2, a1, 56
+; LMULMAX2-RV64-NEXT:    or a2, a4, a2
+; LMULMAX2-RV64-NEXT:    or a2, a3, a2
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli t1, a7, 40
+; LMULMAX2-RV64-NEXT:    and a4, a4, t1
+; LMULMAX2-RV64-NEXT:    srliw a3, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a3, a3, 32
+; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
+; LMULMAX2-RV64-NEXT:    slli a7, a7, 48
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
+; LMULMAX2-RV64-NEXT:    or a1, a1, a4
+; LMULMAX2-RV64-NEXT:    or a1, a1, a3
+; LMULMAX2-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX2-RV64-NEXT:    sd a1, 32(sp)
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, mu
 ; LMULMAX2-RV64-NEXT:    vslidedown.vi v28, v26, 3
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX2-RV64-NEXT:    and a2, a2, t0
+; LMULMAX2-RV64-NEXT:    and a2, a2, a5
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a2, a2, a3
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, a6
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    and a4, a4, t0
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX2-RV64-NEXT:    or a2, a3, a2
-; LMULMAX2-RV64-NEXT:    slli a3, a1, 8
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a5
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a3
@@ -2039,22 +2036,22 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    vslidedown.vi v28, v26, 2
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v28
 ; LMULMAX2-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX2-RV64-NEXT:    and a2, a2, t0
+; LMULMAX2-RV64-NEXT:    and a2, a2, a5
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a2, a2, a3
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, a6
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    and a4, a4, t0
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX2-RV64-NEXT:    or a2, a3, a2
-; LMULMAX2-RV64-NEXT:    slli a3, a1, 8
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a5
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a3
@@ -2063,22 +2060,22 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX2-RV64-NEXT:    vslidedown.vi v26, v26, 1
 ; LMULMAX2-RV64-NEXT:    vmv.x.s a1, v26
 ; LMULMAX2-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX2-RV64-NEXT:    and a2, a2, t0
+; LMULMAX2-RV64-NEXT:    and a2, a2, a5
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a2, a2, a3
 ; LMULMAX2-RV64-NEXT:    srli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, a6
 ; LMULMAX2-RV64-NEXT:    srli a4, a1, 8
-; LMULMAX2-RV64-NEXT:    and a4, a4, a7
+; LMULMAX2-RV64-NEXT:    and a4, a4, t0
 ; LMULMAX2-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX2-RV64-NEXT:    or a2, a3, a2
-; LMULMAX2-RV64-NEXT:    slli a3, a1, 8
+; LMULMAX2-RV64-NEXT:    slli a3, a1, 24
 ; LMULMAX2-RV64-NEXT:    and a3, a3, t1
-; LMULMAX2-RV64-NEXT:    slli a4, a1, 24
-; LMULMAX2-RV64-NEXT:    and a4, a4, t2
-; LMULMAX2-RV64-NEXT:    or a3, a4, a3
+; LMULMAX2-RV64-NEXT:    srliw a4, a1, 24
+; LMULMAX2-RV64-NEXT:    slli a4, a4, 32
+; LMULMAX2-RV64-NEXT:    or a3, a3, a4
 ; LMULMAX2-RV64-NEXT:    slli a4, a1, 40
-; LMULMAX2-RV64-NEXT:    and a4, a4, a5
+; LMULMAX2-RV64-NEXT:    and a4, a4, a7
 ; LMULMAX2-RV64-NEXT:    slli a1, a1, 56
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX2-RV64-NEXT:    or a1, a1, a3
@@ -2220,110 +2217,109 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-NEXT:    vle64.v v25, (a0)
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vslidedown.vi v26, v27, 1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v26
-; LMULMAX1-RV64-NEXT:    srli a1, a2, 40
-; LMULMAX1-RV64-NEXT:    lui a3, 16
-; LMULMAX1-RV64-NEXT:    addiw t0, a3, -256
+; LMULMAX1-RV64-NEXT:    vmv.x.s a4, v26
+; LMULMAX1-RV64-NEXT:    srli a1, a4, 40
+; LMULMAX1-RV64-NEXT:    lui a2, 16
+; LMULMAX1-RV64-NEXT:    addiw t0, a2, -256
 ; LMULMAX1-RV64-NEXT:    and a1, a1, t0
-; LMULMAX1-RV64-NEXT:    srli a3, a2, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    srli a3, a2, 24
+; LMULMAX1-RV64-NEXT:    srli a3, a4, 56
+; LMULMAX1-RV64-NEXT:    or a3, a1, a3
+; LMULMAX1-RV64-NEXT:    srli a1, a4, 24
 ; LMULMAX1-RV64-NEXT:    lui a7, 4080
-; LMULMAX1-RV64-NEXT:    and a3, a3, a7
-; LMULMAX1-RV64-NEXT:    srli a5, a2, 8
-; LMULMAX1-RV64-NEXT:    addi a4, zero, 255
-; LMULMAX1-RV64-NEXT:    slli t1, a4, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t1
+; LMULMAX1-RV64-NEXT:    and a5, a1, a7
+; LMULMAX1-RV64-NEXT:    srli a2, a4, 8
+; LMULMAX1-RV64-NEXT:    addi a1, zero, 255
+; LMULMAX1-RV64-NEXT:    slli t1, a1, 24
+; LMULMAX1-RV64-NEXT:    and a2, a2, t1
+; LMULMAX1-RV64-NEXT:    or a2, a2, a5
+; LMULMAX1-RV64-NEXT:    or a2, a2, a3
+; LMULMAX1-RV64-NEXT:    slli a5, a4, 24
+; LMULMAX1-RV64-NEXT:    slli t2, a1, 40
+; LMULMAX1-RV64-NEXT:    and a5, a5, t2
+; LMULMAX1-RV64-NEXT:    srliw a3, a4, 24
+; LMULMAX1-RV64-NEXT:    slli a3, a3, 32
 ; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    or a3, a3, a1
-; LMULMAX1-RV64-NEXT:    slli a1, a2, 8
-; LMULMAX1-RV64-NEXT:    slli t2, a4, 32
-; LMULMAX1-RV64-NEXT:    and a1, a1, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a2, 24
-; LMULMAX1-RV64-NEXT:    slli t3, a4, 40
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a5, a5, a1
-; LMULMAX1-RV64-NEXT:    slli a1, a2, 40
-; LMULMAX1-RV64-NEXT:    slli a4, a4, 48
-; LMULMAX1-RV64-NEXT:    and a1, a1, a4
-; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
-; LMULMAX1-RV64-NEXT:    or a1, a2, a1
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vmv.v.x v26, a1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v27
-; LMULMAX1-RV64-NEXT:    srli a2, a1, 24
-; LMULMAX1-RV64-NEXT:    and a2, a2, a7
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t1
+; LMULMAX1-RV64-NEXT:    slli a5, a4, 40
+; LMULMAX1-RV64-NEXT:    slli a1, a1, 48
+; LMULMAX1-RV64-NEXT:    and a5, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a4, a4, 56
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
 ; LMULMAX1-RV64-NEXT:    or a2, a3, a2
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 40
-; LMULMAX1-RV64-NEXT:    and a3, a3, t0
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX1-RV64-NEXT:    or a3, a3, a5
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v26, a2
+; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v27
+; LMULMAX1-RV64-NEXT:    srli a3, a2, 24
+; LMULMAX1-RV64-NEXT:    and a3, a3, a7
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 8
+; LMULMAX1-RV64-NEXT:    and a4, a4, t1
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 40
+; LMULMAX1-RV64-NEXT:    and a4, a4, t0
+; LMULMAX1-RV64-NEXT:    srli a5, a2, 56
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    slli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, t2
+; LMULMAX1-RV64-NEXT:    srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT:    slli a5, a5, 32
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    slli a5, a2, 40
+; LMULMAX1-RV64-NEXT:    and a5, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
+; LMULMAX1-RV64-NEXT:    or a2, a2, a5
+; LMULMAX1-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX1-RV64-NEXT:    or a2, a2, a3
-; LMULMAX1-RV64-NEXT:    slli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a5, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
-; LMULMAX1-RV64-NEXT:    vmv.s.x v26, a1
+; LMULMAX1-RV64-NEXT:    vmv.s.x v26, a2
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
 ; LMULMAX1-RV64-NEXT:    vslidedown.vi v27, v25, 1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v27
-; LMULMAX1-RV64-NEXT:    srli a2, a1, 40
-; LMULMAX1-RV64-NEXT:    and a2, a2, t0
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 56
-; LMULMAX1-RV64-NEXT:    or a2, a2, a3
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 24
-; LMULMAX1-RV64-NEXT:    and a3, a3, a7
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 8
-; LMULMAX1-RV64-NEXT:    and a5, a5, t1
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    or a2, a3, a2
-; LMULMAX1-RV64-NEXT:    slli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a5, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
-; LMULMAX1-RV64-NEXT:    or a1, a1, a5
-; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    or a1, a1, a2
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    vmv.v.x v27, a1
-; LMULMAX1-RV64-NEXT:    vmv.x.s a1, v25
-; LMULMAX1-RV64-NEXT:    srli a2, a1, 24
-; LMULMAX1-RV64-NEXT:    and a2, a2, a7
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t1
-; LMULMAX1-RV64-NEXT:    or a2, a3, a2
-; LMULMAX1-RV64-NEXT:    srli a3, a1, 40
+; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v27
+; LMULMAX1-RV64-NEXT:    srli a3, a2, 40
 ; LMULMAX1-RV64-NEXT:    and a3, a3, t0
-; LMULMAX1-RV64-NEXT:    srli a5, a1, 56
-; LMULMAX1-RV64-NEXT:    or a3, a3, a5
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 56
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, a7
+; LMULMAX1-RV64-NEXT:    srli a5, a2, 8
+; LMULMAX1-RV64-NEXT:    and a5, a5, t1
+; LMULMAX1-RV64-NEXT:    or a4, a5, a4
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
+; LMULMAX1-RV64-NEXT:    slli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, t2
+; LMULMAX1-RV64-NEXT:    srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT:    slli a5, a5, 32
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    slli a5, a2, 40
+; LMULMAX1-RV64-NEXT:    and a5, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
+; LMULMAX1-RV64-NEXT:    or a2, a2, a5
+; LMULMAX1-RV64-NEXT:    or a2, a2, a4
 ; LMULMAX1-RV64-NEXT:    or a2, a2, a3
-; LMULMAX1-RV64-NEXT:    slli a3, a1, 8
-; LMULMAX1-RV64-NEXT:    and a3, a3, t2
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 24
-; LMULMAX1-RV64-NEXT:    and a5, a5, t3
-; LMULMAX1-RV64-NEXT:    or a3, a5, a3
-; LMULMAX1-RV64-NEXT:    slli a5, a1, 40
-; LMULMAX1-RV64-NEXT:    and a4, a5, a4
-; LMULMAX1-RV64-NEXT:    slli a1, a1, 56
+; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v27, a2
+; LMULMAX1-RV64-NEXT:    vmv.x.s a2, v25
+; LMULMAX1-RV64-NEXT:    srli a3, a2, 24
+; LMULMAX1-RV64-NEXT:    and a3, a3, a7
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 8
+; LMULMAX1-RV64-NEXT:    and a4, a4, t1
+; LMULMAX1-RV64-NEXT:    or a3, a4, a3
+; LMULMAX1-RV64-NEXT:    srli a4, a2, 40
+; LMULMAX1-RV64-NEXT:    and a4, a4, t0
+; LMULMAX1-RV64-NEXT:    srli a5, a2, 56
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    or a3, a3, a4
+; LMULMAX1-RV64-NEXT:    slli a4, a2, 24
+; LMULMAX1-RV64-NEXT:    and a4, a4, t2
+; LMULMAX1-RV64-NEXT:    srliw a5, a2, 24
+; LMULMAX1-RV64-NEXT:    slli a5, a5, 32
+; LMULMAX1-RV64-NEXT:    or a4, a4, a5
+; LMULMAX1-RV64-NEXT:    slli a5, a2, 40
+; LMULMAX1-RV64-NEXT:    and a1, a5, a1
+; LMULMAX1-RV64-NEXT:    slli a2, a2, 56
+; LMULMAX1-RV64-NEXT:    or a1, a2, a1
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a4
 ; LMULMAX1-RV64-NEXT:    or a1, a1, a3
-; LMULMAX1-RV64-NEXT:    or a1, a1, a2
 ; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
 ; LMULMAX1-RV64-NEXT:    vmv.s.x v27, a1
 ; LMULMAX1-RV64-NEXT:    vse64.v v27, (a0)